View difference between Paste ID: BarvM7DQ and 5BYGTyW7
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/perl 
2
#===============================================================================
3
#
4
#         FILE:  tab
5
#
6
#        USAGE:  "tab" or "untab"
7
#
8
#  DESCRIPTION:  This will turn comma separated input from stdin into a text table.
9
#                It can also then convert it back if program is invoked as "untab".
10
#
11
#                As well as being used on the command line, tab/untab can be
12
#                used from within vi, and can work on tables that are commented
13
#                out with # or //.
14
#
15
#      OPTIONS: 
16
#                -------------------------------------------------------------------------
17
#                | Command/Option | Purpose                                              | 
18
#                |----------------|------------------------------------------------------|
19
#                | tab            | Reads from stdin and tabulates comma separated input | 
20
#                | tab <-t>       | Tabulates input and assumes first row are titles     | 
21
#                | tab <-h>       | Prints this help                                     | 
22
#                | tab <-nb>      | Tabulates without a border                           | 
23
#                | tab <-fw X>    | Wrap fields greater than X big don't break words     | 
24
#                | tab <-fs X>    | Wrap fields greater than X big and break words       | 
25
#                | tab <-vp X>    | Vertically pad table by X lines                      | 
26
#                | tab <-hp X>    | Horizontally pad fields by X chars                   | 
27
#                | tab <-b X>     | Tabulates with a border made from char X             | 
28
#                |----------------|------------------------------------------------------|
29
#                | untab          | Reads from stdin and untabulates table input         | 
30
#                | untab <-b X>   | Untabulate a table with border char X                | 
31
#                | untab <-nb>    | Untabulate a borderless table                        | 
32
#                -------------------------------------------------------------------------
33
#
34
# REQUIREMENTS:  ---
35
#         BUGS:  ---
36
#        NOTES:  ---
37-
#       AUTHOR:  Ben Staniford (BTS), <[email protected]>
37+
#       AUTHOR:  Ben Staniford (BTS), <ben.staniford@g nospam mai l.com>
38
#      COMPANY:  
39
#      VERSION:  1.0
40
#      CREATED:  23/08/07 11:53:19 BST
41
#     REVISION:  ---
42
#===============================================================================
43
44
# TODO
45
# 1. Make tab and untab keep existing indentation including inside comments
46
# 2. Store the comment regexp only once in a global variable
47
# 3. Allow facility to set the delimiter on the command line
48
49
use strict;
use warnings;

use File::Basename qw(basename);
51
52
#Default values (Normally set from cmd line)
my $HPADDING=1;                    #How much horizontal padding
my $VPADDING=0;                    #How much vertical padding
my $VBORDER="|";                   #What is our vertical border?
my $HBORDER="-";                   #What is our horizontal border/divider?
my $wrapped_line_vpad=1;           #Should we vertically pad fields that have been word wrapped?
my $break_words_on_wrap=1;         #1 = wrap on word boundaries (-fw), 0 = split in mid-word (-fs)
my $field_wrap_boundary=0;         #How big should a field be before we wrap it? (0 = never wrap)

#Globals
my @max_field_sizes=();            #Widest field seen so far in each column
my $max_col_count=0;               #Largest number of fields seen on any one row
my $comment_char="";               #"#" or "//" once a commented-out table has been detected
my $titles=0;                      #Set by -t: the first row holds column titles

#Funcs
sub tabulate();
sub get_fields($);
sub print_header_footer();
sub add_padding($);
sub untabulate();
sub add_field_empty_space($$);
sub print_usage();
sub wrap_oversized_fields(@);
sub print_table_divider($);

#No STDERR under any circumstances.
#The result is deliberately not checked: if this fails there is nowhere
#left to report it.
open(STDERR, '>', '/dev/null');

#Arguments
#Every option must start at the beginning of a token ((?<!\S)), so that
#e.g. "-nb -t" is not mistaken for "-b -", and "-hp" is not mistaken for "-h".
#The options are applied in this fixed order, regardless of their order on
#the command line, so a later line here overrides an earlier one.
my $args = join ' ', @ARGV;
if ($args =~ /(?<!\S)-t(?!\S)/)     { $titles=1; }
if ($args =~ /(?<!\S)-nb(?!\S)/)    { $VBORDER=""; $HBORDER=""; $HPADDING=2; }
if ($args =~ /(?<!\S)-b\s+(\S)/)    { $VBORDER=$1; $HBORDER=$1; $HPADDING=1; }
if ($args =~ /(?<!\S)-fs\s+(\d+)/)  { $field_wrap_boundary=$1; $break_words_on_wrap=0; }
if ($args =~ /(?<!\S)-fw\s+(\d+)/)  { $field_wrap_boundary=$1; $break_words_on_wrap=1; }
if ($args =~ /(?<!\S)-vp\s+(\d+)/)  { $VPADDING=$1; }
if ($args =~ /(?<!\S)-hp\s+(\d+)/)  { $HPADDING=$1; }
if ($args =~ /(?<!\S)(?:-h|--help)(?!\S)/) { print_usage(); exit 0; }

#If we're invoked as "untab", i.e. via symlink, do the inverse of normal behavior
#Note, untab uses most of the same program arguments above.
#basename() is used instead of shelling out, so a path containing spaces or
#shell metacharacters cannot break (or be executed by) the lookup.
my $PROGRAM_NAME = basename($0);
if ($PROGRAM_NAME eq "untab") {
	untabulate();
} else {
	tabulate();
}
exit 0;
101
102
# ------------------------------------------------------------------
103
# | Name         | Purpose                            | Parameters | 
104
# |--------------|------------------------------------|------------|
105
# | tabulate()   | Main function that tabulates stdin |            | 
106
# ------------------------------------------------------------------
107
sub tabulate() {

	#Reads comma separated lines from STDIN and prints them to STDOUT as a
	#text table.  Takes no parameters and returns nothing useful; it is
	#driven by the file-level settings ($HPADDING, $VPADDING, $VBORDER,
	#$HBORDER, $titles, $field_wrap_boundary) and fills in @max_field_sizes,
	#$max_col_count and $comment_char as it goes.

	#Step 1, load the data into a list
	my @table=<STDIN>;

	#No input at all: print nothing rather than a pair of stray border lines
	return if !@table;

	#Step 2, If we have field length restrictions, reorder the table as
	#needed.  Note, this can't be untabbed.
	if ($field_wrap_boundary != 0) {
		@table = wrap_oversized_fields(@table);
	}

	#Step 3, calculate the number of rows and columns from the input as well as the
	#maximum field size for each column.  Also, work out if this table is in a comment.
	#$line aliases the element of @table, so the chomp and the comment
	#stripping done here are still in effect when the rows are printed below.
	for my $line (@table) {
		chomp $line;

		#Work out if the data is inside a comment
		if ($line =~ /^(\#|\/\/)/) {
			$comment_char=$1;
		}

		#Strip the comment marker BEFORE splitting into fields, otherwise
		#the marker is counted as part of the first field and the first
		#column comes out wider than its contents.
		$line =~ s/^\Q$comment_char\E//;

		my @fields = get_fields($line);
		for my $column (0 .. $#fields) {
			my $width = length($fields[$column]);
			if (!defined $max_field_sizes[$column] || $max_field_sizes[$column] < $width) {
				$max_field_sizes[$column] = $width;
			}
		}

		if (@fields > $max_col_count) {
			$max_col_count = @fields;
		}
	}

	#Step 4, print out the table
	print_header_footer();
	my $lcounter=0;
	for my $line (@table) {
		my @fields = get_fields($line);
		$lcounter++;

		if ($comment_char ne "") {
			print "$comment_char ";
		}

		if ($VBORDER ne "") {
			print $VBORDER.add_padding(" ");
		}

		#Print every column; a row that is short of fields gets empty
		#cells so that all rows end up the same width.
		for my $column (0 .. $max_col_count-1) {
			my $field = $column < @fields ? $fields[$column] : "";
			print $field.add_field_empty_space(length($field), $column).add_padding(" ");
			print $VBORDER.add_padding(" ");
		}
		print "\n";

		#-vp X: follow each row with X blank rows
		for (my $i=0;$i<$VPADDING;$i++) {
			print_table_divider(" ");
		}

		#-t: rule off the title row from the rest of the table
		if ($titles && $lcounter==1) {
			print_table_divider($HBORDER);
		}
	}
	print_header_footer();
}
188
189
# -----------------------------------------------------------------------------
190
# | Name                  | Purpose                | Parameters               | 
191
# |-----------------------|------------------------|--------------------------|
192
# | print_table_divider() | Print out a divider in | Character divider should | 
193
# |                       | the table              | be made from             | 
194
# -----------------------------------------------------------------------------
195
sub print_table_divider($) {

	#Prints one divider row spanning every column in @max_field_sizes.
	#Parameter: the character the cells of the divider are filled with.
	my ($fill) = @_;

	#A borderless table has no divider to draw
	return if $fill eq $HBORDER && $HBORDER eq "";

	#Build the whole row first, then emit it in one go
	my $row = $comment_char ne "" ? "$comment_char " : "";
	for my $width (@max_field_sizes) {
		$row .= $VBORDER . add_padding($fill) . ($fill x $width) . add_padding($fill);
	}
	print $row . $VBORDER . "\n";
}
214
215
216
# ----------------------------------------------------------------------------
217
# | Name                   | Purpose                            | Parameters | 
218
# ----------------------------------------------------------------------------
219
# | print_header_footer()  | Print out the tables header/footer |            | 
220
# ----------------------------------------------------------------------------
221
sub print_header_footer() {

	#Prints the solid line that opens and closes the table.  It is drawn
	#entirely from $HBORDER, including the positions where the data rows
	#have their vertical borders.

	#A borderless table has no header or footer
	return if $HBORDER eq "";

	my $pad  = add_padding($HBORDER);
	my $rule = join "", map { join "", $HBORDER, $pad, $HBORDER x $_, $pad } @max_field_sizes;

	print "$comment_char " if $comment_char ne "";
	print $rule . $HBORDER . "\n";

}
241
242
# ------------------------------------------------------------------------------
243
# | Name                     | Purpose                    | Parameters         | 
244
# ------------------------------------------------------------------------------
245
# | add_field_empty_space()  | Print out the field spacer | Field Length (int) | 
246
# |                          |                            | Field Number (int) | 
247
# ------------------------------------------------------------------------------
248
sub add_field_empty_space($$) {
	#Returns the run of spaces that fills a field out to its column width.
	#Parameters: length of the field's text, zero-based column number.
	my ($field_length, $field_number) = @_;

	my $gap = $max_field_sizes[$field_number] - $field_length;

	#A field that already fills (or overfills) its column gets no filler
	return $gap > 0 ? " " x $gap : "";
}
258
 
259
# -----------------------------------------------------------------------------
260
# | Name          | Purpose                      | Parameters                 | 
261
# |---------------|------------------------------|----------------------------|
262
# | add_padding   | Print out the padding string | Padding character (string) | 
263
# -----------------------------------------------------------------------------
264
sub add_padding($) {
	#Returns the horizontal padding string: the given character repeated
	#$HPADDING times.
	#Parameter: the padding character (a space for data rows, the border
	#character for divider rows).
	#A zero, negative or non-numeric $HPADDING yields an empty string; a
	#fractional value is truncated to a whole number of characters.
	my ($padding_char) = @_;
	return $padding_char x $HPADDING;
}
272
273
# -----------------------------------------------------------------------------
274
# | Name         | Purpose                              | Parameters          | 
275
# |--------------|--------------------------------------|---------------------|
276
# | get_fields   | Extract a list of fields from a line | Input line (string) | 
277
# -----------------------------------------------------------------------------
278
sub get_fields($) {
	#Splits a line on commas and returns the fields with leading and
	#trailing whitespace removed.
	#Parameter: the input line (string).
	#Note that split drops trailing empty fields, so "a,b,," gives two fields.
	my ($line) = @_;

	return map {
		my $trimmed = $_;
		$trimmed =~ s/^\s*//;
		$trimmed =~ s/\s*$//;
		$trimmed;
	} split ',', $line;
}
289
290
# -----------------------------------------------------------------------------------
291
# | Name           | Purpose                                           | Parameters | 
292
# |----------------|---------------------------------------------------|------------|
293
# | untabulate()   | Perform the inverse function and untabulate stdin |            | 
294
# -----------------------------------------------------------------------------------
295
sub untabulate() {

	#Reads a text table from STDIN and prints it to STDOUT as comma
	#separated lines.  Takes no parameters; the border characters come
	#from $HBORDER/$VBORDER (set by -b / -nb) and $comment_char is set here
	#when the first line starts with "#" or "//".

	#Escape the border characters properly.  Simply prefixing a backslash
	#would turn a border such as "w", "d", "s" or "b" into \w, \d, \s or \b.
	my $hb_regexp = quotemeta $HBORDER;
	my $vb_regexp = quotemeta $VBORDER;

	my $counter=0;
	while (my $line = <STDIN>) {

		chomp $line;

		#Work out if the data is inside a comment
		if ($counter==0 && $line =~ /^(\#|\/\/)/) {
			$comment_char=$1;
		}
		$counter++;
		my $cc_regexp = quotemeta $comment_char;

		#Handle a borderless table specifically
		if ($HBORDER eq "" && $VBORDER eq "") {
			$line =~ s/\s{2,200}/,/g;
			$line =~ s/,$//;
		#This is a table with a border
		} else {
			#Blank out header, footer and divider rows, i.e. lines made
			#only of border characters.  The comment marker is allowed
			#in front so the "# |----|----|" divider of a commented-out
			#table is removed as well.
			$line =~ s/^(?:$cc_regexp)?\s*(?:$hb_regexp|$vb_regexp)*\s*$//;

			#Every vertical border, with the padding around it, becomes a comma
			$line =~ s/\s*$vb_regexp\s*/,/g;
		}

		#Drop the commas left behind by the outer borders
		$line =~ s/^$cc_regexp,/$comment_char/;
		$line =~ s/^,+//;
		$line =~ s/,+$//;

		$line =~ s/,/, /g;   #If you want spaces as the default after commas

		#Make sure the comment marker is followed by a space.  This must be
		#skipped when there is no marker: the pattern would then be empty,
		#and Perl treats an empty pattern as "reuse the last successful one".
		if ($comment_char ne "" && $line !~ /$cc_regexp\s/) {
			$line =~ s/$cc_regexp/$comment_char /;
		}

		if ($line !~ /^\s*$/) {
			print "$line\n";
		}
	}

}
338
339
# ----------------------------------------------------------------------------------------
340
# | Name                      | Purpose                                  | Parameters    | 
341
# |---------------------------|------------------------------------------|---------------|
342
# | wrap_oversized_fields()   | Wrap fields that are more than specified | List of lines | 
343
# |                           | size.  This works by rewriting the       |               | 
344
# |                           | comma separated data so that extra lines |               | 
345
# |                           | are made.  For this reason this          |               | 
346
# |                           | function cannot easily be undone by "    |               | 
347
# |                           | untab"                                   |               | 
348
# ----------------------------------------------------------------------------------------
349
sub wrap_oversized_fields(@) {

	#Rewrites the comma separated input so that every field longer than
	#$field_wrap_boundary is spread over several consecutive lines.
	#Parameter: the list of input lines.
	#Returns:   the new list of lines, without newlines or comment markers.
	#Side effect: sets $comment_char when a line starts with "#" or "//".

	my @table=@_;
	my @ret;

	#Without a positive limit there is nothing sensible to wrap to
	my $limit = int($field_wrap_boundary);
	return @table if $limit < 1;

	#Go through each line in the table
	for my $line (@table) {
		chomp $line;

		#Work out if the data is inside a comment
		if ($line =~ /^(\#|\/\/)/) {
			$comment_char=$1;
		}
		$line =~ s/^\Q$comment_char\E//;

		#A line without any fields produces no output lines at all
		my @fields = get_fields($line);
		next if !@fields;

		#Cut every field into the chunks it will occupy, one chunk per
		#output line.  An empty field has no chunks.
		my @chunks_of;
		for my $field (@fields) {
			my @chunks;
			if (length($field) <= $limit) {
				@chunks = ($field) if $field ne "";
			} else {
				my $rest = $field;
				while (length $rest) {
					my $chunk;
					#Wrap and preserve words: take the longest piece of
					#at most $limit characters that ends on a word boundary
					if ($break_words_on_wrap && $rest =~ /^(.{1,$limit}\b)/) {
						$chunk = $1;
					#Wrap and split words.  Also the fallback when no word
					#boundary is in reach (a word longer than the limit, or
					#a run of punctuation), so that the loop always makes
					#progress and no text is dropped.
					} else {
						$chunk = substr($rest, 0, $limit);
					}
					$rest = substr($rest, length $chunk);

					if (!$break_words_on_wrap) {
						$chunk =~ s/^\s*//;
					}
					#Compare against "" rather than testing truth, so that
					#a chunk of "0" is kept
					push @chunks, $chunk if $chunk ne "";
				}
			}
			push @chunks_of, \@chunks;
		}

		#The line needs as many output lines as its longest field has chunks
		my $rows = 0;
		for my $chunks (@chunks_of) {
			$rows = @$chunks if @$chunks > $rows;
		}

		#Output line N holds chunk N of every field; fields that have run
		#out of chunks contribute an empty cell.
		for my $row (0 .. $rows-1) {
			push @ret, join(',', map { defined $_->[$row] ? $_->[$row] : "" } @chunks_of);
		}

		#Follow every input line with one empty row to keep wrapped
		#records visually apart
		if ($wrapped_line_vpad==1) {
			push @ret, "," x $#fields;
		}
	}
	return @ret;

}
440
441
sub print_usage() {

	#Prints the option summary to STDOUT.  The text contains nothing that
	#interpolates, so it is held in a non-interpolating here-document.
	my $usage = <<'END';
-------------------------------------------------------------------------
| Command/Option | Purpose                                              | 
|----------------|------------------------------------------------------|
| tab            | Reads from stdin and tabulates comma seperated input | 
| tab <-t>       | Tabulates input and assumes first row are titles     | 
| tab <-h>       | Prints this help                                     | 
| tab <-nb>      | Tabulates without a border                           | 
| tab <-fw X>    | Wrap fields greater than X big don't break words     | 
| tab <-fs X>    | Wrap fields greater than X big and break words       | 
| tab <-vp X>    | Vertically pad table by X lines                      | 
| tab <-hp X>    | Horizontally pad fields by X chars                   | 
| tab <-b X>     | Tabulates with a border made from char X             | 
|----------------|------------------------------------------------------|
| untab          | Reads from stdin and untabulates table input         | 
| untab <-b X>   | Untabulate a table with border char X                | 
| untab <-nb>    | Untabulate a borderless table                        | 
-------------------------------------------------------------------------
END

	print $usage;

}