SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/perl | |
2 | #=============================================================================== | |
3 | # | |
4 | # FILE: tab | |
5 | # | |
6 | # USAGE: "tab" or "untab" | |
7 | # | |
7 | # DESCRIPTION: This will turn comma separated input from stdin into a text table. | |
9 | # It can also then convert it back if program is invoked as "untab". | |
10 | # | |
11 | # As well as being used on the command line, tab/untab can be | |
12 | # used from within vi, and can work on tables that are commented | |
13 | # out with # or //. | |
14 | # | |
15 | # OPTIONS: | |
16 | # ------------------------------------------------------------------------- | |
17 | # | Command/Option | Purpose | | |
18 | # |----------------|------------------------------------------------------| | |
19 | # | tab | Reads from stdin and tabulates comma separated input | | |
20 | # | tab <-t> | Tabulates input and assumes first row are titles | | |
21 | # | tab <-h> | Prints this help | | |
22 | # | tab <-nb> | Tabulates without a border | | |
23 | # | tab <-fw X> | Wrap fields greater than X big don't break words | | |
24 | # | tab <-fs X> | Wrap fields greater than X big and break words | | |
25 | # | tab <-vp X> | Vertically pad table by X lines | | |
26 | # | tab <-hp X> | Horizontally pad fields by X chars | | |
27 | # | tab <-b X> | Tabulates with a border made from char X | | |
28 | # |----------------|------------------------------------------------------| | |
29 | # | untab | Reads from stdin and untabulates table input | | |
30 | # | untab <-b X> | Untabulate a table with border char X | | |
31 | # | untab <-nb> | Untabulate a borderless table | | |
32 | # ------------------------------------------------------------------------- | |
33 | # | |
34 | # REQUIREMENTS: --- | |
35 | # BUGS: --- | |
36 | # NOTES: --- | |
37 | - | # AUTHOR: Ben Staniford (BTS), <[email protected]> |
37 | + | # AUTHOR: Ben Staniford (BTS), <ben.staniford@g nospam mai l.com> |
38 | # COMPANY: | |
39 | # VERSION: 1.0 | |
40 | # CREATED: 23/08/07 11:53:19 BST | |
41 | # REVISION: --- | |
42 | #=============================================================================== | |
43 | ||
44 | # TODO | |
45 | # 1. Make tab and untab keep existing indentation including inside comments | |
46 | # 2. Store the comment regexp only once in a global variable | |
47 | # 3. Allow facility to set the delimiter on the command line | |
48 | ||
49 | use strict; | |
50 | use warnings; | |
51 | ||
# Defaults. These are normally overridden from the command line.
my $HPADDING            = 1;      # Horizontal padding width, in characters
my $VPADDING            = 0;      # Vertical padding setting
my $VBORDER             = "|";    # Vertical border string ("" for a borderless table)
my $HBORDER             = "-";    # Horizontal border/divider string ("" for a borderless table)
my $wrapped_line_vpad   = 1;      # 1: keep the empty row produced after each wrapped input line
my $break_words_on_wrap = 1;      # 1: wrap at word boundaries, 0: cut at a fixed width
                                  # (despite the name; see wrap_oversized_fields)
my $field_wrap_boundary = 0;      # Field length above which a field is wrapped; 0 disables wrapping

# State shared between the subs below.
my @max_field_sizes = ();         # Longest field seen so far, per column
my $max_col_count   = 0;          # Largest number of fields seen on a single line
my $comment_char    = "";         # "#" or "//" once a commented input line has been seen
my $titles          = 0;          # 1: print a divider under the first row

# Forward declarations, so the prototyped subs can be called before their
# definitions further down the file.
sub tabulate();
sub get_fields($);
sub print_header_footer();
sub add_padding($);
sub untabulate();
sub add_field_empty_space($$);
sub print_usage();
sub wrap_oversized_fields(@);
sub print_table_divider($);
77 | ||
# No STDERR under any circumstances. This is deliberately best-effort: if
# /dev/null cannot be opened we simply carry on with the inherited STDERR.
open(STDERR, '>', '/dev/null');

# Arguments.
# Every switch is matched as a whole whitespace-delimited word, so that e.g.
# "-nb -t" is not also read as "-b -" and "-hp" is not read as "-h".
# The switches are applied in this fixed order regardless of their order on
# the command line (a later test overrides an earlier one).
my $args = join ' ', @ARGV;

# Help wins over everything else.
if ($args =~ /(?<!\S)-h(?!\S)/) { print_usage(); exit 0; }

if ($args =~ /(?<!\S)-t(?!\S)/)  { $titles = 1; }
if ($args =~ /(?<!\S)-nb(?!\S)/) { $VBORDER = ""; $HBORDER = ""; $HPADDING = 2; }
if ($args =~ /(?<!\S)-b\s+(\S)/) { $VBORDER = $1; $HBORDER = $1; $HPADDING = 1; }

# Numeric options only accept whole non-negative numbers; anything else is
# ignored and the default stays in force.
# NOTE: $break_words_on_wrap == 1 selects the word-preserving wrap (-fw),
# 0 selects the fixed-width split that breaks words (-fs).
if ($args =~ /(?<!\S)-fs\s+(\d+)(?!\S)/) { $field_wrap_boundary = $1; $break_words_on_wrap = 0; }
if ($args =~ /(?<!\S)-fw\s+(\d+)(?!\S)/) { $field_wrap_boundary = $1; $break_words_on_wrap = 1; }
if ($args =~ /(?<!\S)-vp\s+(\d+)(?!\S)/) { $VPADDING = $1; }
if ($args =~ /(?<!\S)-hp\s+(\d+)(?!\S)/) { $HPADDING = $1; }

# If we're invoked as "untab", i.e. via symlink, do the inverse of normal behavior.
# Note, untab uses most of the same program arguments above.
# The name is taken from $0 in-process instead of shelling out to basename(1),
# which broke on paths containing spaces or shell metacharacters.
use File::Basename qw(basename);
my $PROGRAM_NAME = basename($0);
if ($PROGRAM_NAME eq "untab") {
    untabulate();
}
else {
    tabulate();
}
exit 0;
101 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | tabulate()             | Main function that tabulates stdin |                        |
# ----------------------------------------------------------------------------------------
# Reads all of stdin, optionally wraps oversized fields, measures the longest
# field of every column and prints the table to stdout.
# Updates the globals @max_field_sizes, $max_col_count and $comment_char.
sub tabulate() {

    #Step 1, load the data into a list
    my @table = <STDIN>;

    #Step 2, if we have field length restrictions, reorder the table as
    #needed. Note, this can't be untabbed.
    if ($field_wrap_boundary != 0) {
        @table = wrap_oversized_fields(@table);
    }

    #Step 3, calculate the number of columns from the input as well as the
    #maximum field size for each column. Also, work out if this table is in
    #a comment.
    for my $line (@table) {
        chomp $line;

        #Any line starting with "#" or "//" marks the data as commented
        if ($line =~ /^(\#|\/\/)/) {
            $comment_char = $1;
        }

        #Strip the comment prefix BEFORE measuring, otherwise it is counted
        #as part of the first field and the first column comes out too wide.
        #$line aliases the element of @table, so step 4 sees the stripped text.
        $line =~ s/^\Q$comment_char\E//;

        my @fields = get_fields($line);
        for my $col (0 .. $#fields) {
            my $width = length $fields[$col];
            if (!defined $max_field_sizes[$col] || $max_field_sizes[$col] < $width) {
                $max_field_sizes[$col] = $width;
            }
        }

        if (@fields > $max_col_count) {
            $max_col_count = scalar @fields;
        }
    }

    #Step 4, print out the table
    print_header_footer();
    my $lcounter = 0;
    for my $line (@table) {
        my @fields = get_fields($line);
        $lcounter++;

        if ($comment_char ne "") {
            print "$comment_char ";
        }
        if ($VBORDER ne "") {
            print $VBORDER.add_padding(" ");
        }

        #Print every column; columns missing from a short row are printed as
        #empty fields so that all rows have the same width.
        for my $col (0 .. $max_col_count - 1) {
            my $field = defined $fields[$col] ? $fields[$col] : "";
            print $field.add_field_empty_space(length($field), $col).add_padding(" ");
            print $VBORDER.add_padding(" ");
        }
        print "\n";

        #Vertical padding: one blank table row per requested line (-vp X).
        #A C-style loop is used on purpose: a range such as 1 .. $VPADDING
        #would switch to string increment on non-numeric input.
        for (my $i = 0; $i < $VPADDING; $i++) {
            print_table_divider(" ");
        }

        #With -t the first row holds the titles and gets a divider below it
        if ($titles && $lcounter == 1) {
            print_table_divider($HBORDER);
        }
    }
    print_header_footer();
}
188 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | print_table_divider()  | Print one divider row of the table | Fill char (string)     |
# ----------------------------------------------------------------------------------------
# The row has a $VBORDER at every column edge and is filled with the given
# character; it is prefixed with the comment marker when one is in use.
sub print_table_divider($) {

    my $fill = shift;

    #A borderless table has nothing to draw a horizontal divider with
    return if $fill eq $HBORDER && $HBORDER eq "";

    my $row = $comment_char ne "" ? "$comment_char " : "";
    my $pad = add_padding($fill);

    #One segment per column: border, padding, field-wide fill, padding
    for my $width (@max_field_sizes) {
        $row .= $VBORDER . $pad . ($fill x $width) . $pad;
    }

    print $row . $VBORDER . "\n";
}
214 | ||
215 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | print_header_footer()  | Print the table's top/bottom rule  |                        |
# ----------------------------------------------------------------------------------------
# Same layout as a divider row, but drawn entirely with $HBORDER (including
# the positions where a divider row would have a $VBORDER).
sub print_header_footer() {

    #Nothing to draw for a borderless table
    return if $HBORDER eq "";

    my @parts;
    push @parts, "$comment_char " if $comment_char ne "";

    my $pad = add_padding($HBORDER);
    for my $width (@max_field_sizes) {
        push @parts, $HBORDER, $pad, $HBORDER x $width, $pad;
    }

    print join("", @parts, $HBORDER, "\n");
}
241 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | add_field_empty_space  | Build the filler after a field     | Field length (int)     |
# |                        |                                    | Field number (int)     |
# ----------------------------------------------------------------------------------------
# Returns the spaces needed to bring a field of the given length up to the
# width recorded for its column in @max_field_sizes ("" if none are needed).
sub add_field_empty_space($$) {
    my ($field_length, $field_number) = @_;

    my $gap = $max_field_sizes[$field_number] - $field_length;
    return $gap > 0 ? " " x $gap : "";
}
258 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | add_padding            | Build the padding string           | Padding char (string)  |
# ----------------------------------------------------------------------------------------
# Returns the given string repeated once for every unit of $HPADDING.
sub add_padding($) {
    my ($padding_char) = @_;

    my @pieces;
    my $count = 0;
    while ($count < $HPADDING) {
        push @pieces, $padding_char;
        $count++;
    }

    return join "", @pieces;
}
272 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | get_fields             | Split a line into trimmed fields   | Input line (string)    |
# ----------------------------------------------------------------------------------------
# Splits the line on commas and returns the pieces with leading and trailing
# whitespace removed. As with any split, trailing empty fields are dropped.
sub get_fields($) {
    my ($line) = @_;

    return map {
        my $cell = $_;
        $cell =~ s/^\s*//;
        $cell =~ s/\s*$//;
        $cell;
    } split ',', $line;
}
289 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | untabulate()           | Turn a table on stdin back to CSV  |                        |
# ----------------------------------------------------------------------------------------
# Reads a table from stdin line by line and prints the comma separated
# equivalent to stdout. Border-only rows and blank rows are dropped. If the
# first line starts with "#" or "//" that marker is kept (followed by one
# space) at the start of every commented row.
# Uses the globals $HBORDER and $VBORDER and sets $comment_char.
sub untabulate() {

    my $borderless = ($HBORDER eq "" && $VBORDER eq "");

    #quotemeta escapes any border character correctly. Merely prefixing a
    #backslash turned letters into character classes (e.g. "d" into \d).
    my $hb = quotemeta $HBORDER;
    my $vb = quotemeta $VBORDER;

    my $line_no = 0;
    while (my $line = <STDIN>) {

        chomp $line;

        #Work out if the data is inside a comment (decided by the first line)
        if ($line_no == 0 && $line =~ /^(\#|\/\/)/) {
            $comment_char = $1;
        }
        my $cc = quotemeta $comment_char;

        #Handle a borderless table specifically: runs of two or more
        #whitespace characters separate the fields
        if ($borderless) {
            $line =~ s/\s{2,200}/,/g;
            $line =~ s/,$//;
        }
        #This is a table with a border
        else {
            #Blank out rows that consist only of border characters, with or
            #without a comment prefix. This also removes the divider under
            #the titles of a commented table.
            $line =~ s/^(?:$cc)?\s*(?:$hb|$vb)*\s*$//;
            $line =~ s/\s*$vb\s*/,/g;
        }

        #Drop the commas left over from the outer borders
        $line =~ s/^$cc,/$comment_char/;
        $line =~ s/^,+//;
        $line =~ s/,+$//;

        $line =~ s/,/, /g;    #If you want spaces as the default after commas

        #Make sure the comment marker is followed by a space. This must be
        #skipped when there is no marker: an empty pattern in s/// does not
        #mean "match nothing", it reuses the last successful pattern.
        if ($comment_char ne "" && $line !~ /$cc\s/) {
            $line =~ s/$cc/$comment_char /;
        }

        if ($line !~ /^\s*$/) {
            print "$line\n";
        }
        $line_no++;
    }

}
338 | ||
# Helper for wrap_oversized_fields(): cut one field into chunks of at most
# $limit characters and return the chunks that contain something other than
# whitespace. With $keep_words true, cuts are made at word boundaries where
# possible; a word longer than $limit is cut hard at $limit.
# $limit must be >= 1, so every pass consumes at least one character.
sub _split_field_for_wrap {
    my ($field, $limit, $keep_words) = @_;

    my @chunks;
    my $rest = $field;

    while (length $rest) {
        my $piece;

        #Longest prefix of up to $limit characters that ends on a word
        #boundary or at the end of the text. An empty match means the text
        #starts with a word longer than $limit; that case falls through to
        #the hard cut (looping on the empty match would never terminate).
        if ($keep_words && $rest =~ /^(.{0,$limit})(?:\b|\z)/ && length $1) {
            $piece = $1;
            $rest  = substr($rest, length $piece);
        }
        else {
            #4-argument substr removes the chunk from $rest and returns it
            $piece = substr($rest, 0, $limit, "");
            $piece =~ s/^\s*// unless $keep_words;
        }

        #A whitespace-only chunk would only produce an empty cell
        push @chunks, $piece if $piece =~ /\S/;
    }

    return @chunks;
}

# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | wrap_oversized_fields  | Wrap fields wider than the limit   | List of lines          |
# ----------------------------------------------------------------------------------------
# Rewrites the comma separated data so that every field longer than
# $field_wrap_boundary is spread over extra rows, and returns the new list of
# lines. Because rows are added, this cannot easily be undone by "untab".
#
# A leading "#" or "//" is recorded in $comment_char and stripped from the
# returned lines. After the rows of each input line one row of empty fields
# is emitted; it is kept only when $wrapped_line_vpad is 1. Blank input lines
# produce no rows.
#
# NOTE: despite its name, $break_words_on_wrap == 1 selects the
# word-preserving wrap (-fw) and 0 the fixed-width split (-fs).
sub wrap_oversized_fields(@) {

    my @table = @_;

    #Wrapping only makes sense for a positive whole width; hand the lines
    #back untouched for anything else
    if ($field_wrap_boundary !~ /\A\d+\z/ || $field_wrap_boundary < 1) {
        return @table;
    }

    my @ret;

    #Go through each line in the table
    for my $line (@table) {
        chomp $line;

        #Work out if the data is inside a comment
        if ($line =~ /^(\#|\/\/)/) {
            $comment_char = $1;
        }
        $line =~ s/^\Q$comment_char\E//;

        #One list of chunks per column; a field that fits is a single chunk
        my @columns;
        for my $field (get_fields($line)) {
            my @chunks = ($field);
            if (length($field) > $field_wrap_boundary) {
                @chunks = _split_field_for_wrap($field, $field_wrap_boundary,
                                                $break_words_on_wrap);
                @chunks = ("") unless @chunks;
            }
            push @columns, \@chunks;
        }
        next unless @columns;

        #Row N of the output is made of chunk N of every column (empty once a
        #column has run out). The first row in which every cell is empty ends
        #the line; it doubles as the vertical padding row.
        my $row = 0;
        while (1) {
            my @cells  = map { defined $_->[$row] ? $_->[$row] : "" } @columns;
            my $joined = join ",", @cells;

            if ($joined !~ /^,*$/ || $wrapped_line_vpad == 1) {
                push @ret, $joined;
            }

            last unless grep { $_ ne "" } @cells;
            $row++;
        }
    }

    return @ret;
}
440 | ||
# ----------------------------------------------------------------------------------------
# | Name                   | Purpose                            | Parameters             |
# |------------------------|------------------------------------|------------------------|
# | print_usage()          | Print the command line help table  |                        |
# ----------------------------------------------------------------------------------------
# The table is generated from the row lists below, so every line is padded to
# the same width and the columns always line up with the divider rows.
sub print_usage() {

    my @tab_rows = (
        [ "tab",          "Reads from stdin and tabulates comma separated input" ],
        [ "tab <-t>",     "Tabulates input and assumes first row are titles" ],
        [ "tab <-h>",     "Prints this help" ],
        [ "tab <-nb>",    "Tabulates without a border" ],
        [ "tab <-fw X>",  "Wrap fields greater than X big don't break words" ],
        [ "tab <-fs X>",  "Wrap fields greater than X big and break words" ],
        [ "tab <-vp X>",  "Vertically pad table by X lines" ],
        [ "tab <-hp X>",  "Horizontally pad fields by X chars" ],
        [ "tab <-b X>",   "Tabulates with a border made from char X" ],
    );
    my @untab_rows = (
        [ "untab",        "Reads from stdin and untabulates table input" ],
        [ "untab <-b X>", "Untabulate a table with border char X" ],
        [ "untab <-nb>",  "Untabulate a borderless table" ],
    );

    #Column text widths are 14 and 52; with borders and padding every line
    #is 73 characters wide
    my $row_format = "| %-14s | %-52s |\n";
    my $rule       = ("-" x 73) . "\n";
    my $divider    = "|" . ("-" x 16) . "|" . ("-" x 54) . "|\n";

    my $help = $rule;
    $help .= sprintf($row_format, "Command/Option", "Purpose");
    $help .= $divider;
    $help .= sprintf($row_format, @{$_}) for @tab_rows;
    $help .= $divider;
    $help .= sprintf($row_format, @{$_}) for @untab_rows;
    $help .= $rule;

    print $help;
}