Advertisement
Guest User

Untitled

a guest
May 1st, 2012
1,518
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2. #===============================================================================
  3. #
  4. #         FILE:  tab
  5. #
  6. #        USAGE:  "tab" or "untab"
  7. #
  8. #  DESCRIPTION:  This will turn comma-separated input from stdin into a text table.
  9. #                It can also then convert it back if program is invoked as "untab".
  10. #
  11. #                As well as being used on the command line, tab/untab can be
  12. #                used from within vi, and can work on tables that are commented
  13. #                out with # or //.
  14. #
  15. #      OPTIONS:
  16. #                -------------------------------------------------------------------------
  17. #                | Command/Option | Purpose                                              |
  18. #                |----------------|------------------------------------------------------|
  19. #                | tab            | Reads from stdin and tabulates comma separated input |
  20. #                | tab <-t>       | Tabulates input and assumes first row are titles     |
  21. #                | tab <-h>       | Prints this help                                     |
  22. #                | tab <-nb>      | Tabulates without a border                           |
  23. #                | tab <-fw X>    | Wrap fields greater than X big don't break words     |
  24. #                | tab <-fs X>    | Wrap fields greater than X big and break words       |
  25. #                | tab <-vp X>    | Vertically pad table by X lines                      |
  26. #                | tab <-hp X>    | Horizontally pad fields by X chars                   |
  27. #                | tab <-b X>     | Tabulates with a border made from char X             |
  28. #                |----------------|------------------------------------------------------|
  29. #                | untab          | Reads from stdin and untabulates table input         |
  30. #                | untab <-b X>   | Untabulate a table with border char X                |
  31. #                | untab <-nb>    | Untabulate a borderless table                        |
  32. #                -------------------------------------------------------------------------
  33. #
  34. # REQUIREMENTS:  ---
  35. #         BUGS:  ---
  36. #        NOTES:  ---
  37. #       AUTHOR:  Ben Staniford (BTS), <ben.staniford@g nospam mai l.com>
  38. #      COMPANY:  
  39. #      VERSION:  1.0
  40. #      CREATED:  23/08/07 11:53:19 BST
  41. #     REVISION:  ---
  42. #===============================================================================
  43.  
  44. # TODO
  45. # 1. Make tab and untab keep existing indentation including inside comments
  46. # 2. Store the comment regexp only once in a global variable
  47. # 3. Allow facility to set the delimiter on the command line
  48.  
  use strict;
  use warnings;
  use File::Basename qw(basename);

  #Default values (Normally set from cmd line)
  my $HPADDING=1;                    #How much horizontal padding
  my $VPADDING=0;                    #How much vertical padding
  my $VBORDER="|";                   #What is our vertical border?
  my $HBORDER="-";                   #What is our horizontal border/divider?
  my $wrapped_line_vpad=1;           #Should we vertically pad fields that have been word wrapped?
  my $break_words_on_wrap=1;         #Wrap mode.  NOTE: despite the name, 1 (-fw) keeps words
                                     #whole and 0 (-fs) splits them at the boundary.
  my $field_wrap_boundary=0;         #How big should a field be before we wrap it? (0 = no wrapping)

  #Globals
  my @max_field_sizes=();            #Widest field seen so far in each column
  my $max_col_count=0;               #Largest number of columns seen in any row
  my $comment_char="";               #"#" or "//" when the table sits inside a comment
  my $titles=0;                      #Is the first row a title row?

  #Funcs
  sub tabulate();
  sub get_fields($);
  sub print_header_footer();
  sub add_padding($);
  sub untabulate();
  sub add_field_empty_space($$);
  sub print_usage();
  sub wrap_oversized_fields(@);
  sub print_table_divider($);

  #No STDERR under any circumstances.  If the null device cannot be opened
  #there is nowhere to report that, so just make sure the handle is closed.
  open(STDERR, '>', '/dev/null') or close(STDERR);

  #Arguments.  Options are found by matching against the joined argument
  #string.  Every option must be a whole argument: it has to start the string
  #or follow whitespace.  Without that anchoring "-nb -t" was also read as
  #"-b -" (border character "-"), and "-h" was ignored whenever "-hp" was
  #present.
  my $args = join ' ', @ARGV;
  my $opt_start = qr/(?:\A|\s)/;
  my $opt_end   = qr/(?=\s|\z)/;

  if ($args =~ /${opt_start}(?:-h|--help)${opt_end}/) { print_usage(); exit 0; }
  if ($args =~ /${opt_start}-t${opt_end}/)            { $titles=1; }
  if ($args =~ /${opt_start}-nb${opt_end}/)           { $VBORDER=""; $HBORDER=""; $HPADDING=2; }
  if ($args =~ /${opt_start}-b\s+(\S)/)               { $VBORDER=$1; $HBORDER=$1; $HPADDING=1; }

  #Numeric options only accept digits; the wrap boundary is later used as a
  #regex quantifier / substr length, so anything else must not get through.
  if ($args =~ /${opt_start}-fs\s+(\d+)${opt_end}/)   { $field_wrap_boundary=$1; $break_words_on_wrap=0; }
  if ($args =~ /${opt_start}-fw\s+(\d+)${opt_end}/)   { $field_wrap_boundary=$1; $break_words_on_wrap=1; }
  if ($args =~ /${opt_start}-vp\s+(\d+)${opt_end}/)   { $VPADDING=$1; }
  if ($args =~ /${opt_start}-hp\s+(\d+)${opt_end}/)   { $HPADDING=$1; }

  #If we're invoked as "untab", i.e. via symlink, do the inverse of normal behavior
  #Note, untab uses most of the same program arguments above.
  #basename() is used instead of shelling out to `basename $0`, which broke on
  #paths containing spaces or shell metacharacters.
  my $PROGRAM_NAME = basename($0);
  if ($PROGRAM_NAME eq "untab") {
      untabulate();
  } else {
      tabulate();
  }
  exit 0;
  101.  
  # ------------------------------------------------------------------
  # | Name         | Purpose                            | Parameters |
  # |--------------|------------------------------------|------------|
  # | tabulate()   | Main function that tabulates stdin |            |
  # ------------------------------------------------------------------
  # Reads every line of STDIN, optionally wraps oversized fields, measures the
  # widest field of each column into @max_field_sizes / $max_col_count, and
  # prints the bordered table to STDOUT.  If the input lines start with "#" or
  # "//" the marker is remembered in $comment_char and re-emitted in front of
  # every output line.
  sub tabulate() {

      #Step 1, load the data into a list
      my @table=<STDIN>;

      #Step 2, If we have field length restrictions, reorder the table as
      #needed.  Note, this can't be untabbed.
      if ($field_wrap_boundary != 0) {
          @table = wrap_oversized_fields(@table);
      }

      #Step 3, split every line into fields and record the maximum field size
      #for each column.  Also, work out if this table is in a comment.
      my @rows=();
      for my $line (@table) {
          chomp $line;

          #Work out if the data is inside a comment.  Every line is checked,
          #so the most recently seen marker is the one that gets printed.
          if ($line=~/^(\#|\/\/)/) {
              $comment_char=$1;
          }

          #Strip the marker BEFORE splitting, otherwise the first field is
          #measured with the marker still attached and column 0 ends up
          #wider than its contents.
          $line =~ s/^\Q$comment_char\E//;

          my @fields = get_fields($line);
          my $counter=0;
          for my $field (@fields) {
              if (!defined $max_field_sizes[$counter]) {
                  $max_field_sizes[$counter] = 0;
              }
              if ($max_field_sizes[$counter] < length($field)) {
                  $max_field_sizes[$counter] = length($field);
              }
              $counter++;
          }

          if ($counter > $max_col_count) {
              $max_col_count=$counter;
          }
          push @rows, \@fields;
      }

      #-vp X asks for X blank spacer rows after every table row; anything
      #that is not a plain non-negative integer means no vertical padding.
      my $vpad_lines = ($VPADDING =~ /\A\d+\z/) ? $VPADDING : 0;

      #Step 4, print out the table
      print_header_footer();
      my $lcounter=0;
      for my $row (@rows) {
          $lcounter++;

          my $out="";
          if ($comment_char ne "") {
              $out .= "$comment_char ";
          }
          if ($VBORDER ne "") {
              $out .= $VBORDER.add_padding(" ");
          }

          #Print fields
          my $counter=0;
          for my $field (@$row) {
              $out .= $field.add_field_empty_space(length($field), $counter).add_padding(" ");
              $out .= $VBORDER.add_padding(" ");
              $counter++;
          }
          #Print any empty fields (if they exist) so short rows still get
          #the full set of column borders
          for my $column ($counter .. $max_col_count-1) {
              $out .= add_field_empty_space(0, $column).add_padding(" ");
              $out .= $VBORDER.add_padding(" ");
          }
          print "$out\n";

          for (1 .. $vpad_lines) {
              print_table_divider(" ");
          }
          if ($titles && $lcounter==1) {
              print_table_divider($HBORDER);
          }
      }
      print_header_footer();
  }
  188.  
  # -----------------------------------------------------------------------------
  # | Name                  | Purpose                | Parameters               |
  # |-----------------------|------------------------|--------------------------|
  # | print_table_divider() | Print out a divider in | Character divider should |
  # |                       | the table              | be made from             |
  # -----------------------------------------------------------------------------
  # One segment is emitted per entry of @max_field_sizes: the vertical border,
  # padding, the fill character repeated to the column width, and padding
  # again.  A closing vertical border ends the line.
  sub print_table_divider($) {

      my ($fill) = @_;

      #A borderless table has no real divider to draw; spacer rows made from
      #some other character are still printed.
      return if $HBORDER eq "" && $fill eq $HBORDER;

      my $row = "";
      $row .= "$comment_char " unless $comment_char eq "";

      for my $width (@max_field_sizes) {
          $row .= $VBORDER.add_padding($fill);
          $row .= $fill x $width;
          $row .= add_padding($fill);
      }
      $row .= $VBORDER."\n";

      print $row;
  }
  214.  
  215.  
  # ----------------------------------------------------------------------------
  # | Name                   | Purpose                            | Parameters |
  # ----------------------------------------------------------------------------
  # | print_header_footer()  | Print out the tables header/footer |            |
  # ----------------------------------------------------------------------------
  # The rule is drawn entirely from $HBORDER (corners included) and spans the
  # widths recorded in @max_field_sizes.  Nothing is printed for a borderless
  # table.
  sub print_header_footer() {

      return if $HBORDER eq "";

      my $rule = ($comment_char ne "") ? "$comment_char " : "";

      for my $width (@max_field_sizes) {
          $rule .= $HBORDER.add_padding($HBORDER);
          $rule .= $HBORDER x $width;
          $rule .= add_padding($HBORDER);
      }
      $rule .= $HBORDER."\n";

      print $rule;
  }
  241.  
  # ------------------------------------------------------------------------------
  # | Name                     | Purpose                    | Parameters         |
  # ------------------------------------------------------------------------------
  # | add_field_empty_space()  | Build the field spacer     | Field Length (int) |
  # |                          |                            | Field Number (int) |
  # ------------------------------------------------------------------------------
  # Returns the run of spaces needed to bring a field of the given length up
  # to the width recorded for its column in @max_field_sizes.  A field that
  # already fills (or exceeds) the column gets an empty string.
  sub add_field_empty_space($$) {
      my ($field_length, $field_number) = @_;

      my $gap = $max_field_sizes[$field_number] - $field_length;

      return ($gap > 0) ? " " x $gap : "";
  }
  258.  
  # -----------------------------------------------------------------------------
  # | Name          | Purpose                      | Parameters                 |
  # |---------------|------------------------------|----------------------------|
  # | add_padding   | Build the padding string     | Padding character (string) |
  # -----------------------------------------------------------------------------
  # Returns the padding character repeated once for every whole step below
  # $HPADDING, i.e. $HPADDING copies for the usual integer settings.
  sub add_padding($) {
      my ($fill) = @_;

      my $padding = "";
      my $emitted = 0;
      $padding .= $fill while $emitted++ < $HPADDING;

      return $padding;
  }
  272.  
  # -----------------------------------------------------------------------------
  # | Name         | Purpose                              | Parameters          |
  # |--------------|--------------------------------------|---------------------|
  # | get_fields   | Extract a list of fields from a line | Input line (string) |
  # -----------------------------------------------------------------------------
  # Splits the line on commas and returns the pieces with surrounding
  # whitespace removed.  As with any plain split, trailing empty pieces are
  # dropped, so "a,," yields a single field.
  sub get_fields($) {
      my ($line) = @_;

      return map {
          my $trimmed = $_;
          $trimmed =~ s/^\s+//;
          $trimmed =~ s/\s+$//;
          $trimmed;
      } split ',', $line;
  }
  289.  
  # -----------------------------------------------------------------------------------
  # | Name           | Purpose                                           | Parameters |
  # |----------------|---------------------------------------------------|------------|
  # | untabulate()   | Perform the inverse function and untabulate stdin |            |
  # -----------------------------------------------------------------------------------
  # Reads a table from STDIN line by line and prints it back as comma
  # separated text.  Pure border/divider lines are dropped, vertical borders
  # become commas, and a leading "#" or "//" (detected on the first line
  # only) is kept in front of each output line.
  sub untabulate() {

      #Border characters come from the command line, so they must be quoted
      #before being used in a pattern.  Merely prefixing a backslash turned
      #borders such as "d", "s" or "w" into the classes \d, \s and \w.
      my $hb_regexp=quotemeta $HBORDER;
      my $vb_regexp=quotemeta $VBORDER;

      my $counter=0;
      while (my $line = <STDIN>) {

          chomp $line;

          #Work out if the data is inside a comment
          if ($counter==0 && $line=~/^(\#|\/\/)/) {
              $comment_char=$1;
          }
          my $cc_regexp=quotemeta $comment_char;

          #Handle a borderless table specifically
          if ($HBORDER eq "" && $VBORDER eq "") {
              $line =~ s/\s{2,200}/,/g;
              $line =~ s/,$//;
          #This is a table with a border
          } else {
              #Drop header/footer lines and title dividers, with or without
              #a leading comment marker
              $line =~ s/^(?:$hb_regexp)*$//;
              $line =~ s/^(?:$hb_regexp|$vb_regexp)*$//;
              $line =~ s/^$cc_regexp\s*(?:$hb_regexp|$vb_regexp)*$//;
              #Every vertical border (and the padding around it) is a field
              #separator
              if ($VBORDER ne "") {
                  $line =~ s/\s*$vb_regexp\s*/,/g;
              }
          }

          #Remove the separators left behind by the outer borders
          $line =~ s/^$cc_regexp,/$comment_char/;
          $line =~ s/^,+//;
          $line =~ s/,+$//;

          $line =~ s/,/, /g;   #If you want spaces as the default after commas

          #Make sure the comment marker is followed by a space.  This must be
          #skipped when there is no marker: an empty pattern in s/// silently
          #reuses the last pattern that matched.
          if ($comment_char ne "" && $line !~ /$cc_regexp\s/) {
              $line =~ s/$cc_regexp/$comment_char /;
          }
          if ($line !~ /^\s*$/) {
              print "$line\n";
          }
          $counter++;
      }

  }
  338.  
  # ----------------------------------------------------------------------------------------
  # | Name                      | Purpose                                  | Parameters    |
  # |---------------------------|------------------------------------------|---------------|
  # | wrap_oversized_fields()   | Wrap fields that are more than specified | List of lines |
  # |                           | size.  This works by rewriting the       |               |
  # |                           | comma separated data so that extra lines |               |
  # |                           | are made.  For this reason this          |               |
  # |                           | function cannot easily be undone by      |               |
  # |                           | "untab"                                  |               |
  # ----------------------------------------------------------------------------------------
  # Returns the rewritten list of comma separated lines.  Comment markers
  # ("#" or "//") are stripped from the lines and remembered in $comment_char.
  # Each field longer than $field_wrap_boundary is cut into chunks; chunk N of
  # every column goes onto output row N, with columns that have run out of
  # chunks left empty.  While $wrapped_line_vpad is 1 each input line is
  # followed by one all-empty row.  Input lines without any fields produce
  # no output.

  # Cut $text into chunks of at most $limit characters without splitting
  # words; a single word longer than $limit becomes a chunk of its own.
  sub _wrap_whole_words {
      my ($text, $limit) = @_;

      my @chunks=();
      my $current="";
      while ($text =~ /(\s*)(\S+)/g) {
          my ($gap, $word) = ($1, $2);
          if ($current eq "") {
              $current=$word;
          } elsif (length($current) + length($gap) + length($word) <= $limit) {
              $current.=$gap.$word;
          } else {
              push @chunks, $current;
              $current=$word;
          }
      }
      push @chunks, $current if $current ne "";
      return @chunks;
  }

  # Cut $text into consecutive $limit-character chunks, splitting words where
  # necessary.  Leading whitespace is removed from each chunk and chunks that
  # end up empty are skipped.
  sub _wrap_fixed_width {
      my ($text, $limit) = @_;

      my @chunks=();
      for (my $offset=0; $offset<length($text); $offset+=$limit) {
          my $chunk=substr($text, $offset, $limit);
          $chunk =~ s/^\s+//;
          #Compare against "" rather than testing truth so a chunk of "0"
          #is not mistaken for the end of the field
          push @chunks, $chunk if $chunk ne "";
      }
      return @chunks;
  }

  sub wrap_oversized_fields(@) {

      my @table=@_;
      my @ret;

      #Without a usable boundary there is nothing to wrap
      my $limit=int($field_wrap_boundary);
      return @table if $limit < 1;

      #Go through each line in the table
      for my $line (@table) {
          chomp $line;

          #Work out if the data is inside a comment
          if ($line=~/^(\#|\/\/)/) {
              $comment_char=$1;
          }
          $line =~ s/^\Q$comment_char\E//;

          #Turn every field into its list of chunks.  Fields that fit are a
          #single chunk.  $break_words_on_wrap is true for the word
          #preserving mode (-fw) and false for the word splitting mode (-fs).
          my @columns=();
          for my $field (get_fields($line)) {
              if (length($field) <= $limit) {
                  push @columns, [$field];
              } elsif ($break_words_on_wrap) {
                  push @columns, [_wrap_whole_words($field, $limit)];
              } else {
                  push @columns, [_wrap_fixed_width($field, $limit)];
              }
          }
          next unless @columns;

          #Build output rows until a row comes out completely empty.  That
          #last empty row is the vertical padding and is only kept when
          #$wrapped_line_vpad is on.
          my $row=0;
          my $keep_processing=1;
          while ($keep_processing) {
              $keep_processing=0;

              my @cells=();
              for my $chunks (@columns) {
                  my $cell = ($row < @$chunks) ? $chunks->[$row] : "";
                  $keep_processing=1 if $cell ne "";
                  push @cells, $cell;
              }

              if ($keep_processing || $wrapped_line_vpad==1) {
                  push @ret, join(',', @cells);
              }
              $row++;
          }
      }
      return @ret;

  }
  440.  
  # Print the command/option help table to STDOUT.
  sub print_usage() {

      #One entry per output line; each is printed followed by a newline.
      my @help_lines = (
          '-------------------------------------------------------------------------',
          '| Command/Option | Purpose                                              |',
          '|----------------|------------------------------------------------------|',
          '| tab            | Reads from stdin and tabulates comma seperated input |',
          '| tab <-t>       | Tabulates input and assumes first row are titles     |',
          '| tab <-h>       | Prints this help                                     |',
          '| tab <-nb>      | Tabulates without a border                           |',
          '| tab <-fw X>    | Wrap fields greater than X big don\'t break words     |',
          '| tab <-fs X>    | Wrap fields greater than X big and break words       |',
          '| tab <-vp X>    | Vertically pad table by X lines                      |',
          '| tab <-hp X>    | Horizontally pad fields by X chars                   |',
          '| tab <-b X>     | Tabulates with a border made from char X             |',
          '|----------------|------------------------------------------------------|',
          '| untab          | Reads from stdin and untabulates table input         |',
          '| untab <-b X>   | Untabulate a table with border char X                |',
          '| untab <-nb>    | Untabulate a borderless table                        |',
          '-------------------------------------------------------------------------',
      );

      print "$_\n" for @help_lines;

  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement