Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- #===============================================================================
- #
- # FILE: tab
- #
- # USAGE: "tab" or "untab"
- #
- # DESCRIPTION: This will turn comma separated input from stdin into a text table.
- # It can also then convert it back if program is invoked as "untab".
- #
- # As well as being used on the command line, tab/untab can be
- # used from within vi, and can work on tables that are commented
- # out with # or //.
- #
- # OPTIONS:
- # -------------------------------------------------------------------------
- # | Command/Option | Purpose |
- # |----------------|------------------------------------------------------|
- # | tab | Reads from stdin and tabulates comma separated input |
- # | tab <-t> | Tabulates input and assumes first row are titles |
- # | tab <-h> | Prints this help |
- # | tab <-nb> | Tabulates without a border |
- # | tab <-fw X> | Wrap fields greater than X big don't break words |
- # | tab <-fs X> | Wrap fields greater than X big and break words |
- # | tab <-vp X> | Vertically pad table by X lines |
- # | tab <-hp X> | Horizontally pad fields by X chars |
- # | tab <-b X> | Tabulates with a border made from char X |
- # |----------------|------------------------------------------------------|
- # | untab | Reads from stdin and untabulates table input |
- # | untab <-b X> | Untabulate a table with border char X |
- # | untab <-nb> | Untabulate a borderless table |
- # -------------------------------------------------------------------------
- #
- # REQUIREMENTS: ---
- # BUGS: ---
- # NOTES: ---
- # AUTHOR: Ben Staniford (BTS), <ben.staniford@g nospam mai l.com>
- # COMPANY:
- # VERSION: 1.0
- # CREATED: 23/08/07 11:53:19 BST
- # REVISION: ---
- #===============================================================================
- # TODO
- # 1. Make tab and untab keep existing indentation including inside comments
- # 2. Store the comment regexp only once in a global variable
- # 3. Allow facility to set the delimiter on the command line
- use strict;
- use warnings;
#Default values (Normally set from cmd line)
my $HPADDING=1;               #How much horizontal padding
my $VPADDING=0;               #How much vertical padding
my $VBORDER="|";              #What is our vertical border?
my $HBORDER="-";              #What is our horizontal border/divider?
my $wrapped_line_vpad=1;      #Should we vertically pad fields that have been word wrapped?
my $break_words_on_wrap=1;    #1 = wrap on word boundaries (-fw), 0 = split words (-fs)
my $field_wrap_boundary=0;    #How big should a field be before we wrap it?

#Globals
my @max_field_sizes=();       #Widest field seen so far in each column
my $max_col_count=0;          #Largest number of columns seen on any line
my $comment_char="";          #"#" or "//" when the table lives inside a comment
my $titles=0;                 #Set by -t: first row is a title row

#Funcs
sub tabulate();
sub get_fields($);
sub print_header_footer();
sub add_padding($);
sub untabulate();
sub add_field_empty_space($$);
sub print_usage();
sub wrap_oversized_fields(@);
sub print_table_divider($);

#No STDERR under any circumstances (best effort: a failure here is ignored
#on purpose so the filter still runs).
open(STDERR, '>', '/dev/null');

#Arguments
#Options are matched against the joined argument string. Each option must be
#a whole word (preceded by start-of-string or whitespace) so that one option
#can never be mistaken for part of another, and numeric options only accept
#digits so that a malformed value leaves the default in place.
my $args = join ' ', @ARGV;
if ($args =~ /(?:^|\s)-t(?:\s|$)/)    { $titles=1; }
if ($args =~ /(?:^|\s)-nb(?:\s|$)/)   { $VBORDER=""; $HBORDER=""; $HPADDING=2; }
if ($args =~ /(?:^|\s)-b\s+(\S)/)     { $VBORDER=$1; $HBORDER=$1; $HPADDING=1; }
if ($args =~ /(?:^|\s)-fs\s+(\d+)/)   { $field_wrap_boundary=$1; $break_words_on_wrap=0; }
if ($args =~ /(?:^|\s)-fw\s+(\d+)/)   { $field_wrap_boundary=$1; $break_words_on_wrap=1; }
if ($args =~ /(?:^|\s)-vp\s+(\d+)/)   { $VPADDING=$1; }
if ($args =~ /(?:^|\s)-hp\s+(\d+)/)   { $HPADDING=$1; }
#Help is checked on its own so that it is honoured even when -hp is present.
if ($args =~ /(?:^|\s)-h(?:\s|$)/)    { print_usage(); exit 0; }

#If we're invoked as "untab", i.e. via symlink, do the inverse of normal behavior
#Note, untab uses most of the same program arguments above.
#The name is taken from $0 directly instead of shelling out to basename(1),
#which misbehaves when the path contains spaces or shell metacharacters.
my ($PROGRAM_NAME) = $0 =~ m{([^/]*)\z};
if ($PROGRAM_NAME eq "untab") {
    untabulate();
} else {
    tabulate();
}
exit 0;
- # ------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |--------------|------------------------------------|------------|
- # | tabulate() | Main function that tabulates stdin | |
- # ------------------------------------------------------------------
sub tabulate() {
    #Reads comma separated lines from STDIN and prints them as a text table
    #on STDOUT. Updates the globals @max_field_sizes, $max_col_count and
    #$comment_char as a side effect. Takes no parameters, returns nothing.

    #Step 1, load the data into a list
    my @table=<STDIN>;

    #Step 2, If we have field length restrictions, reorder the table as
    #needed. Note, this can't be untabbed.
    if ($field_wrap_boundary != 0) {
        @table = wrap_oversized_fields(@table);
    }

    #Nothing to tabulate: do not print a pair of stray border lines.
    return if !@table;

    #Step 3, calculate the number of rows and columns from the input as well as the
    #maximum field size for each column. Also, work out if this table is in a comment.
    my @rows;
    for my $line (@table) {
        chomp $line;

        #Work out if the data is inside a comment, and strip the comment
        #prefix BEFORE splitting so that it is not counted as part of the
        #first field when measuring column widths.
        if ($line =~ m{^(\#|//)}) {
            $comment_char=$1;
        }
        $line =~ s/^\Q$comment_char\E//;

        my @fields = get_fields($line);
        for my $col (0 .. $#fields) {
            my $len = length($fields[$col]);
            if (!defined $max_field_sizes[$col] || $max_field_sizes[$col] < $len) {
                $max_field_sizes[$col] = $len;
            }
        }
        if (@fields > $max_col_count) {
            $max_col_count = scalar @fields;
        }
        push @rows, \@fields;
    }

    #Step 4, print out the table
    print_header_footer();
    my $lcounter=0;
    for my $fields (@rows) {
        $lcounter++;
        if ($comment_char ne "") {
            print "$comment_char ";
        }
        if ($VBORDER ne "") {
            print $VBORDER.add_padding(" ");
        }

        #Print every column; columns this row does not have are printed empty
        for my $col (0 .. $max_col_count - 1) {
            my $field = defined $fields->[$col] ? $fields->[$col] : "";
            print $field.add_field_empty_space(length($field), $col).add_padding(" ");
            print $VBORDER.add_padding(" ");
        }
        print "\n";

        #Vertically pad by $VPADDING blank table lines (-vp X)
        for (my $i=0;$i<$VPADDING;$i++) {
            print_table_divider(" ");
        }

        #Separate the title row from the body
        if ($titles && $lcounter==1) {
            print_table_divider($HBORDER);
        }
    }
    print_header_footer();
}
- # -----------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |-----------------------|------------------------|--------------------------|
- # | print_table_divider() | Print out a divider in | Character the divider |
- # | | the table | should be made from |
- # -----------------------------------------------------------------------------
sub print_table_divider($) {
    #Prints one divider row across every column, built from the given fill
    #character and framed by the vertical border. Prints nothing when both
    #the fill character and the horizontal border are empty.
    my ($fill) = @_;
    return if $fill eq "" && $HBORDER eq "";

    my $row = "";
    $row = "$comment_char " if $comment_char ne "";

    #The padding is the same for every column, so build it once
    my $pad = add_padding($fill);
    for my $width (@max_field_sizes) {
        $row .= $VBORDER . $pad . ($fill x $width) . $pad;
    }
    print $row . $VBORDER . "\n";
}
- # ----------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # ----------------------------------------------------------------------------
- # | print_header_footer() | Print out the tables header/footer | |
- # ----------------------------------------------------------------------------
sub print_header_footer() {
    #Prints the solid top/bottom edge of the table, made entirely of the
    #horizontal border character. Borderless tables get no edge at all.
    return if $HBORDER eq "";

    my $edge = $comment_char ne "" ? "$comment_char " : "";
    my $pad  = add_padding($HBORDER);
    for my $width (@max_field_sizes) {
        $edge .= $HBORDER . $pad . ($HBORDER x $width) . $pad;
    }
    print $edge . $HBORDER . "\n";
}
- # ------------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # ------------------------------------------------------------------------------
- # | add_field_empty_space() | Print out the field spacer | Field Length (int) |
- # | | | Field Number (int) |
- # ------------------------------------------------------------------------------
sub add_field_empty_space($$) {
    #Returns the run of spaces needed to widen a field of the given length
    #to the recorded maximum width of its column (empty string if none).
    my ($field_length, $field_number) = @_;
    my $gap = $max_field_sizes[$field_number] - $field_length;
    return $gap > 0 ? " " x $gap : "";
}
- # -----------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |---------------|------------------------------|----------------------------|
- # | add_padding | Print out the padding string | Padding character (string) |
- # -----------------------------------------------------------------------------
sub add_padding($) {
    #Returns the horizontal padding string: the given character repeated
    #once for every step of the counter below $HPADDING.
    my ($padding_char) = @_;
    my @pieces;
    my $made = 0;
    while ($made < $HPADDING) {
        push @pieces, $padding_char;
        $made++;
    }
    return join "", @pieces;
}
- # -----------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |--------------|--------------------------------------|---------------------|
- # | get_fields | Extract a list of fields from a line | Input line (string) |
- # -----------------------------------------------------------------------------
sub get_fields($) {
    #Splits a line on commas and returns the fields with leading and
    #trailing whitespace removed. Trailing empty fields are dropped by split.
    my ($line) = @_;
    my @trimmed = map {
        my $piece = $_;
        $piece =~ s/^\s*//;
        $piece =~ s/\s*$//;
        $piece;
    } split(',', $line);
    return @trimmed;
}
- # -----------------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |----------------|---------------------------------------------------|------------|
- # | untabulate() | Perform the inverse function and untabulate stdin | |
- # -----------------------------------------------------------------------------------
sub untabulate() {
    #Reads a text table from STDIN and prints it back as comma separated
    #lines on STDOUT. Sets the global $comment_char when the first line
    #starts with "#" or "//". Takes no parameters, returns nothing.

    my $borderless = ($HBORDER eq "" && $VBORDER eq "");

    #Border characters come from the command line, so quote them properly:
    #simply prefixing a backslash turns e.g. "d" into \d or "w" into \w.
    my $vb_regexp     = quotemeta($VBORDER);
    my $border_regexp = join '|', map { quotemeta($_) }
                        grep { $_ ne "" } ($HBORDER, $VBORDER);

    my $counter=0;
    while (my $line = <STDIN>) {
        chomp $line;

        #Work out if the data is inside a comment
        if ($counter==0 && $line =~ m{^(\#|//)}) {
            $comment_char=$1;
        }
        my $comment_regexp = quotemeta($comment_char);

        #Handle a borderless table specifically
        if ($borderless) {
            $line =~ s/\s{2,200}/,/g;
            $line =~ s/,$//;
        #This is a table with a border
        } else {
            #Blank out rule lines (header, footer and title divider), whether
            #or not they sit behind a comment prefix.
            $line =~ s/^(?:$comment_regexp)?\s*(?:$border_regexp)*$//;
            #Every vertical border, with its padding, becomes a comma
            if ($VBORDER ne "") {
                $line =~ s/\s*$vb_regexp\s*/,/g;
            }
        }

        #Drop the comma left behind by the left-hand border
        $line =~ s/^$comment_regexp,/$comment_char/;

        #Drop leading/trailing commas left by borders and empty cells
        $line =~ s/^,+//;
        $line =~ s/,+$//;

        $line =~ s/,/, /g; #If you want spaces as the default after commas

        #Keep one space after the comment prefix. Only done when there is a
        #prefix: an empty pattern would make Perl reuse the last pattern that
        #matched successfully instead of matching the empty string.
        if ($comment_char ne "" && $line !~ /$comment_regexp\s/) {
            $line =~ s/$comment_regexp/$comment_char /;
        }

        if ($line !~ /^\s*$/) {
            print "$line\n";
        }
        $counter++;
    }
}
- # ----------------------------------------------------------------------------------------
- # | Name | Purpose | Parameters |
- # |---------------------------|------------------------------------------|---------------|
- # | wrap_oversized_fields() | Wrap fields that are more than specified | List of lines |
- # | | size. This works by rewriting the | |
- # | | comma separated data so that extra lines | |
- # | | are made. For this reason this | |
- # | | function cannot easily be undone by " | |
- # | | untab" | |
- # ----------------------------------------------------------------------------------------
#Split one field into chunks that are at most $width characters wide.
#  $field      - the field text
#  $width      - maximum chunk width; values below 1 disable wrapping
#  $keep_words - true: break at word boundaries where one exists within
#                $width characters; false: always cut at $width characters
#Returns the list of chunks with surrounding whitespace removed. A field
#that already fits is returned unchanged as a single chunk.
sub _wrap_field_chunks {
    my ($field, $width, $keep_words) = @_;
    return ($field) if $width < 1 || length($field) <= $width;

    my @chunks;
    my $rest = $field;
    while (1) {
        $rest =~ s/^\s+//;
        last if $rest eq "";

        my $chunk;
        #At least one character must match, otherwise a word longer than
        #$width would match the empty string forever.
        if ($keep_words && $rest =~ /^(.{1,$width}\b)/) {
            $chunk = $1;
        } else {
            #No usable word boundary (or split mode): cut at the width
            $chunk = substr($rest, 0, $width);
        }
        $rest = substr($rest, length($chunk));

        $chunk =~ s/\s+$//;
        push @chunks, $chunk if $chunk ne "";
    }
    return @chunks;
}

sub wrap_oversized_fields(@) {
    #Rewrites the comma separated input so that no field is wider than
    #$field_wrap_boundary: an input line whose fields need wrapping becomes
    #several output lines, one per wrapped row. When $wrapped_line_vpad is 1
    #an empty row is added after each input line as vertical padding.
    #Comment prefixes ("#" or "//") are stripped and remembered in
    #$comment_char. Lines with no fields produce no output.
    #  Parameters: list of input lines
    #  Returns:    list of rewritten lines (without newlines)
    my @table=@_;
    my @ret;
    my $width = int($field_wrap_boundary);

    #Go through each line in the table
    for my $line (@table) {
        chomp $line;

        #Work out if the data is inside a comment
        if ($line =~ m{^(\#|//)}) {
            $comment_char=$1;
        }
        $line =~ s/^\Q$comment_char\E//;

        my @fields = get_fields($line);
        next if !@fields;

        #One list of chunks per column
        my @columns = map { [ _wrap_field_chunks($_, $width, $break_words_on_wrap) ] } @fields;

        #The tallest column decides how many rows this line turns into
        my $row_count = 0;
        for my $column (@columns) {
            $row_count = scalar @$column if @$column > $row_count;
        }

        #Build the rows; columns that ran out of chunks contribute empty cells
        for my $row (0 .. $row_count - 1) {
            push @ret, join(',', map { defined $_->[$row] ? $_->[$row] : "" } @columns);
        }

        #Blank row separating this (possibly wrapped) line from the next
        if ($wrapped_line_vpad==1) {
            push @ret, join(',', ("") x @columns);
        }
    }
    return @ret;
}
sub print_usage() {
    #Prints the command/option help table to the currently selected output.
    my @usage_lines = (
        '-------------------------------------------------------------------------',
        '| Command/Option | Purpose |',
        '|----------------|------------------------------------------------------|',
        '| tab | Reads from stdin and tabulates comma seperated input |',
        '| tab <-t> | Tabulates input and assumes first row are titles |',
        '| tab <-h> | Prints this help |',
        '| tab <-nb> | Tabulates without a border |',
        '| tab <-fw X> | Wrap fields greater than X big don\'t break words |',
        '| tab <-fs X> | Wrap fields greater than X big and break words |',
        '| tab <-vp X> | Vertically pad table by X lines |',
        '| tab <-hp X> | Horizontally pad fields by X chars |',
        '| tab <-b X> | Tabulates with a border made from char X |',
        '|----------------|------------------------------------------------------|',
        '| untab | Reads from stdin and untabulates table input |',
        '| untab <-b X> | Untabulate a table with border char X |',
        '| untab <-nb> | Untabulate a borderless table |',
        '-------------------------------------------------------------------------',
    );
    print join("\n", @usage_lines) . "\n";
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement