Untitled

#!/usr/bin/perl
#===============================================================================
#
#         FILE:  tab
#
#        USAGE:  "tab" or "untab"
#
#  DESCRIPTION:  This will turn comma separated input from stdin into a text table.
#                It can also then convert it back if program is invoked as "untab".
#
#                As well as being used on the command line, tab/untab can be
#                used from within vi, and can work on tables that are commented
#                out with # or //.
#
#      OPTIONS:
#                -------------------------------------------------------------------------
#                | Command/Option | Purpose                                              |
#                |----------------|------------------------------------------------------|
#                | tab            | Reads from stdin and tabulates comma separated input |
#                | tab <-t>       | Tabulates input and assumes first row are titles     |
#                | tab <-h>       | Prints this help                                     |
#                | tab <-nb>      | Tabulates without a border                           |
#                | tab <-fw X>    | Wrap fields greater than X big don't break words     |
#                | tab <-fs X>    | Wrap fields greater than X big and break words       |
#                | tab <-vp X>    | Vertically pad table by X lines                      |
#                | tab <-hp X>    | Horizontally pad fields by X chars                   |
#                | tab <-b X>     | Tabulates with a border made from char X             |
#                |----------------|------------------------------------------------------|
#                | untab          | Reads from stdin and untabulates table input         |
#                | untab <-b X>   | Untabulate a table with border char X                |
#                | untab <-nb>    | Untabulate a borderless table                        |
#                -------------------------------------------------------------------------
#
# REQUIREMENTS:  ---
#         BUGS:  ---
#        NOTES:  ---
#       AUTHOR:  Ben Staniford (BTS), <ben.staniford@g nospam mai l.com>
#      COMPANY:
#      VERSION:  1.0
#      CREATED:  23/08/07 11:53:19 BST
#     REVISION:  ---
#===============================================================================

# TODO
# 1. Make tab and untab keep existing indentation including inside comments
# 2. Store the comment regexp only once in a global variable
# 3. Allow facility to set the delimiter on the command line

use strict;
use warnings;

use File::Basename qw(basename);

#Default values (Normally set from cmd line)
my $HPADDING=1;                    #How much horizontal padding
my $VPADDING=0;                    #How much vertical padding
my $VBORDER="|";                   #What is our vertical border?
my $HBORDER="-";                   #What is our horizontal border/divider?
my $wrapped_line_vpad=1;           #Should we vertically pad fields that have been word wrapped?
my $break_words_on_wrap=1;         #1 = wrap at word boundaries (-fw), 0 = split mid-word (-fs)
my $field_wrap_boundary=0;         #How big should a field be before we wrap it? (0 = never wrap)

#Globals
my @max_field_sizes=();            #Widest field seen in each column
my $max_col_count=0;               #Largest number of fields seen on one line
my $comment_char="";               #"#" or "//" when the table lives inside a comment
my $titles=0;                      #Set by -t: first row is a title row

#Funcs
sub tabulate();
sub get_fields($);
sub print_header_footer();
sub add_padding($);
sub untabulate();
sub add_field_empty_space($$);
sub print_usage();
sub wrap_oversized_fields(@);
sub print_table_divider($);

#No STDERR under any circumstances.  If /dev/null cannot be opened, closing the
#handle still keeps diagnostics away from the output.
open(STDERR, '>', '/dev/null') or close(STDERR);

#Arguments
#Options are matched anywhere in the joined argument string, so their order on
#the command line does not matter.
my $args = join ' ', @ARGV;
if ($args =~ /-t/)          { $titles=1; }
if ($args =~ /-nb/)         { $VBORDER=""; $HBORDER=""; $HPADDING=2;}
if ($args =~ /-b\s+(\S)/)   { $VBORDER=$1; $HBORDER=$1; $HPADDING=1;}
#-fs splits fields at exactly X characters, -fw wraps them at word boundaries
if ($args =~ /-fs\s+(\S+)/) { $field_wrap_boundary=$1; $break_words_on_wrap=0;}
if ($args =~ /-fw\s+(\S+)/) { $field_wrap_boundary=$1; $break_words_on_wrap=1;}
if ($args =~ /-vp\s+(\S+)/) { $VPADDING=$1}
#-h is only tested when "-hp X" did not match, because /-h/ also matches "-hp"
if ($args =~ /-hp\s+(\S+)/) { $HPADDING=$1}
elsif ($args =~ /-h/)       { print_usage(); exit 0; }

#If we're invoked as "untab", i.e. via symlink, do the inverse of normal behavior
#Note, untab uses most of the same program arguments above.
#basename() is used instead of shelling out so that paths containing spaces or
#shell metacharacters are handled correctly.
my $PROGRAM_NAME = basename($0);
if ($PROGRAM_NAME eq "untab") {
    untabulate();
} else {
    tabulate();
}
exit 0;

# ------------------------------------------------------------------
# | Name         | Purpose                            | Parameters |
# |--------------|------------------------------------|------------|
# | tabulate()   | Main function that tabulates stdin |            |
# ------------------------------------------------------------------
#Reads all of stdin, records the widest field of every column in
#@max_field_sizes and the largest field count in $max_col_count, then prints
#the rows as a padded table.  A leading "#" or "//" on a line is remembered in
#$comment_char and re-emitted in front of every printed row.
sub tabulate() {

    #Step 1, load the data into a list
    my @table=<STDIN>;

    #Step 2, If we have field length restrictions, reorder the table as
    #needed.  Note, this can't be untabbed.
    if ($field_wrap_boundary != 0) {
        @table = wrap_oversized_fields(@table);
    }

    #Step 3, calculate the number of rows and columns from the input as well as the
    #maximum field size for each column.  Also, work out if this table is in a comment.
    for my $line (@table) {
        chomp $line;

        #Work out if the data is inside a comment and drop the comment marker
        #BEFORE the fields are measured, otherwise the marker is counted as part
        #of the first column.  $line aliases the element of @table, so step 4
        #sees the stripped line as well.
        if ($line=~/^(\#|\/\/)/) {
            $comment_char=$1;
        }
        $line =~ s/^\Q$comment_char\E//;

        my @fields = get_fields($line);
        my $counter=0;

        for my $field (@fields) {
            if (!defined $max_field_sizes[$counter]) {
                $max_field_sizes[$counter] = 0;
            }
            if ($max_field_sizes[$counter] < length($field)) {
                $max_field_sizes[$counter] = length($field);
            }
            $counter++;
        }

        if ($counter > $max_col_count) {
            $max_col_count=$counter;
        }
    }

    #Step 4, print out the table
    print_header_footer();
    my $lcounter=0;
    for my $line (@table) {
        my @fields = get_fields($line);

        if ($comment_char ne "") {
            print "$comment_char ";
        }

        if ($VBORDER ne "") {
            print $VBORDER.add_padding(" ");
        }
        $lcounter++;

        my $counter=0;
        #Print fields
        for my $field (@fields) {
            print "$field".add_field_empty_space(length($field), $counter).add_padding(" ");
            print $VBORDER.add_padding(" ");
            $counter++;
        }
        #Print any empty fields (if they exist)
        if ($counter < $max_col_count) {
            for (my $i=0;$i<($max_col_count - $counter);$i++) {
                print add_field_empty_space(0, $counter+$i).add_padding(" ");
                print $VBORDER.add_padding(" ");
            }
        }
        print "\n";

        #-vp X asks for X blank rows after every data row, not just one
        for (my $i=0;$i<$VPADDING;$i++) {
            print_table_divider(" ");
        }
        #With -t the first row is a title row and gets a divider underneath
        if ($titles && $lcounter==1) {
            print_table_divider($HBORDER);
        }
    }
    print_header_footer();
}

# -----------------------------------------------------------------------------
# | Name                  | Purpose                      | Parameters         |
# |-----------------------|------------------------------|--------------------|
# | print_table_divider() | Print one full-width divider | String the divider |
# |                       | row inside the table         | row is filled with |
# -----------------------------------------------------------------------------
sub print_table_divider($) {

    my ($fill) = @_;

    #A borderless table has no divider to draw
    return if $fill eq $HBORDER && $HBORDER eq "";

    #Build the whole row first, then emit it in one go
    my $row = $comment_char ne "" ? "$comment_char " : "";
    my $cell_padding = add_padding($fill);
    for my $width (@max_field_sizes) {
        $row .= $VBORDER . $cell_padding . ($fill x $width) . $cell_padding;
    }
    print $row . $VBORDER . "\n";
}



# --------------------------------------------------------------------------
# | Name                  | Purpose                           | Parameters |
# |-----------------------|-----------------------------------|------------|
# | print_header_footer() | Print the table's top/bottom edge |            |
# --------------------------------------------------------------------------
sub print_header_footer() {

    #Nothing to draw when the table has no horizontal border
    return if $HBORDER eq "";

    #The edge is made of $HBORDER throughout, including where the column
    #separators would be.
    my @pieces = ();
    push @pieces, "$comment_char " if $comment_char ne "";

    my $cell_padding = add_padding($HBORDER);
    for my $width (@max_field_sizes) {
        push @pieces, $HBORDER, $cell_padding, $HBORDER x $width, $cell_padding;
    }
    push @pieces, $HBORDER, "\n";

    print join('', @pieces);

}

# -------------------------------------------------------------------------------
# | Name                    | Purpose                      | Parameters         |
# |-------------------------|------------------------------|--------------------|
# | add_field_empty_space() | Build the spaces that pad a  | Field Length (int) |
# |                         | field out to column width    | Field Number (int) |
# -------------------------------------------------------------------------------
sub add_field_empty_space($$) {
    my ($field_length, $field_number) = @_;

    #How far the field falls short of the widest field in its column
    my $shortfall = $max_field_sizes[$field_number] - $field_length;

    return $shortfall > 0 ? " " x $shortfall : "";
}

# --------------------------------------------------------------------------------
# | Name          | Purpose                         | Parameters                 |
# |---------------|---------------------------------|----------------------------|
# | add_padding() | Repeat a string $HPADDING times | Padding character (string) |
# --------------------------------------------------------------------------------
sub add_padding($) {
    my ($fill) = @_;

    #Counted loop rather than the x operator so that the number of copies is
    #decided by the same "<" comparison against $HPADDING as before.
    my $padding = "";
    my $copies = 0;
    $padding .= $fill while $copies++ < $HPADDING;

    return $padding;
}

# -----------------------------------------------------------------------------------
# | Name         | Purpose                                    | Parameters          |
# |--------------|--------------------------------------------|---------------------|
# | get_fields() | Split a line on commas and trim each field | Input line (string) |
# -----------------------------------------------------------------------------------
sub get_fields($) {
    my ($line) = @_;

    #split drops trailing empty fields, so "a,b,," yields two fields
    return map {
        my $trimmed = $_;
        $trimmed =~ s/^\s*//;
        $trimmed =~ s/\s*$//;
        $trimmed;
    } split ',', $line;
}

# -----------------------------------------------------------------------------------
# | Name           | Purpose                                           | Parameters |
# |----------------|---------------------------------------------------|------------|
# | untabulate()   | Perform the inverse function and untabulate stdin |            |
# -----------------------------------------------------------------------------------
#Reads a table from stdin line by line and prints it back as comma separated
#text ("a, b, c").  Border and divider rows are dropped.  If the first line
#starts with "#" or "//" that marker is kept at the start of each printed line.
sub untabulate() {

    my $line_number=0;
    while (my $line = <STDIN>) {

        chomp $line;

        #Work out if the data is inside a comment (decided by the first line)
        if ($line_number==0 && $line=~/^(\#|\/\/)/) {
            $comment_char=$1;
        }
        $line_number++;

        my $comment_regexp=quotemeta($comment_char);

        #Handle a borderless table specifically
        if ($HBORDER eq "" && $VBORDER eq "") {
            $line =~ s/\s{2,200}/,/g;
            $line =~ s/,$//;
        #This is a table with a border
        } else {
            #quotemeta only escapes what needs escaping; blindly prefixing a
            #backslash would turn a border such as "d" or "s" into \d or \s.
            my $border_regexp=join '|', map { quotemeta($_) } grep { $_ ne "" } ($HBORDER, $VBORDER);

            #Blank out rows made only of border characters, with or without a
            #leading comment marker (header, footer and title dividers).
            $line =~ s/^(?:$comment_regexp\s*)?(?:$border_regexp)*$//;

            #Every remaining vertical border becomes a field separator
            if ($VBORDER ne "") {
                my $vb_regexp=quotemeta($VBORDER);
                $line =~ s/\s*$vb_regexp\s*/,/g;
            }
        }

        #Remove the separators left behind by the outer borders
        $line =~ s/^$comment_regexp,/$comment_char/;
        $line =~ s/^,+//;
        $line =~ s/,+$//;

        $line =~ s/,/, /g;   #If you want spaces as the default after commas

        #Make sure a space follows the comment marker.  Skipped when there is no
        #marker: an empty pattern in s/// would reuse the last successful regexp.
        if ($comment_char ne "" && $line !~ /^$comment_regexp\s/) {
            $line =~ s/^$comment_regexp/$comment_char /;
        }
        if ($line !~ /^\s*$/) {
            print "$line\n";
        }
    }

}

# ----------------------------------------------------------------------------------------
# | Name                      | Purpose                                  | Parameters    |
# |---------------------------|------------------------------------------|---------------|
# | wrap_oversized_fields()   | Wrap fields that are more than specified | List of lines |
# |                           | size.  This works by rewriting the       |               |
# |                           | comma separated data so that extra lines |               |
# |                           | are made.  For this reason this          |               |
# |                           | function cannot easily be undone by      |               |
# |                           | "untab"                                  |               |
# ----------------------------------------------------------------------------------------
#Returns the rewritten list of lines (without trailing newlines or comment
#markers).  Each input line becomes one output line per wrapped row, followed
#by an all-empty line when $wrapped_line_vpad is 1.  A leading "#" or "//" is
#remembered in $comment_char.
sub wrap_oversized_fields(@) {

    my @table=@_;
    my @ret;

    #The boundary comes straight from the command line; only a whole number of
    #at least 1 can be used as a chunk size.
    my $width=int($field_wrap_boundary);

    #Go through each line in the table
    for my $line (@table) {
        chomp $line;

        #Work out if the data is inside a comment
        if ($line=~/^(\#|\/\/)/) {
            $comment_char=$1;
        }
        $line =~ s/^\Q$comment_char\E//;

        #One list of chunks per column; a field that fits is a single chunk
        my @columns=();
        for my $field (get_fields($line)) {
            if ($width >= 1 && length($field) > $width) {
                push @columns, [ _wrap_field_chunks($field, $width, $break_words_on_wrap) ];
            } else {
                push @columns, [ $field ];
            }
        }
        next if !@columns;

        #Find the last row that still holds text in any column
        my $last_row=-1;
        for my $chunks (@columns) {
            for my $i (0 .. $#{$chunks}) {
                if ($chunks->[$i] ne "" && $i > $last_row) {
                    $last_row=$i;
                }
            }
        }

        #Emit one line per row.  The row after the last one is always empty
        #and acts as vertical padding under the wrapped line.
        for my $row (0 .. $last_row + 1) {
            my $out=join ',', map { defined $_->[$row] ? $_->[$row] : "" } @columns;
            if ($out !~ /^,*$/ || $wrapped_line_vpad==1) {
                push @ret, $out;
            }
        }
    }
    return @ret;

}

#Split one oversized field into chunks of at most $width characters.
#With $keep_words true the split prefers the last word boundary inside the
#first $width characters and only cuts a word that is itself longer than
#$width; otherwise the text is cut every $width characters.  Every pass
#consumes at least one character, so the loop always terminates, and no text
#other than whitespace is discarded.
sub _wrap_field_chunks {
    my ($text, $width, $keep_words) = @_;
    my @chunks=();

    while ($text ne "") {
        my $chunk;
        if ($keep_words) {
            #Whitespace between two chunks is not worth a row of its own
            $text =~ s/^\s+//;
            last if $text eq "";

            if (length($text) <= $width) {
                $chunk=$text;
            } elsif ($text =~ /^(.{1,$width})\b/) {
                $chunk=$1;
            } else {
                $chunk=substr($text, 0, $width);
            }
            $text=substr($text, length($chunk));
        } else {
            $chunk=substr($text, 0, $width);
            $text=length($text) > $width ? substr($text, $width) : "";
            $chunk =~ s/^\s*//;
        }
        #Compare against "" so that a chunk of "0" is kept
        push @chunks, $chunk if $chunk ne "";
    }
    return @chunks;
}

#Print the command line help table to stdout.
sub print_usage() {

    my $usage_text = <<END;
-------------------------------------------------------------------------
| Command/Option | Purpose                                              |
|----------------|------------------------------------------------------|
| tab            | Reads from stdin and tabulates comma seperated input |
| tab <-t>       | Tabulates input and assumes first row are titles     |
| tab <-h>       | Prints this help                                     |
| tab <-nb>      | Tabulates without a border                           |
| tab <-fw X>    | Wrap fields greater than X big don't break words     |
| tab <-fs X>    | Wrap fields greater than X big and break words       |
| tab <-vp X>    | Vertically pad table by X lines                      |
| tab <-hp X>    | Horizontally pad fields by X chars                   |
| tab <-b X>     | Tabulates with a border made from char X             |
|----------------|------------------------------------------------------|
| untab          | Reads from stdin and untabulates table input         |
| untab <-b X>   | Untabulate a table with border char X                |
| untab <-nb>    | Untabulate a borderless table                        |
-------------------------------------------------------------------------
END

    print $usage_text;

}