terdon

Corrected version of duHTMLtree

Mar 22nd, 2013
460
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 10.65 KB | None | 0 0
  1. #!/usr/bin/perl
  2. ## This script is NOT my work, I have simply corrected a bug in a script I downloaded from here:
  3. ##  http://hints.macworld.com/article.php?story=20040923034351201
  4. ##
  5. ## The only change I made was to add this line (line 329) to strip trailing slashes (/):
  6. ##
  7. ##    $itm=~s/\/$//;
  8.  
  9.  
  10. use strict;
  11. use File::stat;
  12. use Getopt::Std;
  13. use POSIX;
  14.  
  15. # duHTMLtree - Can be used as a CGI or called from the command line
  16. #  
  17. my $VERSION='1.0.3';
  18.  
  19. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  20. # Start of configuration
  21. #   edit these as you see fit
  22. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  23.  
  24. #---------------------------
  25. # IP ACCESS CONTROL
  26. #---------------------------
  27.  
  28.    # What remote IPs can use the CGI version . This is a REGULAR EXPRESSION
  29.    # so don't forget to escape . and it's probably good to bind it with ^
  30.  
  31. my $OK_REMOTE_IP = '^(192\.168\.0\.|127\.0\.0\.1)'
  32.   if (exists($ENV{'GATEWAY_INTERFACE'}));
  33.  
  34. my @dusk_files;
  35. my @dusk_names;
  36.  
  37. #---------------------------
  38. # INPUT FILE CHOICES
  39. #---------------------------
  40.  
  41.     # There's two ways to populate the choices for file lists in CGI mode:
  42.  
  43.     # 1. The unsafe but easy way it to scan some directory for files matching
  44.     # a particular regexp and use those (don't forget the trailing / on
  45.     # SCAN_DIR and remember that SCAN_MATCH is a regexp!
  46.  
  47. my $SCAN_FILESYSTEM=1 if (exists($ENV{'GATEWAY_INTERFACE'}));
  48. my $SCAN_DIR='/tmp/disktree/' if (exists($ENV{'GATEWAY_INTERFACE'}));
  49. my $SCAN_MATCH='^duk_' if (exists($ENV{'GATEWAY_INTERFACE'}));
  50.  
  51.     # 2. The safe way is to explicitly set the two arrays - one contains filenames
  52.     # and the other printable names (comment out the above two lines as well):
  53.  
  54. # my $SCAN_FILESYSTEM=0;
  55. # @dusk_files = qw (/Users/msells/bin/dutr/duk_Ayyyeee.txt /Users/msells/bin/dutr/duk_gehenna.txt);
  56. # @dusk_names= qw (Ayyyeee Gehenna);
  57.  
  58. #---------------------------
  59. # SIZE CHOICES
  60. #---------------------------
  61.  
  62.     # These are the sizes shown in the left column when in CGI mode
  63.  
  64. my @size_choices = (
  65. #    '100k','500k','1M','5M','10M',
  66.     '50M','100M','200M','300M','500M','650M',
  67.     '1G','2G','3G','5G','10G')
  68. if (exists($ENV{'GATEWAY_INTERFACE'}));
  69.  
  70. #---------------------------
  71. # COLORIZATION CONFIG
  72. #---------------------------
  73. # NOTE: This is used in command line mode as well as CGI mode.
  74.  
  75.     # This controls how we colorize our nodes -- largest size must be listed
  76.     # first in each string!
  77.  
  78.     # you can use #F0F8FF style colors or W3C HTML 4.0 standard also defines
  79.     # 16 colors by name:
  80.     #    aqua, black, blue, fuchsia, gray, green, lime, maroon, navy,
  81.     #    olive, purple, red, silver, teal, white, and yellow
  82.  
  83. my %colorstrings = (
  84.     '1' => "5G red,2G green,1G purple,500M black,300M aqua",
  85.     '2' => "3G red,2G green,1G purple,500M black,200M aqua",
  86.     '3' => "100G red,20G green,10G purple,5G black,500M aqua",
  87.     '4' => "400M red,200M green,100M purple,50M black,5M aqua",
  88.     '5' => "10G red,5G green,2G purple,1G black,500M aqua",
  89. );
  90.  
  91. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  92. # End of configuration
  93. #   Nothing below here should require editing
  94. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  95.  
  96. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  97. # CGI Mode Handling
  98. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  99.  
  100. if (exists($ENV{'GATEWAY_INTERFACE'})) {
  101.  
  102. if ( (exists($ENV{'GATEWAY_INTERFACE'})) && ($SCAN_FILESYSTEM) ) {
  103.    opendir(DIR, $SCAN_DIR) || die "can't opendir $SCAN_DIR : $!";
  104.    my @goodones = grep { /$SCAN_MATCH/ } readdir(DIR);
  105.    closedir DIR;
  106.    foreach my $f (@goodones) {
  107.      push (@dusk_files,"$SCAN_DIR" . $f);
  108.      my $name=$f;
  109.      # strip off leading duk_ and trailing .txt to make a nicer name
  110.      $name =~ s#^duk_##;
  111.      $name =~ s#\.txt$##;
  112.      push (@dusk_names,$name);
  113.    }
  114. }
  115.  
  116.  
  117. my $qs=$ENV{'QUERY_STRING'};
  118. my $script_uri = $ENV{'SCRIPT_URL'};
  119.  
  120. my $cgi_size = ($qs =~ m#size=([\dkmgKMG]+)#) ? "$1" : "100M";
  121. my $cgi_color = ($qs =~ m#color=(\d+)#) ? "$1" : "1";
  122. my $cgi_title = ($qs =~ m#title=([^&]+)#) ? "$1" : "Disk Usage Tree";
  123. my $cgi_name = ($qs =~ m#name=([^&]+)#) ? "$1" : "Disk Usage Tree";
  124. my $cgi_file = ($qs =~ m#file=(\d+)#) ? "$1" : "-1";
  125.  
  126. $cgi_title = "Disk Usage Tree for " . @dusk_names[$cgi_file-1]
  127.   if ( ($cgi_file > 0) && ($cgi_file-1 <= $#dusk_files) );
  128.  
  129. print "Content-type: text/html\n\n<html><title>$cgi_title</title>\n";
  130. cgi_die ("I don't like your IP") if ($ENV{'REMOTE_ADDR'} !~ m#$OK_REMOTE_IP#);
  131.  
  132.  
  133. print "<table border=5><tr><th>Show Size</th><th>Color Scheme</th><th>Which File</th></tr>\n<tr>\n";
  134.  
  135. print "<td>\n";
  136. for my $s (@size_choices) {
  137.   my $link = self_query('size',$s);
  138.   print ( ($cgi_size ne $s) ?
  139.       qq!<a href="$link">$s</a><br>\n! :
  140.       qq!$s<br>\n!);
  141. }
  142.  
  143. print "</td>\n<td>\n";
  144.  
  145. for my $c (sort keys %colorstrings) {
  146.   my $link = self_query('color',$c);
  147.   my $htmlstring='';
  148.   for my $itm (split(/,/,$colorstrings{$c})) {
  149.     my ($size,$color) = split(/\s+/,$itm);
  150.     $htmlstring .= "<font color=$color>$size</font> ";
  151.   }
  152.   print ( ($cgi_color != $c) ?
  153.       qq!<a href="$link">Colorset $c</a> - $htmlstring<br>\n! :
  154.       qq!Colorset $c - $htmlstring<br>\n!);
  155. }
  156. print "</td>\n<td>\n";
  157.  
  158. for my $i (1 .. $#dusk_files+1) {
  159.   my $link = self_query('file',$i);
  160.   my $name=@dusk_names[$i-1];
  161.   print ( ($cgi_file != $i) ?
  162.     qq!<a href="$link">$name</a><br>\n! :
  163.     qq!$name<br>\n!);
  164. }
  165. print "</td></tr>\n</table>\n<br>\n";
  166.  
  167. @ARGV=();
  168. push (@ARGV,"-b","-s$cgi_size", "-c$cgi_color", "-t$cgi_title");
  169.  
  170. if ( ($cgi_file > 0) && ($cgi_file-1 <= $#dusk_files) ) {
  171.   push (@ARGV,"-n Disk Usage Tree for @dusk_names[$cgi_file-1]");
  172.   push (@ARGV,@dusk_files[$cgi_file-1]);
  173. } else {
  174.   cgi_die ("Please pick which file you want a tree for!");
  175. }
  176. print "<br><hr width=85%>\n";
  177. }
  178.  
  179. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  180. # End of CGI mode handling
  181. #:::::::::::::::::::::::::::::::::::::::::::::::::::::::
  182.  
  183. # output document template
  184. #
  185. my $template = <<'EOM';
  186.     <h3><a name="_LINKNAME_">_NAME_</a></h3>
  187.     <ul>
  188.     <li>Data file last updated: _TIME_
  189.     <li>Total size: _ROOT_
  190.     <li>Showing items larger than: <b>_MINSIZE_</b>
  191.     </ul>
  192.     <pre>_BODY_</pre>
  193. EOM
  194.  
  195. # init()
  196.  
  197. # Handle command line options
  198. #
  199. my %opts;
  200. getopts('c:s:l:n:t:pbh', \%opts);
  201.  
  202. if ( (defined($opts{h})) || ($#ARGV <0) ) {
  203.  
  204. print <<"EOM";
  205.     $0 [options] inputfiles
  206.        -sSIZE   Minimum size we care about
  207.        -tTITLE  Title for the HTML document
  208.        -b       Output body only
  209.        -nNAME   Name for heading
  210.        -cSTRING Colorstring or just 1,2,3,4 for built in ones
  211.        -lLINK   Name for <a name> tag
  212.        -p       Do not colorize output (plain)
  213. EOM
  214.     exit(0);
  215. }
  216.  
  217. my $doctitle = defined($opts{t}) ? $opts{t} : "Disk Usage Tree";
  218. my $name =     defined($opts{n}) ? $opts{n} : "disktree";
  219. my $link =     defined($opts{l}) ? $opts{l} : "disktree";
  220.  
  221. my $bigger=0;
  222. $opts{s}  = "100M" if (!defined($opts{s}));
  223. $bigger = sizearg($opts{s});
  224.  
  225. $opts{c} = '1' if (!defined($opts{c}));
  226.  
  227. my @colors = split(/,/,$colorstrings{$opts{c}});
  228. for my $i (0 .. $#colors) { $colors[$i] =~ s/^(\S+)\s/sizearg($1) . "\t"/e; }
  229.  
  230. # main()
  231. #
  232.  
  233. print "<html><head><title>$doctitle</title></head><body bgcolor=\"white\">\n<br>\n" if ( ! defined($opts{b}) );
  234.  
  235. for my $fn (@ARGV) {
  236.     my $shortname = $name; $shortname =~ s/.*duk_//; $shortname =~ s/\....$//;
  237.     $name = $shortname if (! defined($opts{n}));
  238.     $link = $shortname if (! defined($opts{l}));
  239.  
  240.     my ($rootitm, $ptr_sizes, $ptr_subdirs) = readdf($fn);
  241.     my %itmsize = %{$ptr_sizes};
  242.     my %subdirs = %{$ptr_subdirs};
  243.    
  244.     my $body = treeoutput($rootitm,$ptr_sizes,$ptr_subdirs);
  245.     my $sb = stat($fn);
  246.     my $timestring = strftime '%A %d %B %Y at %H:%M:%S', localtime $sb->mtime;
  247.  
  248.     for (my $output = $template) {
  249.         s/_LINKNAME_/$link/gm;
  250.         s/_BODY_/$body/gm;
  251.         s/_NAME_/$name/gm;
  252.         s/_TIME_/$timestring/gm;
  253.         s/_MINSIZE_/nicesize($bigger)/gme;
  254.         s/_ROOT_/nicesize($itmsize{$rootitm})/gme;
  255.         print $_;
  256.     }
  257.  
  258. }
  259. print "</html>\n" if ( ! defined($opts{b}) );
  260. exit;
  261.  
  262.  
  263. ###################################
  264. # SUBROUTINES BELOW HERE
  265.  
  266. sub colorize {
  267.     my ($line,$size)=@_;
  268.     return $line if defined($opts{p});
  269.     for my $color (@colors) {
  270.         my ($minsize,$code) = split(/\t/,$color);
  271.         return qq!<font color="$code">$line</font>! if ($size >= $minsize);
  272.     }
  273.     return $line;
  274. }
  275.  
  276. sub nicesize {
  277.     my($size) = @_;
  278.     return sprintf("%.2fG",$size / (2**20) ) if ($size >= (2**20));
  279.     return sprintf("%liM",$size /  (2**10) ) if ($size >= (2**10));
  280.     return sprintf("%lik",$size );
  281. }
  282.  
  283. sub sizearg {
  284.     my($arg) = @_;
  285.     my %suffixes = ( '' => 1, 'k' => 1, 'm' => 2**10, 'g' => 2**20 );
  286.  
  287.     return ($arg =~ /(\d+)([kmgMGK])?/) ? ($1 * $suffixes{lc($2)}) : $arg;
  288. }
  289.  
  290. sub readdf {
  291.     my %itmsize;
  292.     my %subdirs;
  293.     my $rootitm;
  294.     my ($fn) = @_;
  295.  
  296.     open FILE,"<$fn" || die "Can't open $fn for reading\n";
  297.     while (<FILE>) {
  298.         chop;
  299.         my($size, $parent);
  300.         if (m#^(\d+)\s+(.*)#) { $size=$1; $rootitm=$2; }
  301.         $itmsize{$rootitm} = $size;
  302.         ($parent = $rootitm) =~ s#/[^/]+$##;
  303.         push @{ $subdirs{$parent} }, $rootitm unless eof;
  304.  
  305.         if ( ($size) &&  ($subdirs{$rootitm}) ) {
  306.                 my $subsize;
  307.                 for my $kid (@{ $subdirs{$rootitm} }) { $subsize += $itmsize{$kid}; }
  308.                 if ( ($subsize != $size) && ($subsize) ) {
  309.                     $itmsize{"$rootitm/."} = ($size - $subsize);
  310.                     push @{ $subdirs{$rootitm} }, "$rootitm/.";
  311.                 }
  312.         }
  313.     }
  314.     close FILE;
  315.     return ($rootitm,\%itmsize,\%subdirs);
  316. }
  317.  
  318. sub treeoutput {
  319.     my ($rootitem,$ptr_sizes,$ptr_subdirs) = @_;
  320.     my %itmsize = %{$ptr_sizes};
  321.     my %subdirs = %{$ptr_subdirs};
  322.  
  323.     my @worklist;
  324.     my @prefixes;
  325.     my $output_buffer;
  326.     push (@worklist,$rootitem);
  327.  
  328.     while (my $itm=pop(@worklist)) {
  329.         $itm=~s/\/$//;
  330.         if ($itm eq "\t") { shift(@prefixes); next; }
  331.         my $prefix=$prefixes[0];
  332.         my $path = $itm;
  333.         $path =~ s#.*/##;
  334.         my $size = $itmsize{$itm};
  335.         my $line = sprintf("%s %s", nicesize($size), $path);
  336.         my $html = colorize($line,$size);
  337.         $output_buffer .= $prefix . $html . "\n" if ($size > $bigger);
  338.         if ($subdirs{$itm}) {
  339.             my @subdirs = @{ $subdirs{$itm} };
  340.             @subdirs = sort { $itmsize{$a} <=> $itmsize{$b} } @subdirs;
  341.             $itmsize{$subdirs[0]} =~ /(\d+)/;
  342.  
  343.             push (@worklist, "\t");
  344.             for ($prefix .= $line) { s/\d[kMG] /| /; s/[^|]/ /g; }
  345.             unshift(@prefixes, $prefix);
  346.  
  347.             for my $kid (@subdirs) {
  348.                  push(@worklist, $kid) if ($itmsize{$kid} > $bigger);
  349.             }
  350.         }
  351.     }
  352.     return $output_buffer;
  353. }
  354.  
  355. sub cgi_die {
  356.   my ($string) = @_;
  357.   print "<h1>$string</h1></html>\n";
  358.   exit;
  359. }
  360.  
  361. sub self_query {
  362.   my ($param,$value) = @_;
  363.  
  364.   my $result=$ENV{'QUERY_STRING'};
  365.   # remove the param
  366.   $result =~ s#([\?&])?$param=[^&]*##;
  367.   $result .= "&$param=$value";
  368.   return $ENV{'SCRIPT_URL'} . '?' . $result;
  369. }
Add Comment
Please, Sign In to add comment