Hashim

Untitled

Feb 1st, 2017
201
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 8.81 KB | None | 0 0
  1. #!/usr/bin/perl
  2. #
  3. # wyd.pl by Max Moser and Martin J. Muench
  4. #
  5. #  [ Licence ]
  6. #
  7. #  This program is free software; you can redistribute it and/or modify
  8. #  it under the terms of the GNU General Public License as published by
  9. #  the Free Software Foundation; either version 2 of the License, or
  10. #  any later version.
  11. #
  12. #  This program is distributed in the hope that it will be useful,
  13. #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15. #  GNU General Public License for more details.
  16. #
  17. #  You should have received a copy of the GNU General Public License
  18. #  along with this program; if not, write to the Free Software
  19. #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20. #
  21. #  See 'docs/gpl.txt' for more information.
  22.  
  23. use strict;
  24. use FileHandle;
  25. use File::Find;
  26. use File::Basename;
  27. use Getopt::Long;
  28.  
  29. my $version  = "0.2";  # version
  30.  
  31. my @listoffiles;       # The list of files to process
  32. my $fileprog = undef;  # scalar that is filled with 'file' program
  33.  
  34. # Module hash containing module name and supported file extensions
  35. # Multiple extensions are seperated using ';'
  36. my %wlgmods = (
  37.            'wlgmod::strings', '',           # only used with command-line switch
  38.            'wlgmod::plain'  , '.txt',       # used for all MIME text/plain as well
  39.            'wlgmod::html'   , '.html;.htm;.php;.php3;.php4',
  40.            'wlgmod::doc'    , '.doc',
  41.            'wlgmod::pdf'    , '.pdf',
  42.            'wlgmod::mp3'    , '.mp3',
  43.            'wlgmod::ppt'    , '.ppt',
  44.            'wlgmod::jpeg'   , '.jpeg;.jpg;.JPG;.JPEG',
  45.            'wlgmod::odt'    , '.odt;.ods;.odp'
  46.            );
  47.  
  48. # Hash that will be filled dynamically with filehandles (if -t is used)
  49. my %file_handle = ();
  50.  
  51. #### Begin main ####
  52.  
  53. # Print Header
  54. print STDERR "\n*\n* $0 $version by Max Moser and Martin J. Muench\n*\n\n";
  55.  
  56. # Check command line options
  57. my %opts;
  58. my $strings_check  = undef;
  59. my $output_file    = undef;
  60. my $separate_types = undef;
  61. my $no_filenames   = undef;
  62. my $debug          = undef;      # set to "1" for debugprints -v will do this on command line
  63. my $prefixclean    = undef;
  64. my $postfixclean   = undef;
  65. my $no_missingask  = undef;
  66.  
  67. # Parse command line
  68. &usage if !GetOptions ('s=i' => \$strings_check,
  69.                'o=s' => \$output_file,
  70.                'v+'  => \$debug,
  71.                'e+'  => \$postfixclean,
  72.                'b+'  => \$prefixclean ,
  73.                't+'  => \$separate_types,
  74.                'f+'  => \$no_filenames,
  75.                'n+'  => \$no_missingask);
  76.  
  77.  
  78. # -t used without -o
  79. &usage if($separate_types && !$output_file);
  80.  
  81. # No file(s)/dir(s) given
  82. &usage if($#ARGV < 0);
  83.  
  84. # Add given file(s)/directories to array
  85. for(my $i = 0 ; $i <= $#ARGV ; $i++) {
  86.     # File/Dir does not exist
  87.     if ( ! -e $ARGV[$i]) {
  88.     die "\nError, $ARGV[0] does not exist.\n\n";   
  89.     }
  90.     # Directory given
  91.     elsif ( -d $ARGV[$i])
  92.     {
  93.     # Its a directory so we first generate a list of all files with names
  94.     print "\n Its a directory \n" if $debug;
  95.  
  96.     $ARGV[$i] = qw(.) unless $ARGV[$i];
  97.  
  98.     find sub {
  99.         push @listoffiles, $File::Find::name if -f     
  100.     }, $ARGV[$i];
  101.    
  102.     }
  103.     # Single File
  104.     elsif (-f $ARGV[$i])  {
  105.     push @listoffiles, $ARGV[$i];
  106.     }
  107.     else {
  108.     die "\n* Error: $ARGV[$i] is not a directory and not a regular file.\n* Sorry, for now this is unsupported.\n\n";
  109.     }
  110.    
  111. }
  112.  
  113. print "\n\nThats the list to process: @listoffiles\n\n" if $debug;
  114.  
  115.  
  116. # Initialize modules
  117. if (!&check_n_init) {
  118.     die "\n* Processing aborted\n\n";
  119. }
  120.  
  121. # Open outputfile if requested
  122. if($output_file && !$separate_types) {
  123.     open(OUTPUT, ">$output_file") || die "\n* Cannot open output file: $!\n";
  124. }
  125. # Create output files for all types if requested
  126. elsif($output_file && $separate_types) {
  127.     foreach (keys %wlgmods) {
  128.     $_ =~ s/wlgmod:://;
  129.     my $fh =  new FileHandle "$output_file.$_", "w";
  130.     if(!$fh) {
  131.         die "\n* Cannot create $output_file.$_: $!\n";
  132.     }
  133.         $file_handle{$_} = $fh;
  134.     }
  135. }
  136.  
  137. # We progress now with processing the files and produce the output
  138. foreach my $singlefile (@listoffiles)
  139. {
  140.     # Get words using modules
  141.     my ($type, @words) = get_words($singlefile);
  142.  
  143.     # Print to given output (STDOUT || file)
  144.  
  145.     my $numentries = @words;
  146.     if($numentries > 0) {
  147.     print "---- Words in $singlefile -----\n\n"  if $debug;
  148.  
  149.     foreach my $wort (unique (\@words))
  150.     {
  151.         # Write to single file
  152.         if($output_file && !$separate_types) {
  153.         print OUTPUT "$wort\n";
  154.         }
  155.         # Write to type-specific output files
  156.         elsif($output_file && $separate_types) {
  157.         foreach(keys %file_handle) {
  158.             if($_ eq $type) {
  159.             my $fh = $file_handle{$_};     
  160.             print $fh "$wort\n";
  161.             }
  162.         }
  163.         }
  164.         else {
  165.         print "$wort\n";
  166.         }
  167.     }
  168.    
  169.     print "\n----- $singlefile -----\n" if $debug;
  170.     }
  171. }
  172.  
  173. # single out
  174. if($output_file && !$separate_types) {
  175.     close(OUTPUT);
  176. }
  177. # single file for each type
  178. elsif($output_file && $separate_types) {
  179.     foreach(keys %file_handle) {
  180.     my $fh = $file_handle{$_};
  181.     close($fh);
  182.     # remove empty files
  183.     my $file = "$output_file.$_";
  184.     unlink $file if -z $file;
  185.     }
  186. }
  187.  
  188.  
  189. print STDERR "\n** Done\n\n";
  190.  
  191. exit(0);
  192.  
  193. #### End of main ####
  194.  
  195. # Load needed plugin and extract words
  196. sub get_words {
  197.     my ($file)     = @_;
  198.     my $found      = 0;
  199.     my @words      = undef;
  200.     my $type       = undef;
  201.     my $file_name  = undef;
  202.     my $file_dir   = undef;
  203.     my $file_ext   = undef;
  204.  
  205.     ($file_name, $file_dir, $file_ext) = fileparse($file,'\..*');
  206.  
  207.     # Look for matching module and get words
  208.     foreach(keys %wlgmods) {
  209.     my @ext = split(";", $wlgmods{$_});
  210.     foreach my $extension (@ext) {
  211.         if($file_ext eq $extension) {
  212.         $type = $_;
  213.         @words = $_->get_words($file);
  214.         $found=1;
  215.         last;
  216.         }
  217.     }
  218.     }
  219.  
  220.     # If no module is found, do further checks
  221.     if(!$found) {
  222.     # Check MIME type, if ascii try plain-text module
  223.     open(FILE, "$fileprog -bi \"$file\"|") || die "Cannot execute file: $!\n";
  224.     my $type = <FILE>;
  225.     close(FILE);
  226.     if($type =~ m/^text\/plain/) {
  227.         print "'file' MIME check returned text/plain\n" if $debug;
  228.         $type = "wlgmod::plain";
  229.         @words = wlgmod::plain->get_words($file);
  230.     }
  231.     # Use strings module
  232.     elsif($strings_check) {
  233.         # Check if strings module available
  234.         foreach(keys %wlgmods) {
  235.         if($_ eq "wlgmod::strings") {
  236.             $type = "wlgmod::strings";
  237.             @words = wlgmod::strings->get_words($file,$strings_check);
  238.         }
  239.         }
  240.     }
  241.     # Give up and ignore file
  242.     else {
  243.         print STDERR "Ignoring file '$file'\n";
  244.         return (undef, undef);
  245.     }
  246.     }
  247.  
  248.     # Add filename itself to wordlist (without path/extension)
  249.     if(!$no_filenames) {
  250.     push @words, $file_name;
  251.     }
  252.  
  253.     # Remove brackets quotes etc.
  254.     my @Cleanedwords;
  255.     foreach (@words) {
  256.     s/^\W*(.*)/$1/  unless $prefixclean;
  257.     s/^(.*)\W+$/$1/ unless $postfixclean;      
  258.     push @Cleanedwords,$_;
  259.     }
  260.  
  261.     # Cleanup type for high-level func
  262.     $type =~ s/wlgmod:://;
  263.  
  264.     return ($type, @Cleanedwords);
  265.  
  266. } # End sub getwords
  267.  
  268. # Check modules for availability and init or remove them
  269. sub check_n_init {
  270.     my $retvals = undef;
  271.  
  272.     # Check for 'file'
  273.     open(FILE, "which file|");
  274.     chomp($fileprog = <FILE>);
  275.     close(FILE);
  276.     if($?) {
  277.     $fileprog = undef;
  278.     $retvals .= "file: Cannot locate 'file', skipping MIME type check on unknown files";
  279.     }
  280.  
  281.     # Initialize possible modules
  282.     foreach(keys %wlgmods) {
  283.     eval("use $_;");
  284.     my $ret = $_->init();
  285.     # If module failed, add errortext and remove from hash
  286.     if($ret) {
  287.         $retvals .= "$_: $ret\n";
  288.         delete $wlgmods{$_};
  289.         $ret = "";
  290.     }
  291.     }
  292.  
  293.     # If one or more modules failed, let user decide what to do
  294.     if($retvals) {
  295.     print STDERR "\n* Error initializing some modules:\n\n$retvals\n";
  296.     # prompt user what to do if not disabled
  297.     if(!$no_missingask) {
  298.         print STDERR "* Press enter to disable them and continue or STRG+C to abort\n";
  299.         <STDIN>;
  300.         }
  301.     }
  302.  
  303.     return 1;
  304. }
  305.  
  306.  
  307. # Make resulting list entries unique
  308. sub unique {
  309.     my $reflist = shift;
  310.     my @uniq    = undef;
  311.     my %seen    = ();
  312.     @uniq = grep { ! $seen{$_} ++ } @$reflist;
  313.     return @uniq;
  314. }
  315.  
  316. # print usage and exit
  317. sub usage {
  318. print qq~Usage: $0 [OPTIONS] <file(s)|directory>
  319.  
  320.        Options:
  321.  
  322.        -o <file>     = Write wordlist to <file>
  323.        -t            = Separate wordlist files by type, e.g. '<file>.doc'
  324.        -s <min-len>  = Use 'strings' for unsupported files
  325.        -b            = Disable removal of non-alpha chars at beginning of word
  326.        -e            = Disable removal of non-alpha chars at end of word
  327.        -f            = Disable inclusion of filenames in wordlist        
  328.        -v            = Show debug / verbose output
  329.        -n            = Continue even if programs / modules are missing
  330.  
  331. ~;
  332. exit(1);
  333. }
  334.  
  335. #### EOF #####
Add Comment
Please, Sign In to add comment