Advertisement
Guest User

list_compare.pl

a guest
Dec 9th, 2012
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl -w
  2. ########################################################################
  3. ####  This program is free software; you can redistribute it and/or modify
  4. ####  it under the terms of the GNU General Public License as published by
  5. ####  the Free Software Foundation; either version 3 of the License, or
  6. ####  (at your option) any later version.
  7. ####
  8. ####  This program is distributed in the hope that it will be useful,
  9. ####  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ####  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11. ####  GNU General Public License for more details.
  12. ####
  13. ####  You should have received a copy of the GNU General Public License
  14. ####  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15. ####
  16. ####  If you don't understand what Free Software is, please read (or reread)
  17. ####  this page: http://www.gnu.org/philosophy/free-sw.html
  18. ########################################################################
  19. use strict;
  20. use Getopt::Std;
  # ----------------------------------------------------------------------
  # Main program.
  #
  # Reads keys from FILE1 ($ARGV[0]), looks for them in FILE2 ($ARGV[1]) and
  # prints the keys that are missing from FILE2 (-m, the default), common to
  # both files (-c) and/or duplicated (-d).
  #
  #   default : every non-blank line of FILE1 is used as a Perl regular
  #             expression and matched against every line of FILE2.
  #   -f      : "fast" mode; only the first whitespace-separated field of
  #             each line is used and it is compared by exact hash lookup.
  #   -k N    : fast mode only; print column N (counted from 1) of the last
  #             FILE2 line carrying the key instead of the key itself.
  #   -v      : report progress on STDERR.
  #
  # Results are printed in sorted key order so that runs are reproducible.
  # ----------------------------------------------------------------------
  my %opts;
  getopts('hvfcmdk:', \%opts) or usage(1);
  usage()  if $opts{h};
  usage(1) if @ARGV < 2;    # count the arguments so a file named "0" is accepted

  my ($file1, $file2) = @ARGV;
  my $missing = $opts{m};
  my $common  = $opts{c};
  my $dupes   = $opts{d};
  my $verbose = $opts{v};
  my $fast    = $opts{f};
  $missing = 1 unless $common || $dupes;    # -m is the default report

  # Zero-based index of the column requested with -k; undef when -k is absent.
  my $col_idx;
  if (defined $opts{k}) {
      die("The -k option only works in fast (-f) mode\n") unless $fast;
      die("The -k option needs a positive column number\n")
          unless $opts{k} =~ /^[1-9]\d*$/;
      $col_idx = $opts{k} - 1;    # users count columns from 1
  }

  my %seen_in_file1;    # key => number of times it occurs in FILE1
  my %hits_in_file2;    # key => number of FILE2 lines in which it was found
  my %regex_for;        # key => compiled pattern still searched for (default mode)
  my %fields_for;       # key => fields of the last FILE2 line carrying it (-k)

  # --- Pass 1: collect the keys (or patterns) listed in FILE1 --------------
  open(my $in1, '<', $file1) or die("Cannot open $file1: $!\n");
  while (my $line = <$in1>) {
      chomp $line;
      # A blank line would turn into an empty key or an empty regex (which
      # Perl treats specially), so it is skipped just like blank FILE2 lines.
      next if $line =~ /^\s*$/;
      my $key = $line;
      # split(' ') ignores leading whitespace, so an indented line still
      # yields its first real field rather than an empty string.
      ($key) = split(' ', $line) if $fast;
      $seen_in_file1{$key}++;
      # Compile each pattern once instead of once per FILE2 line.
      $regex_for{$key} = qr/$key/ unless $fast || $regex_for{$key};
  }
  close($in1);

  # --- Optional: count the lines of FILE2 for the progress display ---------
  my $size = 0;
  if ($verbose) {
      open(my $count_fh, '<', $file2) or die("Cannot open $file2: $!\n");
      while (defined(my $ignored = <$count_fh>)) {
          $size++;
      }
      close($count_fh);
  }

  # --- Pass 2: look for the FILE1 keys in FILE2 ----------------------------
  open(my $in2, '<', $file2) or die("Cannot open $file2: $!\n");
  my $n = 0;    # number of non-blank FILE2 lines processed so far
  while (my $line = <$in2>) {
      next if $line =~ /^\s*$/;
      $n++;
      chomp $line;
      if ($verbose) {
          print STDERR "." if $n % 10 == 0;
          print STDERR "[$n of $size lines]\n" if $n % 800 == 0;
      }
      if ($fast) {
          my @cols = split(' ', $line);
          my $key  = $cols[0];
          next unless exists $seen_in_file1{$key};
          $hits_in_file2{$key}++;
          $fields_for{$key} = \@cols if defined $col_idx;
      }
      else {
          # keys() returns a copy of the key list, so deleting while looping
          # over it is safe.
          foreach my $key (keys %regex_for) {
              next unless $line =~ $regex_for{$key};
              $hits_in_file2{$key}++;
              # One hit is enough unless duplicates have to be counted, so
              # stop searching for this pattern.
              delete $regex_for{$key} unless $dupes;
          }
      }
  }
  close($in2);
  print STDERR "[$n of $size lines]\n" if $verbose;

  # --- Report --------------------------------------------------------------
  # Prints one result line for $key: the requested FILE2 column when -k was
  # given and a FILE2 line is known for the key, otherwise the key itself
  # (a key that is missing from FILE2 has no FILE2 line to take a column from).
  my $print_key = sub {
      my ($key) = @_;
      my $text = $key;
      if (defined $col_idx && $fields_for{$key}) {
          my $value = $fields_for{$key}[$col_idx];
          $text = defined $value ? $value : '';
      }
      print "$text\n";
  };

  my @keys = sort keys %seen_in_file1;
  if ($missing) {
      foreach my $key (@keys) {
          $print_key->($key) unless $hits_in_file2{$key};
      }
  }
  if ($common) {
      foreach my $key (@keys) {
          $print_key->($key) if $hits_in_file2{$key};
      }
  }
  if ($dupes) {
      foreach my $key (@keys) {
          # A key is a duplicate when it occurs more than twice over both
          # files together.
          my $total = $seen_in_file1{$key} + ($hits_in_file2{$key} || 0);
          $print_key->($key) if $total > 2;
      }
  }
  # usage([$exit_status])
  #
  # Prints the help text to STDERR and terminates the program; it never
  # returns to the caller.
  #
  #   $exit_status : optional process exit status. Defaults to 0, so a call
  #                  without arguments behaves as before; pass a non-zero
  #                  value when the help is shown because of a usage error.
  sub usage {
      my ($exit_status) = @_;
      $exit_status = 0 unless defined $exit_status;

      # Show the name the script was started under rather than a fixed name
      # that may not match the file.
      (my $prog = $0) =~ s{.*/}{};

      print STDERR join("\n",
          '',
          "  USAGE: $prog [OPTIONS] FILE1 FILE2",
          '',
          '      This script will compare FILE1 and FILE2, searching for the',
          '      contents of FILE1 in FILE2 (and NOT vice versa). FILE1 must',
          '      contain one search pattern (a Perl regular expression) per',
          '      line; a pattern only needs to be contained within one of the',
          '      lines of FILE2.',
          '',
          '    OPTIONS:',
          '      -c   : Print patterns COMMON to both files',
          '      -f   : Fast mode: compare only the first whitespace-separated',
          '             field of each line of FILE1 and FILE2, as an exact',
          '             string instead of a pattern',
          '      -k N : With -f, print column N of the matching line of FILE2',
          '             instead of the first field',
          '      -d   : Print duplicate entries',
          '      -m   : Print patterns MISSING in FILE2 (default)',
          '      -v   : Print progress information to STDERR',
          '      -h   : Print this help and exit',
      ), "\n";
      exit($exit_status);
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement