Guest User

list_compare.pl

a guest
Dec 9th, 2012
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 3.61 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2. ########################################################################
  3. ####  This program is free software; you can redistribute it and/or modify
  4. ####  it under the terms of the GNU General Public License as published by
  5. ####  the Free Software Foundation; either version 3 of the License, or
  6. ####  (at your option) any later version.
  7. ####
  8. ####  This program is distributed in the hope that it will be useful,
  9. ####  but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ####  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11. ####  GNU General Public License for more details.
  12. ####
  13. ####  You should have received a copy of the GNU General Public License
  14. ####  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15. ####
  16. ####  If you don't understand what Free Software is, please read (or reread)
  17. ####  this page: http://www.gnu.org/philosophy/free-sw.html
  18. ########################################################################
  19. use strict;
  20. use Getopt::Std;
  # ----------------------------------------------------------------------
  # Main program.
  #
  # Reads the entries listed in FILE1, looks each of them up in FILE2 and
  # prints the entries that are missing (-m, the default), common (-c) or
  # duplicated (-d).  Two lookup modes exist:
  #
  #   default : every FILE1 line is used as a regular expression and is
  #             searched for anywhere inside each FILE2 line;
  #   -f      : only the first whitespace-separated field of each line is
  #             compared, by exact hash lookup.
  #
  # Blank lines are ignored in both files.  Output is sorted by entry so
  # that repeated runs give the same result.
  # ----------------------------------------------------------------------
  my %opts;
  getopts('hvfcmdk:', \%opts) or usage(1);

  my $missing = $opts{m};
  my $common  = $opts{c};
  my $verbose = $opts{v};
  my $fast    = $opts{f};
  my $dupes   = $opts{d};
  $missing = 1 unless $common || $dupes;

  usage(0) if $opts{h};
  # Count the arguments instead of testing $ARGV[1] for truth, so that a
  # file literally named "0" is accepted.
  usage(1) unless @ARGV >= 2;
  my ($list_file, $data_file) = @ARGV;

  # -k is 1-based on the command line and 0-based internally.  A separate
  # flag records whether it was given, because index 0 is a valid column.
  my $use_column = defined $opts{k};
  my $column;
  if ($use_column) {
      die("The -k option only works in fast (-f) mode\n") unless $fast;
      die("The -k option needs a positive column number\n")
          unless $opts{k} =~ /^[1-9][0-9]*$/;
      $column = $opts{k} - 1;
  }

  my %list_count;    # entry => number of times it appears in FILE1
  my %hits;          # entry => number of FILE2 lines that matched it
  my %pattern_for;   # entry => compiled regex still to search (slow mode)
  my %fields;        # entry => fields of the last matching FILE2 line (-k)

  # ---- Load the entries from FILE1 ------------------------------------
  open(my $list_fh, '<', $list_file)
      or die("Cannot open $list_file: $!\n");
  while (my $line = <$list_fh>) {
      chomp $line;
      # A blank line would give an undefined key in fast mode and an empty
      # pattern (which silently reuses the last successful regex) otherwise.
      next if $line =~ /^\s*$/;
      my $key = $fast ? (split(/\s+/, $line))[0] : $line;
      $list_count{$key}++;
      $pattern_for{$key} = qr/$key/ unless $fast || $pattern_for{$key};
  }
  close($list_fh);

  # ---- Optional pre-pass: count FILE2 lines for the progress display --
  # Only non-blank lines are counted, matching what the main loop counts.
  my $size = 0;
  if ($verbose) {
      open(my $count_fh, '<', $data_file)
          or die("Cannot open $data_file: $!\n");
      while (my $line = <$count_fh>) {
          $size++ unless $line =~ /^\s*$/;
      }
      close($count_fh);
  }

  # ---- Scan FILE2 ------------------------------------------------------
  my $n = 0;
  open(my $data_fh, '<', $data_file)
      or die("Cannot open $data_file: $!\n");
  while (my $line = <$data_fh>) {
      next if $line =~ /^\s*$/;
      $n++;
      chomp $line;
      print STDERR "." if $verbose && $n % 10 == 0;
      print STDERR "[$n of $size lines]\n" if $verbose && $n % 800 == 0;

      if ($fast) {
          my @cols = split(/\s+/, $line);
          my $key  = $cols[0];
          next unless exists $list_count{$key};
          $hits{$key}++;
          # Keep the fields only for entries we may have to print.
          $fields{$key} = \@cols if $use_column;
      }
      else {
          foreach my $key (keys %pattern_for) {
              next unless $line =~ $pattern_for{$key};
              $hits{$key}++;
              # One hit is enough unless duplicates are being counted, so
              # stop searching for this entry on the remaining lines.
              delete $pattern_for{$key} unless $dupes;
          }
      }
  }
  close($data_fh);
  print STDERR "[$n of $size lines]\n" if $verbose;

  # ---- Report ----------------------------------------------------------
  # Text printed for an entry: the requested column of its matching FILE2
  # line when -k is in effect, otherwise the entry itself.  Entries with no
  # matching line (the "missing" report) have no fields, so they fall back
  # to the entry text instead of dereferencing an undefined value.
  my $text_for = sub {
      my ($key) = @_;
      return $key unless $use_column && $fields{$key};
      my $value = $fields{$key}[$column];
      return defined($value) ? $value : '';
  };

  my @entries = sort keys %list_count;

  if ($missing) {
      foreach my $key (grep { !$hits{$_} } @entries) {
          print $text_for->($key), "\n";
      }
  }
  if ($common) {
      foreach my $key (grep { $hits{$_} } @entries) {
          print $text_for->($key), "\n";
      }
  }
  if ($dupes) {
      # Same threshold as before: occurrences in FILE1 plus matches in
      # FILE2 must exceed two.
      foreach my $key (grep { $list_count{$_} + ($hits{$_} || 0) > 2 } @entries) {
          print $text_for->($key), "\n";
      }
  }
  # usage([STATUS])
  #
  # Prints the help text to STDERR and terminates the program.
  #
  #   STATUS : optional exit status; defaults to 0 when omitted, so
  #            existing calls without arguments behave as before.
  #
  # Never returns.
  sub usage {
      my ($status) = @_;
      $status = 0 unless defined $status;

      # Show the name the script was actually invoked under.
      (my $name = $0) =~ s{.*/}{};

      my @help = (
          '',
          "  USAGE: $name [OPTIONS] FILE1 FILE2",
          '',
          '      This script will compare FILE1 and FILE2, searching for the',
          '      contents of FILE1 in FILE2 (and NOT vice-versa). FILE1 must',
          '      contain one search pattern per line; a pattern only needs to',
          '      be contained within one of the lines of FILE2.',
          '',
          '    OPTIONS:',
          '      -c   : Print patterns COMMON to both files',
          '      -d   : Print duplicate entries',
          '      -f   : Fast mode: compare only the first whitespace-separated',
          '             field of each line, by exact match',
          '      -k N : With -f, print column N of the matching FILE2 line',
          '             instead of the pattern itself',
          '      -m   : Print patterns MISSING in FILE2 (default)',
          '      -v   : Print progress information to STDERR',
          '      -h   : Print this help and exit',
      );
      print STDERR join("\n", @help), "\n";
      exit($status);
  }
Advertisement
Add Comment
Please, Sign In to add comment