View difference between Paste ID: iV6kzCHn and HW5vYfCJ
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/perl -w
2
########################################################################
3
####  This program is free software; you can redistribute it and/or modify
4
####  it under the terms of the GNU General Public License as published by
5
####  the Free Software Foundation; either version 3 of the License, or
6
####  (at your option) any later version.
7
####
8
####  This program is distributed in the hope that it will be useful,
9
####  but WITHOUT ANY WARRANTY; without even the implied warranty of
10
####  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
####  GNU General Public License for more details.
12
####
13
####  You should have received a copy of the GNU General Public License
14
####  along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
####
16
####  If you don't understand what Free Software is, please read (or reread)
17
####  this page: http://www.gnu.org/philosophy/free-sw.html
18
########################################################################
19
use strict;
20
use Getopt::Std;
21
## ---- Command line handling --------------------------------------------
## Switches: -h help, -v verbose, -f fast mode, -c common, -m missing,
## -d duplicates, -k N column to print (takes a value, needs -f).
my %opts;
## getopts returns false when it meets an unknown switch or a missing
## argument; show the help instead of silently carrying on.
getopts('hvfcmdk:', \%opts) or usage();
usage() if $opts{h};
## Two file names are required. Test the argument count instead of the
## truth of $ARGV[1] so that a file literally named "0" is accepted.
usage() unless @ARGV >= 2;

my $missing=$opts{m}||undef;
my $column=$opts{k}||undef;
my $common=$opts{c}||undef;
my $verbose=$opts{v}||undef;
my $fast=$opts{f}||undef;
my $dupes=$opts{d}||undef;
## Reporting the missing entries is the default action.
$missing=1 unless $common || $dupes;

## %found : FILE1 keys that are still being searched for in FILE2
## %k     : per key counter (occurrences in FILE1 plus hits in FILE2)
## %fields: in -k mode, the whitespace separated fields of the FILE2 line
my (%found,%k,%fields);
if ($column) {
    die("The -k option only works in fast (-f) mode\n") unless $fast;
    die("The -k option needs a positive integer, got '$column'\n")
	unless $column =~ /^[1-9][0-9]*$/;
    ## Switch to a 0-based index so the user can count columns from 1.
    ## NOTE: "-k 1" leaves $column at 0, which is false, so the rest of the
    ## script treats it as "no -k". The output is the same either way,
    ## because the first column is the key that gets printed by default.
    $column--;
}
37
38
## ---- Load the search keys from FILE1 -----------------------------------
## Every key starts with a count of 1 in %k (more if FILE1 repeats it) and
## is registered in %found as "still to be looked for".
## Three-argument open with a lexical handle: the file name comes straight
## from the command line and must never be interpreted as an open mode.
open(my $file1_fh,'<',$ARGV[0]) or die("Cannot open $ARGV[0]: $!\n");
while(<$file1_fh>){
    chomp;
    ## Blank lines carry no key. Keeping them would create an empty (or, in
    ## fast mode, undefined) hash key, trigger "uninitialized" warnings and
    ## later be used as an empty search pattern.
    next if /^\s*$/;
    my $key=$_;
    if ($fast){
	## Fast mode: only the first whitespace separated field is the key.
	my @a=split(/\s+/,$_);
	$key=$a[0];
    }
    $k{$key}++;
    $found{$key}++;
}
close($file1_fh);
52
## ---- Scan FILE2 and count the hits for every FILE1 key ------------------
## $n    : number of non-blank FILE2 lines processed so far
## $size : total number of lines in FILE2 (only computed in verbose mode)
my $n=0;
my $size=0;
if($verbose){
    ## Extra pass over the file so the progress reports can show a total.
    open(my $count_fh,'<',$ARGV[1]) or die("Cannot open $ARGV[1]: $!\n");
    $size++ while <$count_fh>;
    close($count_fh);
}
## Three-argument open with a lexical handle: the file name comes straight
## from the command line and must never be interpreted as an open mode.
open(my $file2_fh,'<',$ARGV[1]) or die("Cannot open $ARGV[1]: $!\n");

while(<$file2_fh>){
    next if /^\s+$/;
    $n++;
    chomp;
    ## Progress: a dot every 10 lines, a counter every 800 lines.
    print STDERR "." if $verbose && $n % 10==0;
    print STDERR "[$n of $size lines]\n" if $verbose && $n % 800==0;
    if($fast){
	## Fast mode: exact comparison of the first field with the FILE1 keys.
	my @a=split(/\s+/,$_);
	$k{$a[0]}++ if defined($k{$a[0]});
	## Remember the whole line so the requested column can be printed.
	$fields{$a[0]}=\@a if $column;
    }
    else{
	## Slow mode: every pending FILE1 entry is used as a regular
	## expression and searched for anywhere in the current line.
	foreach my $key(keys(%found)){
	    if (/$key/){
		$k{$key}++ ;
		## Unless duplicates are being counted, one hit is all that is
		## needed: drop the key so later lines no longer test it.
		## Deleting is safe here, foreach iterates over a copy of the
		## key list. (Assigning undef would keep the key in the hash.)
		delete $found{$key} unless $dupes;
	    }
	}
    }
}
close($file2_fh);
print STDERR "[$n of $size lines]\n" if $verbose;
86
#$missing && do map{print "$_ : $k{$_}\n" }keys(%k);
87
## ---- Report --------------------------------------------------------------
## %k holds, for every FILE1 key, its occurrences in FILE1 plus the hits
## counted in FILE2:
##   count <= 1 : reported by -m (missing)
##   count  > 1 : reported by -c (common)
##   count  > 2 : reported by -d (duplicates)
## Plain loops are used on purpose: "do map {...} LIST" is "do EXPR", which
## first runs the map and then tries to execute a file named after the
## number of elements it returned.

## Text to print for one key: the key itself or, in -k mode, the requested
## column of the FILE2 line recorded for that key.
my $output_for = sub {
    my ($key) = @_;
    return $key unless $column;
    my $row = $fields{$key};
    ## A key that never showed up in FILE2 has no recorded line (this is
    ## always the case for "missing" keys); dereferencing it would abort
    ## the script under strict, so fall back to the key itself.
    return $key unless $row;
    ## Lines with fewer columns than requested give an empty value.
    return defined($row->[$column]) ? $row->[$column] : '';
};

if ($missing) {
    foreach my $key (keys(%k)) {
	next if $k{$key}>1;
	my $text = $output_for->($key);
	print "$text\n";
    }
}
if ($common) {
    foreach my $key (keys(%k)) {
	next unless $k{$key}>1;
	my $text = $output_for->($key);
	print "$text\n";
    }
}
if ($dupes) {
    foreach my $key (keys(%k)) {
	next unless $k{$key}>2;
	my $text = $output_for->($key);
	print "$text\n";
    }
}
97
## usage()
## Prints the help text to STDERR and terminates the program with exit
## status 0. Takes no arguments and never returns to the caller.
sub usage{
    print STDERR <<"EndOfHelp";

  USAGE: compare_lists.pl [OPTIONS] FILE1 FILE2

      This script will compare FILE1 and FILE2, searching for the 
      contents of FILE1 in FILE2 (and NOT vice versa). FILE one must 
      be one search pattern per line, the search pattern need only be 
      contained within one of the lines of FILE2.

    OPTIONS: 
      -c : Print patterns COMMON to both files
      -f : Search only the first characters of each line of FILE2
      for the search pattern given in FILE1
      -k : Column number (counted from 1) of the FILE2 line to print
      instead of the pattern itself; only works together with -f
      -d : Print duplicate entries     
      -m : Print patterns MISSING in FILE2 (default)
      -v : Print progress information to STDERR
      -h : Print this help and exit
EndOfHelp
      exit(0);
}