SHOW:
|
|
- or go back to the newest paste.
#!/usr/bin/perl -w
########################################################################
#### This program is free software; you can redistribute it and/or modify
#### it under the terms of the GNU General Public License as published by
#### the Free Software Foundation; either version 3 of the License, or
#### (at your option) any later version.
####
#### This program is distributed in the hope that it will be useful,
#### but WITHOUT ANY WARRANTY; without even the implied warranty of
#### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#### GNU General Public License for more details.
####
#### You should have received a copy of the GNU General Public License
#### along with this program. If not, see <http://www.gnu.org/licenses/>.
####
#### If you don't understand what Free Software is, please read (or reread)
#### this page: http://www.gnu.org/philosophy/free-sw.html
########################################################################
use strict;
use warnings;
use Getopt::Std;

## Compare two lists: every line of FILE1 is a search pattern that is looked
## for in FILE2 (never the other way round).
##
##   slow mode (default) : each FILE1 line is used as a regular expression
##                         and tried against every non-blank line of FILE2.
##   fast mode (-f)      : the first whitespace-separated field of each
##                         FILE1 line is compared for equality with the
##                         first field of each FILE2 line.
##
## Results go to STDOUT, progress (-v) to STDERR.

my %opts;
getopts('hvfcmdk:', \%opts);
my $missing = $opts{m};
my $common  = $opts{c};
my $verbose = $opts{v};
my $fast    = $opts{f};
my $dupes   = $opts{d};
my $column;    ## zero-based index of the FILE2 field to print (-k)

## Reporting missing patterns is the default when no other report is asked for.
$missing = 1 unless $common || $dupes;

usage(0) if $opts{h};
## Count the arguments instead of testing $ARGV[1] for truth, so that a
## file literally named "0" is accepted.
usage(1) unless @ARGV >= 2;

if (defined $opts{k}) {
    die("The -k option only works in fast (-f) mode\n") unless $fast;
    die("The -k option needs a positive field number, got '$opts{k}'\n")
        unless $opts{k} =~ /^[1-9][0-9]*$/;
    $column = $opts{k} - 1;    ## So I don't need to count from 0
}

my ($list_file, $target_file) = @ARGV;

my %in_list;    ## key => number of times it is listed in FILE1
my %hits;       ## key => number of FILE2 lines it was found on
my %pending;    ## slow mode only: key => compiled regex still being searched
my %fields;     ## fast mode with -k: key => fields of the last matching line

## ---- Read the search patterns from FILE1 ------------------------------
open(my $list_fh, '<', $list_file)
    or die("Cannot open $list_file: $!\n");
while (my $line = <$list_fh>) {
    chomp $line;
    ## A blank pattern is meaningless (and an empty regex means "reuse the
    ## last successful pattern" in Perl), so ignore such lines.
    next if $line =~ /^\s*$/;
    ## split ' ' ignores leading whitespace, so the key is the first real
    ## field rather than an empty string.
    my $key = $fast ? (split(' ', $line))[0] : $line;
    $in_list{$key}++;
    ## Compile each pattern once instead of once per FILE2 line.
    $pending{$key} = qr/$key/ unless $fast;
}
close($list_fh);

## ---- Count the lines of FILE2 (only needed for the progress report) ----
my $size = 0;
if ($verbose) {
    open(my $count_fh, '<', $target_file)
        or die("Cannot open $target_file: $!\n");
    while (my $line = <$count_fh>) {
        ## Blank lines are skipped below, so leave them out of the total too.
        $size++ unless $line =~ /^\s*$/;
    }
    close($count_fh);
}

## ---- Search FILE2 ------------------------------------------------------
my $n = 0;
open(my $target_fh, '<', $target_file)
    or die("Cannot open $target_file: $!\n");
while (my $line = <$target_fh>) {
    next if $line =~ /^\s*$/;
    $n++;
    chomp $line;
    print STDERR "." if $verbose && $n % 10 == 0;
    print STDERR "[$n of $size lines]\n" if $verbose && $n % 800 == 0;
    if ($fast) {
        my @cols = split(' ', $line);
        my $key  = $cols[0];
        next unless exists $in_list{$key};
        $hits{$key}++;
        ## Remember the fields of the (last) matching line for -k.
        $fields{$key} = \@cols if defined $column;
    }
    else {
        ## keys() returns a copy of the key list, so deleting entries
        ## inside the loop is safe.
        foreach my $pattern (keys %pending) {
            next unless $line =~ $pending{$pattern};
            $hits{$pattern}++;
            ## Once a pattern has been seen there is no need to look for it
            ## again, unless occurrences must be counted for -d.
            delete $pending{$pattern} unless $dupes;
        }
    }
}
close($target_fh);
print STDERR "[$n of $size lines]\n" if $verbose;

## ---- Report ------------------------------------------------------------
## Print one line per key. With -k the requested field of the matching
## FILE2 line is printed instead; a key that never matched has no such
## line, so the key itself is printed.
my $report = sub {
    foreach my $key (@_) {
        my $out = $key;
        if (defined $column && $fields{$key}) {
            $out = $fields{$key}[$column];
            $out = '' unless defined $out;    ## line had too few fields
        }
        print "$out\n";
    }
};

my @patterns = keys %in_list;
$report->(grep { !$hits{$_} } @patterns) if $missing;
$report->(grep { $hits{$_} } @patterns)  if $common;
## -d keeps its original definition: the pattern occurs more than twice
## when its FILE1 listings and its FILE2 matches are added together.
$report->(grep { $in_list{$_} + ($hits{$_} || 0) > 2 } @patterns) if $dupes;
## Print the help text to STDERR and terminate the program.
##
## Arguments:
##   $exit_status : optional process exit status; defaults to 0 so that
##                  existing calls without arguments behave as before.
## Never returns.
sub usage {
    my ($exit_status) = @_;
    $exit_status = 0 unless defined $exit_status;
    print STDERR <<'EndOfHelp';

USAGE:  compare_lists.pl [OPTIONS] FILE1 FILE2

This script will compare FILE1 and FILE2, searching for the
contents of FILE1 in FILE2 (and NOT vice versa). FILE one must
be one search pattern per line, the search pattern need only be
contained within one of the lines of FILE2.

OPTIONS:
    -c   : Print patterns COMMON to both files
    -f   : Fast mode: compare only the first whitespace-separated
           field of each line of FILE2 with the first field of
           each line of FILE1 (exact match, no pattern search)
    -k N : With -f, print field N of the matching line of FILE2
           instead of the pattern itself
    -d   : Print duplicate entries
    -m   : Print patterns MISSING in FILE2 (default)
    -v   : Verbose: print progress to STDERR
    -h   : Print this help and exit
EndOfHelp
    exit($exit_status);
}