SHOW:
|
|
- or go back to the newest paste.
| 1 | #!/usr/bin/perl -w | |
| 2 | ######################################################################## | |
| 3 | #### This program is free software; you can redistribute it and/or modify | |
| 4 | #### it under the terms of the GNU General Public License as published by | |
| 5 | #### the Free Software Foundation; either version 3 of the License, or | |
| 6 | #### (at your option) any later version. | |
| 7 | #### | |
| 8 | #### This program is distributed in the hope that it will be useful, | |
| 9 | #### but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 | #### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 | #### GNU General Public License for more details. | |
| 12 | #### | |
| 13 | #### You should have received a copy of the GNU General Public License | |
| 14 | #### along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 15 | #### | |
| 16 | #### If you don't understand what Free Software is, please read (or reread) | |
| 17 | #### this page: http://www.gnu.org/philosophy/free-sw.html | |
| 18 | ######################################################################## | |
| 19 | use strict; | |
| 20 | use Getopt::Std; | |
# ---- Command-line handling -------------------------------------------
# Recognised switches:
#   -m  report patterns missing from FILE2 (default)
#   -c  report patterns common to both files
#   -d  report duplicate entries
#   -f  fast mode: compare only the first whitespace-separated field
#   -k  N: column to print (only valid together with -f)
#   -v  progress output on STDERR
#   -h  help
my %opts;
getopts('hvfcmdk:', \%opts);

my $missing = $opts{m} || undef;
my $column  = $opts{k} || undef;
my $common  = $opts{c} || undef;
my $verbose = $opts{v} || undef;
my $fast    = $opts{f} || undef;
my $dupes   = $opts{d} || undef;

# Reporting missing patterns is the default when neither -c nor -d is given.
$missing = 1 unless $common || $dupes;

# Count the arguments rather than testing $ARGV[1] for truth, so that a
# second file literally named "0" is still accepted.
usage() if @ARGV < 2;
usage() if $opts{h};

# %k      : counter per pattern (occurrences in FILE1 plus hits in FILE2)
# %found  : patterns iterated over while scanning FILE2 in non-fast mode
# %fields : fast mode with -k, the split fields of a FILE2 line per key
my (%found, %k, %fields);
if ($column) {
    die("The -k option only works in fast (-f) mode\n") unless $fast;
    $column--;    ## So I don't need to count from 0
    # NOTE: "-k 1" becomes 0 here, which the later truth tests on $column
    # treat as "no column"; the key itself (column 1) is printed then.
}
| 37 | ||
# ---- Load the search patterns from FILE1 -----------------------------
# Each pattern is counted in %k and registered in %found.  In fast mode
# the pattern is the first field of the line as produced by
# split(/\s+/); otherwise it is the whole chomped line.
# Three-argument open with a lexical handle, so characters such as
# '<', '>' or '|' in the file name are never interpreted as a mode.
open(my $patterns_fh, '<', $ARGV[0])
    or die("Cannot open $ARGV[0]: $!\n");
while (my $line = <$patterns_fh>) {
    chomp $line;

    # Skip blank lines.  In fast mode they would yield an undefined key;
    # in normal mode they would become an empty regex, which Perl
    # interprets as "reuse the last successfully matched pattern".
    next unless $line =~ /\S/;

    my $key = $line;
    if ($fast) {
        ($key) = split(/\s+/, $line);
    }
    $k{$key}++;
    $found{$key}++;
}
close($patterns_fh);
# ---- Scan FILE2 -------------------------------------------------------
# $n    : number of non-blank FILE2 lines processed so far
# $size : total number of FILE2 lines (only counted in verbose mode; it
#         is used solely for the progress messages)
my $n    = 0;
my $size = 0;
if ($verbose) {
    open(my $count_fh, '<', $ARGV[1])
        or die("Cannot open $ARGV[1]: $!\n");
    while (<$count_fh>) {
        $size++;
    }
    close($count_fh);
}

open(my $list_fh, '<', $ARGV[1])
    or die("Cannot open $ARGV[1]: $!\n");
while (my $line = <$list_fh>) {
    next if $line =~ /^\s+$/;
    $n++;
    chomp $line;
    print STDERR "." if $verbose && $n % 10 == 0;
    print STDERR "[$n of $size lines]\n" if $verbose && $n % 800 == 0;
    if ($fast) {
        # Fast mode: exact comparison of the first field only.
        my @a = split(/\s+/, $line);
        $k{$a[0]}++ if defined($k{$a[0]});
        # Remember the whole line so the requested column can be printed.
        $fields{$a[0]} = \@a if $column;
    }
    else {
        # Normal mode: each FILE1 line is used as a regular expression
        # that only needs to match somewhere in the current line.
        # keys() returns a copy of the key list, so deleting from %found
        # inside the loop is safe.
        foreach my $key (keys(%found)) {
            if ($line =~ /$key/) {
                $k{$key}++;
                # Unless duplicates are wanted, one hit is enough: drop
                # the pattern so it is not tried against later lines.
                # (Assigning undef, as before, left the key in place and
                # the pattern kept being searched for.)
                delete $found{$key} unless $dupes;
            }
        }
    }
}
close($list_fh);
print STDERR "[$n of $size lines]\n" if $verbose;
| 86 | #$missing && do map{print "$_ : $k{$_}\n" }keys(%k);
| |
# ---- Report -----------------------------------------------------------
# A counter of 1 in %k means the pattern was never seen in FILE2, more
# than 1 means it was seen, more than 2 is reported as a duplicate.
# Plain loops replace the former "do map{...}": "do EXPR" evaluates the
# map and then tries to execute a file named after its result.
{
    # Text printed for a key: the requested column of the FILE2 line
    # recorded for it or, when no column was requested, no line was
    # recorded (the key never occurred in FILE2) or the line is too
    # short, the key itself.  Dereferencing the missing entry used to
    # abort the script under "use strict".
    my $value_for = sub {
        my ($key) = @_;
        return $key unless $column;
        my $row = $fields{$key};
        return $key unless $row && defined $row->[$column];
        return $row->[$column];
    };

    # Sorted so that the output order is reproducible between runs.
    my @report_keys = sort keys %k;

    if ($missing) {
        for my $key (@report_keys) {
            print $value_for->($key), "\n" unless $k{$key} > 1;
        }
    }
    if ($common) {
        for my $key (@report_keys) {
            print $value_for->($key), "\n" if $k{$key} > 1;
        }
    }
    if ($dupes) {
        for my $key (@report_keys) {
            print $value_for->($key), "\n" if $k{$key} > 2;
        }
    }
}
# usage()
# Print the help text to STDERR and terminate the program with exit
# status 0.  Takes no arguments and never returns.
sub usage {
    print STDERR <<EndOfHelp;

USAGE: compare_lists.pl [OPTIONS] FILE1 FILE2

This script will compare FILE1 and FILE2, searching for the
contents of FILE1 in FILE2 (and NOT vice versa). FILE one must
be one search pattern per line, the search pattern need only be
contained within one of the lines of FILE2.

OPTIONS:
    -c   : Print patterns COMMON to both files
    -f   : Search only the first characters of each line of FILE2
           for the search pattern given in FILE1
    -d   : Print duplicate entries
    -k N : With -f, print column N (whitespace separated, counting
           from 1) of the matching FILE2 line instead of the pattern
    -m   : Print patterns MISSING in FILE2 (default)
    -v   : Print progress information to STDERR
    -h   : Print this help and exit
EndOfHelp
    exit(0);
}