Guest User

MD5_dedupe.pl

a guest
Oct 10th, 2021
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.32 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. use warnings;
  3. use strict;
  4. use Digest::MD5 qw(md5 md5_hex md5_base64);
  5. use Data::Dumper;
  6.  
  # Remove duplicate files, identified by the MD5 of their content, from a
  # single directory.
  #
  # Usage: dedupe.pl <directory>
  #
  # Every regular file directly inside <directory> is hashed and grouped by
  # digest.  For each group holding more than one file, the file with the
  # shortest path is kept and every other member is deleted.  Subdirectories
  # are not descended into.

  if (!defined $ARGV[0] || length($ARGV[0]) < 1) {
      print "Usage: dedupe.pl <directory>\n";
      exit;
  }

  my $dir     = $ARGV[0];
  my %md5list = ();    # hex digest => array ref of full paths with that digest

  print "Processing files from $dir\n";
  opendir(my $dh, $dir) or die "Could not open $dir: $!\n";

  # Explicit defined() so an entry literally named "0" cannot end the scan.
  while (defined(my $filename = readdir($dh))) {
      my $fullpath = $dir . "/" . $filename;

      # -f follows symlinks, so a link and its target would hash identically
      # and the real file could be the one that gets deleted.  Skip links.
      next if -l $fullpath;
      next unless -f $fullpath;

      print "$filename\t";
      my $md5 = _md5_hex_of_file($fullpath);

      if (!exists $md5list{$md5}) {
          print "Newly found key: $md5\n";
          $md5list{$md5} = [];
      }
      print $md5 . "\n";
      push @{ $md5list{$md5} }, $fullpath;
  }
  closedir($dh);

  # Debug dump of the digest => paths map; pass a reference so the hash is
  # shown as one structure rather than a flattened key/value list.
  print Dumper(\%md5list);

  # Sorted keys give a stable report order from run to run.
  foreach my $key (sort keys %md5list) {
      my @paths = @{ $md5list{$key} };
      my $count = scalar @paths;
      next unless $count > 1;

      print "$key: $count files\n";

      # Keep the first path of minimal length; the strict '<' means ties are
      # resolved in favour of the entry seen earliest.
      my $shortest = $paths[0];
      foreach my $file (@paths) {
          if (length($file) < length($shortest)) {
              $shortest = $file;
          }
      }

      foreach my $file (@paths) {
          if ($file eq $shortest) {
              print "\tKeeping:\t$file\n";
          } else {
              print "\tRemoving:\t$file\n";
              unlink $file or warn "Could not remove $file: $!\n";
          }
      }
  }

  # Return the hex MD5 digest of the file at $path.
  # Dies if the file cannot be opened; this happens during the scan phase,
  # before any file has been deleted.
  sub _md5_hex_of_file {
      my ($path) = @_;

      # Three-argument open: the name is never interpreted as a mode or pipe.
      open(my $fh, '<', $path) or die "Could not read $path: $!\n";
      binmode($fh);    # hash raw bytes, with no newline/encoding translation

      # A fresh context per file, so no state can carry over between files.
      my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
      close($fh);

      return $digest;
  }
Add Comment
Please, Sign In to add comment