Advertisement
Guest User

remove duplicates from filesystem

a guest
Sep 2nd, 2012
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.98 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. use strict;
  3. use Digest::SHA;
  4. use Cwd;
  5. use File::Util;
  # Duplicate finder: scans the current working directory recursively, groups
  # files by size, then by SHA-512 digest within each size group. Every digest
  # shared by two or more files is reported as a duplicate set.
  #
  # Usage:
  #   script        dry run; prints a shell-escaped "rm" line for every member
  #                 of each duplicate set (including the one you may keep)
  #   script -d     deletes all but the last file of each duplicate set
  my $topDir = cwd();
  my ($f) = File::Util->new();
  my (@files) = $f->list_dir($topDir, '--recurse');
  my %hash;    # file size => [ paths having that size ]

  # A missing argument is treated as "no flag" rather than compared as undef.
  my $deleteFlag = defined $ARGV[0] ? $ARGV[0] : '';

  foreach my $file (@files) {
    next if -d $file;
    # A symlink hashes the same as its target; treating the pair as duplicates
    # could delete the real file and keep only a (then dangling) link.
    next if -l $file;
    my $size = $f->size($file);
    # No size available (e.g. the file vanished during the scan): skip it.
    next unless defined $size;
    push @{ $hash{$size} }, $file;
  }

  my ($filectr, $setctr) = (0, 0);
  foreach my $key (sort { $a <=> $b } keys %hash) {    # sizes, ascending
    my @arr = @{ $hash{$key} };
    next if @arr < 2;    # a unique size cannot have a duplicate

    my %shahash;         # hex digest => [ paths with that content ]
    foreach my $file (@arr) {
      # Hashing dies when the file cannot be opened/read; skip that one file
      # instead of aborting the whole run (possibly half-way through deleting).
      my $checksum = eval { getSha512($file) };
      if (!defined $checksum) {
        warn "SKIP: cannot hash $file: $@";
        next;
      }
      push @{ $shahash{$checksum} }, $file;
    }

    # Digests are hex strings, so they are sorted as strings (not numerically)
    # to give a stable report order.
    foreach my $shakey (sort keys %shahash) {
      my @shaarr      = @{ $shahash{$shakey} };
      my $numFilesSha = @shaarr;
      next if $numFilesSha < 2;
      $setctr  += 1;
      $filectr += $numFilesSha;
      print "Files: $numFilesSha Size: $key Hash: $shakey\n";
      if ($deleteFlag eq "-d") {
        # The last file of the set survives; everything before it is removed.
        my $keep = pop @shaarr;
        foreach my $item (@shaarr) {
          if (unlink $item) {
            print "DELETE: $item\n";
          }
          else {
            warn "FAILED to delete $item: $!\n";
          }
        }
        print "  KEEP: $keep\n";
      }
      else {
        foreach my $shaFile (@shaarr) {
          my $escaped = unixFilename($shaFile);
          print "  rm $escaped\n";
        }
      }
    }
  }
  print "=== RESULTS ===\nFiles: $filectr Sets: $setctr\n";
  # Return the SHA-512 digest of a file's contents as a lowercase hex string.
  #
  # Parameters:
  #   $filename - path of the file to hash
  # Returns:
  #   the 128-character hexadecimal digest
  # Dies:
  #   propagates the exception Digest::SHA raises when the file cannot be
  #   opened or read.
  sub getSha512 {
    my ($filename) = @_;
    my $sha = Digest::SHA->new(512);
    # "b" requests binary mode so the digest covers the raw bytes on every
    # platform (no newline translation or text-mode EOF handling).
    $sha->addfile($filename, "b");
    return $sha->hexdigest();
  }
  # Escape a filename so it can be pasted into a POSIX shell command line as a
  # single argument.
  #
  # Every ASCII character outside a small known-safe set (letters, digits and
  # _ . / , + : @ % = -) is prefixed with a backslash. This covers the
  # characters the shell treats specially, including backslash itself, $,
  # backtick, quotes, globbing characters and whitespace. Characters at or
  # above 0x80 are left untouched so multi-byte names are not split up.
  #
  # Parameters:
  #   $filename - the raw filename
  # Returns:
  #   the escaped filename
  sub unixFilename {
    my ($filename) = @_;
    # A backslash followed by a newline is a line continuation in the shell
    # (the newline would disappear), so a newline is single-quoted instead.
    $filename =~ s{([^A-Za-z0-9_./,+:\@%=\x80-\x{10FFFF}-])}
                  { $1 eq "\n" ? "'\n'" : "\\$1" }ge;
    return $filename;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement