Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
# dedupe.pl - remove duplicate files from a single directory.
#
# Usage: dedupe.pl <directory>
#
# Every regular file directly inside <directory> (no recursion) is hashed
# with MD5. Files sharing a digest are treated as duplicates: the one with
# the shortest path is kept and all others are deleted.
#
# Exits with status 1 when no directory argument is given; dies if the
# directory or one of its files cannot be opened.
#
# NOTE(review): duplicates are identified by MD5 digest alone; file contents
# are not compared byte-for-byte before deletion.
use warnings;
use strict;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use Data::Dumper;

if (!defined($ARGV[0]) || length($ARGV[0]) < 1) {
    print "Usage: dedupe.pl <directory>\n";
    exit 1;    # a usage error must not report success
}

my $dir = $ARGV[0];
my $ctx = Digest::MD5->new;

# Maps hex digest => array ref of full paths having that digest.
my %md5list = ();

print "Processing files from $dir\n";
opendir(my $dh, $dir) or die "Could not open $dir: $!\n";

# Explicit defined() so a directory entry named "0" cannot end the loop.
while (defined(my $filename = readdir($dh))) {
    my $fullpath = $dir . "/" . $filename;
    next unless -f $fullpath;    # skips ".", "..", subdirectories, etc.

    print "$filename\t";

    # Three-argument open: the filename is never interpreted as a mode or
    # a pipe, whatever characters it contains.
    open(my $fh, '<', $fullpath) or die "Could not read $fullpath: $!\n";
    binmode($fh);    # hash raw bytes, with no newline translation
    $ctx->addfile($fh);

    # hexdigest() also resets the context, so $ctx is reusable per file.
    my $md5 = $ctx->hexdigest();
    close($fh);

    if (!exists $md5list{$md5}) {
        print "Newly found key: $md5\n";
        $md5list{$md5} = [];
    }
    print $md5 . "\n";
    push(@{ $md5list{$md5} }, $fullpath);
}
closedir($dh);

# Pass a reference so the hash is dumped as one structure rather than as a
# flattened list of unrelated values.
print Dumper(\%md5list);

# Sorted so the report order is reproducible between runs.
foreach my $key (sort keys %md5list) {
    my @files = @{ $md5list{$key} };
    my $count = scalar(@files);
    next unless $count > 1;

    print "$key: $count files\n";

    # Keep the file with the shortest path; on a tie the first one read
    # from the directory wins.
    my $shortest = $files[0];
    foreach my $file (@files) {
        if (length($file) < length($shortest)) {
            $shortest = $file;
        }
    }

    foreach my $file (@files) {
        if ($file eq $shortest) {
            print "\tKeeping:\t$file\n";
        }
        else {
            print "\tRemoving:\t$file\n";
            unlink($file) or warn "Could not remove $file: $!\n";
        }
    }
}
Add Comment
Please, Sign In to add comment