Advertisement
Gredunza

wordrank.pl

Jan 24th, 2012
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.31 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. use strict;
  4. use warnings;
  5.  
  6. my (@charval,%charval);
  7. my $file_charval = '.\wordrank.txt';
  8. my $file_output = '.\wordrank_out.txt';
  9. my @wordlen = (5,8);
  10. my $cutoff = 1000;
  11. my $caprange = qr/^[n]/i;
  12. my @wordlist;
  13.  
  14. if (-f $file_charval) {
  15.     open CHARVAL, '<', $file_charval;
  16.     chomp(@charval = <CHARVAL>);
  17.     close CHARVAL;
  18.     foreach (@charval) {
  19.         my ($key,$val) = split;
  20.         $charval{$key} = $val;
  21.     }
  22. }
  23. else { print "Error: Values file was not found.\n"; exit; }
  24.  
  25. sub uniqchars {
  26.     # http://www.perlmonks.org/?node_id=239649
  27.     my $txt = join('', sort(split //,shift));
  28.     $txt =~ s/(.)\1+/$1/g;
  29.     return $txt;
  30. }
  31.  
  32. sub wordrank {
  33.     my $word = shift;
  34.     my @char = split(//,$word);
  35.     my $rank = 0;
  36.     foreach (@char) {
  37.         if (defined($charval{$_})) { $rank += $charval{$_}; }
  38.         #else { print "Warning: Found a character without a value: '$_', skipping it.\n"; }
  39.     }
  40.     return $rank;
  41. }
  42.  
  43.  
  44. while (<>) {
  45.     chomp;
  46.     $_ = lc($_);
  47.     $_ =~ s/ij/Y/g;
  48.     if (length($_) < $wordlen[0] or length($_) > $wordlen[1] or substr($_,0,1) !~ $caprange) { next; }
  49.     my $input = $_;
  50.     my $uncapped = substr($input,1);
  51.     my $output = wordrank(uniqchars($uncapped)).",$input,$uncapped,".uniqchars($uncapped);
  52.     push @wordlist, $output;
  53. }
  54.  
  55. @wordlist = reverse sort {
  56.     (split(/,/,$a))[0] <=> (split(/,/,$b))[0];
  57. } @wordlist;
  58.  
  59. my @filtered;
  60. OUTER: for (my $i = 0; $i <= $cutoff; $i++) {
  61.     my $record = $wordlist[$i];
  62.     my $pattern = (split(/,/,$record))[3];
  63.     my %letters;
  64.     foreach (split(//,$pattern)) { $letters{$_} = 1; }
  65.         INNER: for (my $j = ($i + 1); $j <= $cutoff; $j++) {
  66.         my $match_record = $wordlist[$j];
  67.         my $match_pattern = (split(/,/,$match_record))[3];
  68.         foreach (split(//,$match_pattern)) {
  69.             if (defined($letters{$_})) { next INNER; }
  70.         }
  71.             my @first = split(/,/,$record);
  72.             my @second = split(/,/,$match_record);
  73.             my $sum = $first[0] + $second[0];
  74.             push @filtered, $sum . '|' . join(',',@first) . '|' . join(',',@second);
  75.         next OUTER;
  76.     }
  77. }
  78. @filtered = reverse sort {
  79.     (split(/\|/,$a))[0] <=> (split(/\|/,$b))[0];
  80. } @filtered;
  81.  
  82. open OUT, '>', $file_output;
  83. foreach (@filtered) {
  84.     my $sum = (split(/\|/,$_))[0];
  85.     my @first = split(/,/,(split(/\|/,$_))[1]);
  86.     my @second = split(/,/,(split(/\|/,$_))[2]);
  87.     my $output = "$first[1] ($first[0]) + $second[1] ($second[0]) = $sum";
  88.     print OUT "$output\n";
  89. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement