Advertisement
Guest User

Pangram Names

a guest
Jul 8th, 2013
204
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.56 KB | None | 0 0
  1. #!/usr/bin/perl -w
  2.  
  3. use strict;
  4.  
  5. use Data::Dumper;
  6.  
  7. my $names_file = $ARGV[0] or die $!;
  8. my %names; my %q; my %x;
  9.  
  10. # Make an array of rare bigrams, ordered kinda.
  11. my @rare_letters = qw/Z J K V B Y W G P F M C D L/;
  12. my $LEN_RARE = @rare_letters;
  13. my @bigrams;
  14. for (my $i=0;$i<$LEN_RARE;$i++)
  15. {
  16.     my $l1 = $rare_letters[$i];
  17.     for (my $j=$i+1;$j<$LEN_RARE;$j++)
  18.     {
  19.         my $l2 = $rare_letters[$j];
  20.         my $bigram = $l1 . $l2;
  21.         push(@bigrams,$bigram);
  22.     }
  23. }
  24.  
  25. my $MIN_SCORE = 98;
  26. open(FILE, $names_file) or die $!;
  27. while (<FILE>)
  28. {
  29.     s/[\r\n]//g;
  30.     my $name = $_;
  31.     my $score = 95;
  32.     if ($name =~ /^(.*)\t(.*)$/) {$name = uc $1; $score = $2;}
  33.     if (length($name) < 17 && $score >= $MIN_SCORE) # ENC($name) > 5 &&
  34.     {
  35.         $q{$name} = $score if $name =~ /Q/;
  36.         $x{$name} = $score if $name =~ /X/;
  37.         foreach my $bigram (@bigrams)
  38.         {
  39.             my ($l1, $l2) = split(//,$bigram);
  40.             if ($name =~ /$l1/ && $name =~ /$l2/)
  41.             {
  42.                 my $len = uniqueCount($name);
  43.                 push(@{$names{$bigram}{$len}},$name);
  44.                 last;
  45.             }
  46.         }
  47.     }
  48. }
  49. close(FILE);
  50.  
  51. my $MAX_MISSING_LETTERS = 12;
  52.  
  53. while (my ($qw,$qs) = each (%q))
  54. {
  55.     while (my ($xw,$xs) = each (%x))
  56.     {
  57.         my $sml = sorted_missing_letters($qw . $xw);
  58.         my $len = length($sml);
  59.         next if $len > $MAX_MISSING_LETTERS; # No more than $MAX_MISSING_LETTERS missing letters
  60.         foreach my $bigram (@bigrams)
  61.         {
  62.             my ($l1, $l2) = split(//,$bigram);
  63.             if ($sml =~ /$l1/ && $sml =~ /$l2/)
  64.             {
  65.                 for (my $i = $len; $i <= $MAX_MISSING_LETTERS; $i++)
  66.                 {
  67.                     next unless $names{$bigram}{$i};
  68.                     my @n3 = @{$names{$bigram}{$i}};
  69.                     foreach my $n3 (@n3)
  70.                     {
  71.                         if (uniqueCount( $qw . $xw . $n3 ) == 26 && length(ToXword($qw . $xw . $n3)) < 45)
  72.                         {
  73.                             my $l = length(ToXword($qw . $xw . $n3));
  74.                             print "$l -- $qw $xw $n3\n";
  75.                         }
  76.                     }
  77.                 }
  78.                 # If we're missing the given bigram,
  79.                 # there's no point going through the rest of them.
  80.                 last;
  81.             }
  82.         }
  83.     }
  84. }
  85.  
  86. ######
  87. # SUBS
  88. ######
  89.  
  90. sub sorted_missing_letters
  91. {
  92.     my $w = shift;
  93.     my %h;
  94.     $h{$_}++ for split(//,ToXword($w));
  95.     my $ret = '';
  96.     my @alph = split(//,'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
  97.     foreach my $let (@alph)
  98.     {
  99.         unless ($h{$let}) {$ret .= $let;}
  100.     }
  101.     return $ret;
  102. }
  103.  
  104. sub ENC
  105. {
  106.     my $w = shift;
  107.     $w = ToXword($w);
  108.     my $len = length($w);
  109.     return 0 unless $len;
  110.     my $u = uniqueCount($w);
  111.     return ($u * $u / $len);
  112. }
  113.  
  114. sub ToXword
  115. {
  116.     my $w = shift;
  117.     $w = uc $w;
  118.     $w =~ s/[^A-Z]//g;
  119.     return $w;
  120. }
  121.  
  122. sub uniqueCount
  123. {
  124.     my $txt = join('', sort(split //,ToXword(shift)));
  125.     $txt =~ s/(.)\1+/$1/g;
  126.     return length($txt);
  127. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement