Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl -w
- use strict;
- use Data::Dumper;
- my $names_file = $ARGV[0] or die $!;
- my %names; my %q; my %x;
- # Make an array of rare bigrams, ordered kinda.
- my @rare_letters = qw/Z J K V B Y W G P F M C D L/;
- my $LEN_RARE = @rare_letters;
- my @bigrams;
- for (my $i=0;$i<$LEN_RARE;$i++)
- {
- my $l1 = $rare_letters[$i];
- for (my $j=$i+1;$j<$LEN_RARE;$j++)
- {
- my $l2 = $rare_letters[$j];
- my $bigram = $l1 . $l2;
- push(@bigrams,$bigram);
- }
- }
- my $MIN_SCORE = 98;
- open(FILE, $names_file) or die $!;
- while (<FILE>)
- {
- s/[\r\n]//g;
- my $name = $_;
- my $score = 95;
- if ($name =~ /^(.*)\t(.*)$/) {$name = uc $1; $score = $2;}
- if (length($name) < 17 && $score >= $MIN_SCORE) # ENC($name) > 5 &&
- {
- $q{$name} = $score if $name =~ /Q/;
- $x{$name} = $score if $name =~ /X/;
- foreach my $bigram (@bigrams)
- {
- my ($l1, $l2) = split(//,$bigram);
- if ($name =~ /$l1/ && $name =~ /$l2/)
- {
- my $len = uniqueCount($name);
- push(@{$names{$bigram}{$len}},$name);
- last;
- }
- }
- }
- }
- close(FILE);
- my $MAX_MISSING_LETTERS = 12;
- while (my ($qw,$qs) = each (%q))
- {
- while (my ($xw,$xs) = each (%x))
- {
- my $sml = sorted_missing_letters($qw . $xw);
- my $len = length($sml);
- next if $len > $MAX_MISSING_LETTERS; # No more than $MAX_MISSING_LETTERS missing letters
- foreach my $bigram (@bigrams)
- {
- my ($l1, $l2) = split(//,$bigram);
- if ($sml =~ /$l1/ && $sml =~ /$l2/)
- {
- for (my $i = $len; $i <= $MAX_MISSING_LETTERS; $i++)
- {
- next unless $names{$bigram}{$i};
- my @n3 = @{$names{$bigram}{$i}};
- foreach my $n3 (@n3)
- {
- if (uniqueCount( $qw . $xw . $n3 ) == 26 && length(ToXword($qw . $xw . $n3)) < 45)
- {
- my $l = length(ToXword($qw . $xw . $n3));
- print "$l -- $qw $xw $n3\n";
- }
- }
- }
- # If we're missing the given bigram,
- # there's no point going through the rest of them.
- last;
- }
- }
- }
- }
- ######
- # SUBS
- ######
- sub sorted_missing_letters
- {
- my $w = shift;
- my %h;
- $h{$_}++ for split(//,ToXword($w));
- my $ret = '';
- my @alph = split(//,'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
- foreach my $let (@alph)
- {
- unless ($h{$let}) {$ret .= $let;}
- }
- return $ret;
- }
- sub ENC
- {
- my $w = shift;
- $w = ToXword($w);
- my $len = length($w);
- return 0 unless $len;
- my $u = uniqueCount($w);
- return ($u * $u / $len);
- }
- sub ToXword
- {
- my $w = shift;
- $w = uc $w;
- $w =~ s/[^A-Z]//g;
- return $w;
- }
- sub uniqueCount
- {
- my $txt = join('', sort(split //,ToXword(shift)));
- $txt =~ s/(.)\1+/$1/g;
- return length($txt);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement