Guest User

Untitled

a guest
Jan 23rd, 2018
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 3.57 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use strict; use warnings;
  3. use List::Util 'sum';
  4. #use Data::Dumper;
  5. use Getopt::Long qw(:config gnu_getopt);
  6. BEGIN {
  7.     eval {
  8.         require Math::Random::MT::Perl; Math::Random::MT::Perl->import('rand');
  9.     };
  10.     warn "Optional module Math::Random::MT::Perl not found.\n" if $@;
  11. }
  12.  
  13. #constants
  14. my @options = qw(eng-1M eng-all eng-fiction eng-gb eng-us fre ger heb rus spa irish german-medical bulgarian catalan swedish brazilian canadian-english-insane manx italian ogerman portuguese polish gaelic finnish);
  15. my $n = 4;
  16. my $default_dataset = 'Eng1M';
  17.  
  18. #data from loaded files
  19. my @loaded_data;
  20.  
  21. #data after normalizing and combining datasets
  22. my %grams;
  23. my %freqs;
  24.  
  25. #some command line options
  26. my $debug_mode;
  27. my $target_offset = -4; #needs testing;
  28. my $dont_normalize;
  29.  
  30. sub pick(%) {
  31.     my %f = @_;
  32.     my @c = keys %f;
  33.     my @w = map { $f{$_} } @c;
  34.     my $r = int(rand(sum(@w)));
  35.     for(0..$#w) {
  36.         return $c[$_] if $r < $w[$_];
  37.         $r -= $w[$_];
  38.     }
  39.     print "end of pick loop reached. returned $c[$#w]\n" if $debug_mode;
  40.     return $c[$#w];
  41. }
  42.  
  43. sub generate {
  44.     my $target = pick %freqs + $target_offset;
  45.     my $word = ' ' x ($n-1);
  46.     my $c;
  47.     do {
  48.         my $len = (length $word) - ($n-1);
  49.         my %ftable = %{$grams{substr($word, -$n+1, $n-1)}};
  50.         $ftable{' '} *= 2**($len-$target);
  51.         $c = pick %ftable ;
  52.         $word .= $c;
  53.     } while $c ne ' ';
  54.     $word =~ s/\s//g;
  55.     $word = "$word (Target: $target)" if $debug_mode;
  56.     return $word;
  57. }
  58.  
  59. sub help {
  60.     print "Usage: words [-dhNo] [DATASETS...] [NUMBER_OF_WORDS]\n";
  61.     print "default: $default_dataset\n";
  62.     print 'valid datasets: ' . (join ' --', @options) . "\n";
  63.     return 0;
  64. }
  65.  
  66. sub handle_opt($) {
  67.     my ($mod) = @_;
  68.     return sub {
  69.         my $name = "Data/$mod.pl"
  70.         my $r;
  71.         if ($r = do $n) {
  72.             push @loaded_data, [$r];
  73.         }
  74.         else {
  75.             warn "Couldn't parse $n: $@" if $@;
  76.             warn "Invalid option $_[0]: couldn't load $name: $!" unless defined $r;
  77.             warn "Invalid option $_[0]: couldn't run $n" unless $r;
  78.         }
  79.     }
  80. }
  81.  
  82. sub main {
  83.     #option handling
  84.     my $help;
  85.     GetOptions (
  86.                 d        => \$debug_mode,
  87.                 'h|help' => \$help,
  88.                 N        => \$dont_normalize,
  89.                 'o=s'    => \$target_offset,
  90.                 map {
  91.                     my $opt=$_;
  92.                     s/(^|-)(.)/\u$2/g;
  93.                     $opt, handle_opt $_ } @options
  94.                );
  95.     return help if $help;
  96.     #combine/normalize datasets
  97.     my $r;
  98.     if ($r = do "Data/$default_dataset.pl") {
  99.         @loaded_data ||= [do "Data/$default_dataset.pl"];
  100.     }
  101.     else {
  102.         die "Couldn't compile default datafile: $@" if $@;
  103.         die "Couldn't load default datafile: $!" unless defined $r;
  104.         die 'Blargh!';
  105.     }
  106.     for(@loaded_data) {
  107.         my ($data, $fdata) = @$_;
  108.         #normalize and combine character frequencies
  109.         while( my ($key, $subhash) = each %$data ) {
  110.             my $sum = $dont_normalize || sum(values %$subhash);
  111.             while( my ($c, $v) = each %$subhash ) {
  112.                 $grams{$key}->{$c} += $v/$sum;
  113.             }
  114.         }
  115.         #normalize and combine length histograms
  116.         my $sum = $dont_normalize || sum(values %$fdata);
  117.         while ( my ($len, $f) = each %$fdata ) {
  118.             $freqs{$len} += $f/$sum;
  119.         }
  120.     }
  121.     local $, = ' ';
  122.     print (map {generate} 1..(int($ARGV[0])||1));
  123.     print "\n";
  124. }
  125.  
  126. main unless caller;
Add Comment
Please, Sign In to add comment