Advertisement
Guest User

Musicals (NPR Puzzle)

a guest
Nov 12th, 2013
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl -w
  2.  
  3. use strict;
  4.  
  5. use Parse::MediaWikiDump;
  6. use Text::MediawikiFormat as => 'wiki2html';
  7. use Time::Piece;
  8. use Unicode::Normalize;
  9.  
  10. no warnings 'utf8';
  11.  
  12. # Get a list of musicals
  13. my @musicals_tmp = category_members('Broadway musicals');
  14.  
  15. # Remove non-alphas from musicals
  16. my %musicals;
  17. my %hash_musicals;
  18. foreach my $musical (@musicals_tmp)
  19. {
  20.     #print "$musical\n";
  21.     $musical =~ s/[^A-Za-z ]//g;
  22.     $musicals{lc $musical} = 1;
  23.     $hash_musicals{word2num($musical)} = lc $musical;
  24. }
  25. #die;
  26.  
  27. # Read in famous names
  28. my @names;
  29. my %last_to_first;
  30. open FILE, 'FamousNames.txt' or die $!;
  31. while (<FILE>)
  32. {
  33.     chomp;
  34.     my $name = lc $_;
  35.     my $score = 100;
  36.     if ($name =~ /^(.*)\t(.*)$/) {$name = $1;$score = $2;}
  37.     next unless $score >= 90;
  38.     my $firstname; my $lastname;
  39.     if ($name =~ /^([^ ]+) ([^ ]+)$/) {$firstname = $1; $lastname = $2;}
  40.     next unless $firstname;
  41.     push(@names,"$firstname $lastname");
  42.     push(@{$last_to_first{$lastname}},$firstname);
  43. }
  44. close FILE;
  45.  
  46. # Go through the names and pull out anyone with the same first name as a musical
  47. my @musical_names;
  48. foreach my $n (@names)
  49. {
  50.     my ($fn, $ln) = split(/ /,$n);
  51.     push(@musical_names,$n) if $musicals{$fn};
  52. }
  53.  
  54. foreach my $fullname (@musical_names)
  55. {
  56.     my ($fn, $ln) = split(/ /,$fullname);
  57.     #print " $fn\n";
  58.     # Find all others with the same last name
  59.     my $last_people_ref = $last_to_first{$ln};
  60.     # Check if any of these people have a first name that anagrams to a musical
  61.     foreach my $new_fn (@$last_people_ref)
  62.     {
  63.         next if $fn eq $new_fn;
  64.         #print "  $new_fn\n";
  65.         if ($hash_musicals{word2num($new_fn)} && $hash_musicals{word2num($new_fn)} ne $new_fn)
  66.         {
  67.             print "$fn and $new_fn $ln => " . $hash_musicals{word2num($new_fn)} . "\n";
  68.         }
  69.     }
  70. }
  71.  
  72.  
  73. ######
  74. # SUBS
  75. ######
  76.  
  77. sub category_members
  78. {
  79.     use JSON;
  80.     use LWP::Simple;
  81.     my ($category) = @_;
  82.     my @category_members;
  83.     $category =~ s/ /_/g;
  84.    
  85.     my $cmcontinue = "";
  86.     do
  87.     {
  88.         my $url = "http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:$category&cmlimit=500&format=json&cmnamespace=0&cmtype=page&cmcontinue=$cmcontinue";
  89.         my $json = get($url);
  90.         my $href = decode_json($json);
  91.         my @arr = @{$href->{'query'}->{'categorymembers'}};
  92.         foreach my $a (@arr)
  93.         {
  94.             my $title = $a->{'title'};
  95.             $title = remove_diacritics($title);
  96.             if ($title =~ /^(.*) \([^\)]+\)$/) {$title = $1;}
  97.             push(@category_members,$title);
  98.         }
  99.        
  100.         # Decide whether to continue
  101.         if ($href->{'query-continue'}->{'categorymembers'}->{'cmcontinue'})
  102.         {
  103.             $cmcontinue = $href->{'query-continue'}->{'categorymembers'}->{'cmcontinue'};
  104.             sleep(1); # Be nice to Wikipedia
  105.         }
  106.         else {$cmcontinue = "";}
  107.        
  108.     } while $cmcontinue;
  109.    
  110.     return @category_members;
  111. }
  112.  
  113. # Remove diacritics (from a title)
  114. sub remove_diacritics
  115. {
  116.    my $w = NFD(shift);
  117.    $w =~ s/\pM//g;
  118.    return $w;
  119. }
  120.  
  121. sub word2num
  122. {
  123.     # Given a word, turn it into an order-independent hash
  124.     # Convert the word to ALL CAPS
  125.     my $w = uc shift;
  126.     # Remove any non-alphas
  127.     $w =~ s/[^A-Z]//g;
  128.     my %convert = qw(A 2 B 3 C 5 D 7 E 11 F 13 G 17 H 19 I 23 J 29 K 31 L 37 M 41 N 43 O 47 P 53 Q 59 R 61 S 67 T 71 U 73 V 79 W 83 X 89 Y 97 Z 101);
  129.     my $val = 1;
  130.     foreach my $l (split(//,$w))
  131.     {
  132.         next unless $convert{$l};
  133.         $val *= $convert{$l};
  134.     }
  135.     return $val;
  136. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement