SHARE
TWEET

charScenePlot

tambascot Jan 16th, 2014 44 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2. #
  3. # A perl script to generate an character/scene breakdown from
  4. # an MIT Shakespeare file.
  5. #
  6. # Tony Tambasco
  7. #
  8. # GPL v.2 &c.
  9. ## 29 Dec. 2012
  10. #
  11. # Last Revised 16 Jan. 2014.
  12.  
  13. use strict;
  14. use warnings;
  15. use Data::Dumper;
  16.  
  17. my %all_chars; # storing these as a hash will eliminnate duplicates.
  18. my %breakdown; # A 2-D hash to hold actors by scene.
  19.  
  20. # Vars for calculating the act and scene.
  21. my $act = 1;
  22. my $scene = 1;
  23. my $act_and_scene = "$act\.$scene";
  24.  
  25. # A list of all acts and scenes.
  26. my @all_acts_and_scenes;
  27.  
  28. # A buffer for each char in a scene.
  29. my @chars;
  30.  
  31. # Get our file.
  32.  
  33. my $mit_shakes = pop @ARGV;
  34.  
  35. # Open a file or die trying.
  36. open PLAY, $mit_shakes or die "Could not open file: $!\n";
  37.  
  38. # Traverse the play until EOF
  39.  
  40. while(my $line = <PLAY>) {
  41.  
  42.   # Capture the act number.
  43.   if ($line =~ m/<h3>act (\w+)/i) {
  44.  
  45.     # Something to translate Roman numerals into numbers.
  46.  
  47.     if ($1 eq 'I') {
  48.       $act = 1;
  49.     } elsif ($1 eq 'II') {
  50.       $act = 2;
  51.     } elsif ($1 eq 'III') {
  52.       $act = 3;
  53.     } elsif ($1 eq 'IV') {
  54.       $act = 4;
  55.     } elsif ($1 eq 'V') {
  56.       $act = 5;
  57.     }  elsif (/^\d+$/) {
  58.       # Just in case it's presented numerically
  59.       $act = $1;
  60.     }
  61.    
  62.     # Start with scene 1.
  63.     $scene = 1;
  64.  
  65.     $act_and_scene = "$act\.$scene";
  66. #    print "$act_and_scene\n";  
  67.   }
  68.  
  69.   # Capture the scene number.
  70.   elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) {
  71.  
  72.     # When we start a new scene, write the contents of the
  73.     # @char buffer to the %breakdown hash and clear the
  74.     # @char buffer.
  75.     foreach my $char (@chars) {      
  76.       $breakdown{$act_and_scene}{$char} = 1;
  77.     }
  78.        
  79.     @chars = ();
  80.    
  81.     # Something to translate Roman numerals into numbers.
  82.     if ($1 eq 'I') {
  83.       $scene = 1;
  84.     } elsif ($1 eq 'II') {
  85.       $scene = 2;
  86.     } elsif ($1 eq 'III') {
  87.       $scene = 3;
  88.     } elsif ($1 eq 'IV') {
  89.       $scene = 4;
  90.     } elsif ($1 eq 'V') {
  91.       $scene = 5;
  92.     }  elsif ($1 eq 'VI') {
  93.       $scene = 6;
  94.     } elsif ($1 eq 'VII') {
  95.       $scene = 7;
  96.     } elsif ($1 eq 'VIII') {
  97.       $scene = 8;
  98.     } elsif ($1 eq 'IX') {
  99.       $scene = 9;
  100.     } elsif ($1 eq 'X') {
  101.       $scene = 10;
  102.     } elsif ($1 eq 'XI') {
  103.       $scene = 11;
  104.     } elsif ($1 eq 'XII') {
  105.       $scene = 12;
  106.     } elsif ($1 eq 'XIII') {
  107.       $scene = 13;
  108.     }  elsif ($1 eq 'XIV') {
  109.       $scene = 14;
  110.     } elsif ($1 eq 'XV') {
  111.       $scene = 15;
  112.     } elsif ($1 eq 'XVI') {
  113.       $scene = 16;
  114.     } elsif ($1 eq 'XVII') {
  115.       $scene = 17;
  116.     } elsif ($1 eq 'XVIII') {
  117.       $scene = 18;
  118.     } elsif ($1 eq 'XIX') {
  119.       $scene = 19;
  120.     } elsif ($1 eq 'XX') {
  121.       $scene = 20;
  122.     } elsif ($1 eq 'XXI') {
  123.       $scene = 21;
  124.     }  elsif ($1 eq 'XXII') {
  125.       $scene = 22;
  126.     } elsif ($1 eq 'XXIII') {
  127.       $scene = 23;
  128.     } elsif ($1 eq 'XXIV') {
  129.       $scene = 24;
  130.     } elsif ($1 eq 'XXV') {
  131.       $scene = 25;
  132.     } elsif (/^\d+$/) {
  133.       # Just in case it's presented numerically
  134.       $scene = $1;
  135.     }
  136.    
  137.     $act_and_scene = "$act\.$scene";
  138.     # print "$act_and_scene\n";
  139.  
  140.     # Add the act and scene to the list of all acts and scenes.
  141.     push @all_acts_and_scenes, $act_and_scene;
  142.  
  143.    
  144.   }
  145.  
  146.   elsif ($line =~ m/.*<h3>scene (\d+).*/i) {
  147.     # Or maybe we're dealing with our own format, and
  148.     # the scene numbers are actual numbers
  149.  
  150.     # When we start a new scene, write the contents of the
  151.     # @char buffer to the %breakdown hash and clear the
  152.     # @char buffer.
  153.     foreach my $char (@chars) {      
  154.       $breakdown{$act_and_scene}{$char} = 1;
  155.     }
  156.  
  157.     $act_and_scene = $1;
  158.  
  159.     # print "$act_and_scene\n";
  160.    
  161.     @chars = ();
  162.  
  163.   # Add the act and scene to the list of all acts and scenes.
  164.   push @all_acts_and_scenes, $act_and_scene;
  165.    
  166.   }
  167.  
  168.   elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
  169.    
  170.     # Add the character to the characters list.
  171.    
  172.     push @chars, $1;
  173.  
  174.     # Add the character to the hash of all characters. The value
  175.     # is just a place holder.
  176.  
  177.     $all_chars{$1} = 1;
  178.    
  179.   }
  180. }
  181.  
  182. # At the end of the play, write the @char buffer one last time to
  183. # get the last scene.
  184.  
  185. foreach my $char (@chars) {      
  186.       $breakdown{$act_and_scene}{$char} = 1;
  187.     }
  188.  
  189. # Uncomment for debugging:
  190. # print Dumper{%breakdown};
  191.  
  192. # The @all_chars list will serve as the header of the spreadsheet
  193. # we'll create, so first get all of the characters we kept track
  194. # of into it, and then put a basic column heading for act and scene
  195. # numbers as the first cell.
  196.  
  197. my @all_chars = keys (%all_chars);
  198. unshift @all_chars, 'Act.Scene';
  199.  
  200. # Create the table that will serve as the buffer for our spreadsheet,
  201. # and store the header as the first row.
  202. push my @table, \@all_chars;
  203.  
  204. # Next we'll create a new row for every act and scene, and
  205. # if a character is present in that act and scene, we will
  206. # print a list at that chars index value.
  207.  
  208. my $row_index = 0;
  209.  
  210. while ($row_index <= $#all_acts_and_scenes) {
  211.  
  212.   # Create a new row, and add the present row we're working on
  213.   # as the first column in that row.
  214.   unshift my @new_row, $all_acts_and_scenes[$row_index];
  215.  
  216.   # We want to start in the second column since the first is just
  217.   # going to be the act.scene.
  218.  
  219.   my $col_index = 1;
  220.  
  221.   # Do this for every entry in @all_chars, keeping track of the
  222.   # $col_index as we go.
  223.  
  224.   while ($col_index <= $#all_chars) {
  225.     # If the character in the column occurs in the scene of the row, mark
  226.     # the coordinate with an X. Otherwise, leave it empty.
  227.     if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
  228.       $new_row[$col_index] = 'X';
  229.     }
  230.     else {
  231.       $new_row[$col_index] = '';
  232.     }
  233.  
  234.     $col_index++;
  235.   }
  236.  
  237.   # Incriment the $row_index before saving the present row to the table.
  238.   $table[++$row_index] = \@new_row;
  239.  
  240. }
  241.  
  242. # Print as a CSV. This will be pretty straight-forward because we've already
  243. # created the spreadsheet in the @table, but we need to print a delimiting
  244. # characters along the way. This is just going to STDOUT for simplicity.
  245.  
  246. foreach my $row (@table) {
  247.   foreach my $col (@{$row}) {
  248.     print "$col,";
  249.   }
  250.   print "\n";
  251. }
RAW Paste Data
Top