SHARE
TWEET

charScenePlot.pl

tambascot Jan 30th, 2013 48 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2. #
  3. # A perl script to generate an character/scene breakdown from
  4. # an MIT Shakespeare file.
  5. #
  6. # Tony Tambasco
  7. #
  8. # GPL v.2 &c.
  9. #
  10. # 29 Dec. 2012
  11. #
  12. # Last Revised 30 Jan. 2013.
  13.  
  14. use strict;
  15. use warnings;
  16. use Data::Dumper;
  17.  
  18. my %all_chars; # storing these as a hash will eliminnate duplicates.
  19. my %breakdown; # A 2-D hash to hold actors by scene.
  20.  
  21. # Vars for calculating the act and scene.
  22. my $act = 1;
  23. my $scene = 1;
  24. my $act_and_scene = "$act\.$scene";
  25.  
  26. # A list of all acts and scenes.
  27. my @all_acts_and_scenes;
  28.  
  29. # A buffer for each char in a scene.
  30. my @chars;
  31.  
  32. # Get our file.
  33.  
  34. my $mit_shakes = pop @ARGV;
  35.  
  36. # Open a file or die trying.
  37. open PLAY, $mit_shakes or die "Could not open file: $!\n";
  38.  
  39. # Traverse the play until EOF
  40.  
  41. while(my $line = <PLAY>) {
  42.  
  43.   # Capture the act number.
  44.   if ($line =~ m/<h3>ACT (\w+)/) {
  45.  
  46.     # Something to translate Roman numerals into numbers.
  47.  
  48.     if ($1 eq 'I') {
  49.       $act = 1;
  50.     } elsif ($1 eq 'II') {
  51.       $act = 2;
  52.     } elsif ($1 eq 'III') {
  53.       $act = 3;
  54.     } elsif ($1 eq 'IV') {
  55.       $act = 4;
  56.     } elsif ($1 eq 'V') {
  57.       $act = 5;
  58.     }  elsif (/^\d+$/) {
  59.       # Just in case it's presented numerically
  60.       $act = $1;
  61.     }
  62.    
  63.     # Start with scene 1.
  64.     $scene = 1;
  65.  
  66.     $act_and_scene = "$act\.$scene";
  67.    
  68.   }
  69.  
  70.   # Capture the scene number.
  71.   elsif ($line =~ m/.*<h3>SCENE (\w+)\..*/) {
  72.  
  73.     # When we start a new scene, write the contents of the
  74.     # @char buffer to the %breakdown hash and clear the
  75.     # @char buffer.
  76.     foreach my $char (@chars) {      
  77.       $breakdown{$act_and_scene}{$char} = 1;
  78.     }
  79.        
  80.     @chars = ();
  81.    
  82.     # Something to translate Roman numerals into numbers.
  83.     if ($1 eq 'I') {
  84.       $scene = 1;
  85.     } elsif ($1 eq 'II') {
  86.       $scene = 2;
  87.     } elsif ($1 eq 'III') {
  88.       $scene = 3;
  89.     } elsif ($1 eq 'IV') {
  90.       $scene = 4;
  91.     } elsif ($1 eq 'V') {
  92.       $scene = 5;
  93.     }  elsif ($1 eq 'VI') {
  94.       $scene = 6;
  95.     } elsif ($1 eq 'VII') {
  96.       $scene = 7;
  97.     } elsif ($1 eq 'VIII') {
  98.       $scene = 8;
  99.     } elsif ($1 eq 'IX') {
  100.       $scene = 9;
  101.     } elsif ($1 eq 'X') {
  102.       $scene = 10;
  103.     } elsif ($1 eq 'XI') {
  104.       $scene = 11;
  105.     } elsif ($1 eq 'XII') {
  106.       $scene = 12;
  107.     } elsif ($1 eq 'XIII') {
  108.       $scene = 13;
  109.     }  elsif ($1 eq 'XIV') {
  110.       $scene = 14;
  111.     } elsif ($1 eq 'XV') {
  112.       $scene = 15;
  113.     } elsif ($1 eq 'XVI') {
  114.       $scene = 16;
  115.     } elsif ($1 eq 'XVII') {
  116.       $scene = 17;
  117.     } elsif ($1 eq 'XVIII') {
  118.       $scene = 18;
  119.     } elsif ($1 eq 'XIX') {
  120.       $scene = 19;
  121.     } elsif ($1 eq 'XX') {
  122.       $scene = 20;
  123.     } elsif ($1 eq 'XXI') {
  124.       $scene = 21;
  125.     }  elsif ($1 eq 'XXII') {
  126.       $scene = 22;
  127.     } elsif ($1 eq 'XXIII') {
  128.       $scene = 23;
  129.     } elsif ($1 eq 'XXIV') {
  130.       $scene = 24;
  131.     } elsif ($1 eq 'XXV') {
  132.       $scene = 25;
  133.     } elsif (/^\d+$/) {
  134.       # Just in case it's presented numerically
  135.       $scene = $1;
  136.     }
  137.  
  138.     $act_and_scene = "$act\.$scene";
  139.  
  140.     # Add the act and scene to the list of all acts and scenes.
  141.     push @all_acts_and_scenes, $act_and_scene;
  142.  
  143.   }
  144.  
  145.   elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
  146.  
  147.     # Add the character to the characters list.
  148.    
  149.     push @chars, $1;
  150.  
  151.     # Add the character to the hash of all characters. The value
  152.     # is just a place holder.
  153.  
  154.     $all_chars{$1} = 1;
  155.    
  156.   }
  157. }
  158.  
  159. # At the end of the play, write the @char buffer one last time to
  160. # get the last scene.
  161.  
  162. foreach my $char (@chars) {      
  163.       $breakdown{$act_and_scene}{$char} = 1;
  164.     }
  165.  
  166. # Uncomment for debugging:
  167. # print Dumper{%breakdown};
  168.  
  169. # The @all_chars list will serve as the header of the spreadsheet
  170. # we'll create, so first get all of the characters we kept track
  171. # of into it, and then put a basic column heading for act and scene
  172. # numbers as the first cell.
  173.  
  174. my @all_chars = keys (%all_chars);
  175. unshift @all_chars, 'Act.Scene';
  176.  
  177. # Create the table that will serve as the buffer for our spreadsheet,
  178. # and store the header as the first row.
  179. push my @table, \@all_chars;
  180.  
  181. # Next we'll create a new row for every act and scene, and
  182. # if a character is present in that act and scene, we will
  183. # print a list at that chars index value.
  184.  
  185. my $row_index = 0;
  186.  
  187. while ($row_index <= $#all_acts_and_scenes) {
  188.  
  189.   # Create a new row, and add the present row we're working on
  190.   # as the first column in that row.
  191.   unshift my @new_row, $all_acts_and_scenes[$row_index];
  192.  
  193.   # We want to start in the second column since the first is just
  194.   # going to be the act.scene.
  195.  
  196.   my $col_index = 1;
  197.  
  198.   # Do this for every entry in @all_chars, keeping track of the
  199.   # $col_index as we go.
  200.  
  201.   while ($col_index <= $#all_chars) {
  202.     # If the character in the column occurs in the scene of the row, mark
  203.     # the coordinate with an X. Otherwise, leave it empty.
  204.     if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
  205.       $new_row[$col_index] = 'X';
  206.     }
  207.     else {
  208.       $new_row[$col_index] = '';
  209.     }
  210.  
  211.     $col_index++;
  212.   }
  213.  
  214.   # Incriment the $row_index before saving the present row to the table.
  215.   $table[++$row_index] = \@new_row;
  216.  
  217. }
  218.  
  219. # Print as a CSV. This will be pretty straight-forward because we've already
  220. # created the spreadsheet in the @table, but we need to print a delimiting
  221. # characters along the way. This is just going to STDOUT for simplicity.
  222.  
  223. foreach my $row (@table) {
  224.   foreach my $col (@{$row}) {
  225.     print "$col,";
  226.   }
  227.   print "\n";
  228. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top