tambascot

charScenePlot

Jan 16th, 2014
108
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2. #
  3. # A perl script to generate an character/scene breakdown from
  4. # an MIT Shakespeare file.
  5. #
  6. # Tony Tambasco
  7. #
  8. # GPL v.2 &c.
  9. ## 29 Dec. 2012
  10. #
  11. # Last Revised 16 Jan. 2014.
  12.  
  13. use strict;
  14. use warnings;
  15. use Data::Dumper;
  16.  
  17. my %all_chars; # storing these as a hash will eliminnate duplicates.
  18. my %breakdown; # A 2-D hash to hold actors by scene.
  19.  
  20. # Vars for calculating the act and scene.
  21. my $act = 1;
  22. my $scene = 1;
  23. my $act_and_scene = "$act\.$scene";
  24.  
  25. # A list of all acts and scenes.
  26. my @all_acts_and_scenes;
  27.  
  28. # A buffer for each char in a scene.
  29. my @chars;
  30.  
  31. # Get our file.
  32.  
  33. my $mit_shakes = pop @ARGV;
  34.  
  35. # Open a file or die trying.
  36. open PLAY, $mit_shakes or die "Could not open file: $!\n";
  37.  
  38. # Traverse the play until EOF
  39.  
  40. while(my $line = <PLAY>) {
  41.  
  42.   # Capture the act number.
  43.   if ($line =~ m/<h3>act (\w+)/i) {
  44.  
  45.     # Something to translate Roman numerals into numbers.
  46.  
  47.     if ($1 eq 'I') {
  48.       $act = 1;
  49.     } elsif ($1 eq 'II') {
  50.       $act = 2;
  51.     } elsif ($1 eq 'III') {
  52.       $act = 3;
  53.     } elsif ($1 eq 'IV') {
  54.       $act = 4;
  55.     } elsif ($1 eq 'V') {
  56.       $act = 5;
  57.     }  elsif (/^\d+$/) {
  58.       # Just in case it's presented numerically
  59.       $act = $1;
  60.     }
  61.    
  62.     # Start with scene 1.
  63.     $scene = 1;
  64.  
  65.     $act_and_scene = "$act\.$scene";
  66. #    print "$act_and_scene\n";  
  67.   }
  68.  
  69.   # Capture the scene number.
  70.   elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) {
  71.  
  72.     # When we start a new scene, write the contents of the
  73.     # @char buffer to the %breakdown hash and clear the
  74.     # @char buffer.
  75.     foreach my $char (@chars) {      
  76.       $breakdown{$act_and_scene}{$char} = 1;
  77.     }
  78.    
  79.     @chars = ();
  80.    
  81.     # Something to translate Roman numerals into numbers.
  82.     if ($1 eq 'I') {
  83.       $scene = 1;
  84.     } elsif ($1 eq 'II') {
  85.       $scene = 2;
  86.     } elsif ($1 eq 'III') {
  87.       $scene = 3;
  88.     } elsif ($1 eq 'IV') {
  89.       $scene = 4;
  90.     } elsif ($1 eq 'V') {
  91.       $scene = 5;
  92.     }  elsif ($1 eq 'VI') {
  93.       $scene = 6;
  94.     } elsif ($1 eq 'VII') {
  95.       $scene = 7;
  96.     } elsif ($1 eq 'VIII') {
  97.       $scene = 8;
  98.     } elsif ($1 eq 'IX') {
  99.       $scene = 9;
  100.     } elsif ($1 eq 'X') {
  101.       $scene = 10;
  102.     } elsif ($1 eq 'XI') {
  103.       $scene = 11;
  104.     } elsif ($1 eq 'XII') {
  105.       $scene = 12;
  106.     } elsif ($1 eq 'XIII') {
  107.       $scene = 13;
  108.     }  elsif ($1 eq 'XIV') {
  109.       $scene = 14;
  110.     } elsif ($1 eq 'XV') {
  111.       $scene = 15;
  112.     } elsif ($1 eq 'XVI') {
  113.       $scene = 16;
  114.     } elsif ($1 eq 'XVII') {
  115.       $scene = 17;
  116.     } elsif ($1 eq 'XVIII') {
  117.       $scene = 18;
  118.     } elsif ($1 eq 'XIX') {
  119.       $scene = 19;
  120.     } elsif ($1 eq 'XX') {
  121.       $scene = 20;
  122.     } elsif ($1 eq 'XXI') {
  123.       $scene = 21;
  124.     }  elsif ($1 eq 'XXII') {
  125.       $scene = 22;
  126.     } elsif ($1 eq 'XXIII') {
  127.       $scene = 23;
  128.     } elsif ($1 eq 'XXIV') {
  129.       $scene = 24;
  130.     } elsif ($1 eq 'XXV') {
  131.       $scene = 25;
  132.     } elsif (/^\d+$/) {
  133.       # Just in case it's presented numerically
  134.       $scene = $1;
  135.     }
  136.    
  137.     $act_and_scene = "$act\.$scene";
  138.     # print "$act_and_scene\n";
  139.  
  140.     # Add the act and scene to the list of all acts and scenes.
  141.     push @all_acts_and_scenes, $act_and_scene;
  142.  
  143.    
  144.   }
  145.  
  146.   elsif ($line =~ m/.*<h3>scene (\d+).*/i) {
  147.     # Or maybe we're dealing with our own format, and
  148.     # the scene numbers are actual numbers
  149.  
  150.     # When we start a new scene, write the contents of the
  151.     # @char buffer to the %breakdown hash and clear the
  152.     # @char buffer.
  153.     foreach my $char (@chars) {      
  154.       $breakdown{$act_and_scene}{$char} = 1;
  155.     }
  156.  
  157.     $act_and_scene = $1;
  158.  
  159.     # print "$act_and_scene\n";
  160.    
  161.     @chars = ();
  162.  
  163.   # Add the act and scene to the list of all acts and scenes.
  164.   push @all_acts_and_scenes, $act_and_scene;
  165.    
  166.   }
  167.  
  168.   elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
  169.    
  170.     # Add the character to the characters list.
  171.    
  172.     push @chars, $1;
  173.  
  174.     # Add the character to the hash of all characters. The value
  175.     # is just a place holder.
  176.  
  177.     $all_chars{$1} = 1;
  178.    
  179.   }
  180. }
  181.  
  182. # At the end of the play, write the @char buffer one last time to
  183. # get the last scene.
  184.  
  185. foreach my $char (@chars) {      
  186.       $breakdown{$act_and_scene}{$char} = 1;
  187.     }
  188.  
  189. # Uncomment for debugging:
  190. # print Dumper{%breakdown};
  191.  
  192. # The @all_chars list will serve as the header of the spreadsheet
  193. # we'll create, so first get all of the characters we kept track
  194. # of into it, and then put a basic column heading for act and scene
  195. # numbers as the first cell.
  196.  
  197. my @all_chars = keys (%all_chars);
  198. unshift @all_chars, 'Act.Scene';
  199.  
  200. # Create the table that will serve as the buffer for our spreadsheet,
  201. # and store the header as the first row.
  202. push my @table, \@all_chars;
  203.  
  204. # Next we'll create a new row for every act and scene, and
  205. # if a character is present in that act and scene, we will
  206. # print a list at that chars index value.
  207.  
  208. my $row_index = 0;
  209.  
  210. while ($row_index <= $#all_acts_and_scenes) {
  211.  
  212.   # Create a new row, and add the present row we're working on
  213.   # as the first column in that row.
  214.   unshift my @new_row, $all_acts_and_scenes[$row_index];
  215.  
  216.   # We want to start in the second column since the first is just
  217.   # going to be the act.scene.
  218.  
  219.   my $col_index = 1;
  220.  
  221.   # Do this for every entry in @all_chars, keeping track of the
  222.   # $col_index as we go.
  223.  
  224.   while ($col_index <= $#all_chars) {
  225.     # If the character in the column occurs in the scene of the row, mark
  226.     # the coordinate with an X. Otherwise, leave it empty.
  227.     if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
  228.       $new_row[$col_index] = 'X';
  229.     }
  230.     else {
  231.       $new_row[$col_index] = '';
  232.     }
  233.  
  234.     $col_index++;
  235.   }
  236.  
  237.   # Incriment the $row_index before saving the present row to the table.
  238.   $table[++$row_index] = \@new_row;
  239.  
  240. }
  241.  
  242. # Print as a CSV. This will be pretty straight-forward because we've already
  243. # created the spreadsheet in the @table, but we need to print a delimiting
  244. # characters along the way. This is just going to STDOUT for simplicity.
  245.  
  246. foreach my $row (@table) {
  247.   foreach my $col (@{$row}) {
  248.     print "$col,";
  249.   }
  250.   print "\n";
  251. }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×