Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- #
- # A perl script to generate an character/scene breakdown from
- # an MIT Shakespeare file.
- #
- # Tony Tambasco
- #
- # GPL v.2 &c.
- ## 29 Dec. 2012
- #
- # Last Revised 16 Jan. 2014.
- use strict;
- use warnings;
- use Data::Dumper;
- my %all_chars; # storing these as a hash will eliminnate duplicates.
- my %breakdown; # A 2-D hash to hold actors by scene.
- # Vars for calculating the act and scene.
- my $act = 1;
- my $scene = 1;
- my $act_and_scene = "$act\.$scene";
- # A list of all acts and scenes.
- my @all_acts_and_scenes;
- # A buffer for each char in a scene.
- my @chars;
- # Get our file.
- my $mit_shakes = pop @ARGV;
- # Open a file or die trying.
- open PLAY, $mit_shakes or die "Could not open file: $!\n";
- # Traverse the play until EOF
- while(my $line = <PLAY>) {
- # Capture the act number.
- if ($line =~ m/<h3>act (\w+)/i) {
- # Something to translate Roman numerals into numbers.
- if ($1 eq 'I') {
- $act = 1;
- } elsif ($1 eq 'II') {
- $act = 2;
- } elsif ($1 eq 'III') {
- $act = 3;
- } elsif ($1 eq 'IV') {
- $act = 4;
- } elsif ($1 eq 'V') {
- $act = 5;
- } elsif (/^\d+$/) {
- # Just in case it's presented numerically
- $act = $1;
- }
- # Start with scene 1.
- $scene = 1;
- $act_and_scene = "$act\.$scene";
- # print "$act_and_scene\n";
- }
- # Capture the scene number.
- elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) {
- # When we start a new scene, write the contents of the
- # @char buffer to the %breakdown hash and clear the
- # @char buffer.
- foreach my $char (@chars) {
- $breakdown{$act_and_scene}{$char} = 1;
- }
- @chars = ();
- # Something to translate Roman numerals into numbers.
- if ($1 eq 'I') {
- $scene = 1;
- } elsif ($1 eq 'II') {
- $scene = 2;
- } elsif ($1 eq 'III') {
- $scene = 3;
- } elsif ($1 eq 'IV') {
- $scene = 4;
- } elsif ($1 eq 'V') {
- $scene = 5;
- } elsif ($1 eq 'VI') {
- $scene = 6;
- } elsif ($1 eq 'VII') {
- $scene = 7;
- } elsif ($1 eq 'VIII') {
- $scene = 8;
- } elsif ($1 eq 'IX') {
- $scene = 9;
- } elsif ($1 eq 'X') {
- $scene = 10;
- } elsif ($1 eq 'XI') {
- $scene = 11;
- } elsif ($1 eq 'XII') {
- $scene = 12;
- } elsif ($1 eq 'XIII') {
- $scene = 13;
- } elsif ($1 eq 'XIV') {
- $scene = 14;
- } elsif ($1 eq 'XV') {
- $scene = 15;
- } elsif ($1 eq 'XVI') {
- $scene = 16;
- } elsif ($1 eq 'XVII') {
- $scene = 17;
- } elsif ($1 eq 'XVIII') {
- $scene = 18;
- } elsif ($1 eq 'XIX') {
- $scene = 19;
- } elsif ($1 eq 'XX') {
- $scene = 20;
- } elsif ($1 eq 'XXI') {
- $scene = 21;
- } elsif ($1 eq 'XXII') {
- $scene = 22;
- } elsif ($1 eq 'XXIII') {
- $scene = 23;
- } elsif ($1 eq 'XXIV') {
- $scene = 24;
- } elsif ($1 eq 'XXV') {
- $scene = 25;
- } elsif (/^\d+$/) {
- # Just in case it's presented numerically
- $scene = $1;
- }
- $act_and_scene = "$act\.$scene";
- # print "$act_and_scene\n";
- # Add the act and scene to the list of all acts and scenes.
- push @all_acts_and_scenes, $act_and_scene;
- }
- elsif ($line =~ m/.*<h3>scene (\d+).*/i) {
- # Or maybe we're dealing with our own format, and
- # the scene numbers are actual numbers
- # When we start a new scene, write the contents of the
- # @char buffer to the %breakdown hash and clear the
- # @char buffer.
- foreach my $char (@chars) {
- $breakdown{$act_and_scene}{$char} = 1;
- }
- $act_and_scene = $1;
- # print "$act_and_scene\n";
- @chars = ();
- # Add the act and scene to the list of all acts and scenes.
- push @all_acts_and_scenes, $act_and_scene;
- }
- elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
- # Add the character to the characters list.
- push @chars, $1;
- # Add the character to the hash of all characters. The value
- # is just a place holder.
- $all_chars{$1} = 1;
- }
- }
- # At the end of the play, write the @char buffer one last time to
- # get the last scene.
- foreach my $char (@chars) {
- $breakdown{$act_and_scene}{$char} = 1;
- }
- # Uncomment for debugging:
- # print Dumper{%breakdown};
- # The @all_chars list will serve as the header of the spreadsheet
- # we'll create, so first get all of the characters we kept track
- # of into it, and then put a basic column heading for act and scene
- # numbers as the first cell.
- my @all_chars = keys (%all_chars);
- unshift @all_chars, 'Act.Scene';
- # Create the table that will serve as the buffer for our spreadsheet,
- # and store the header as the first row.
- push my @table, \@all_chars;
- # Next we'll create a new row for every act and scene, and
- # if a character is present in that act and scene, we will
- # print a list at that chars index value.
- my $row_index = 0;
- while ($row_index <= $#all_acts_and_scenes) {
- # Create a new row, and add the present row we're working on
- # as the first column in that row.
- unshift my @new_row, $all_acts_and_scenes[$row_index];
- # We want to start in the second column since the first is just
- # going to be the act.scene.
- my $col_index = 1;
- # Do this for every entry in @all_chars, keeping track of the
- # $col_index as we go.
- while ($col_index <= $#all_chars) {
- # If the character in the column occurs in the scene of the row, mark
- # the coordinate with an X. Otherwise, leave it empty.
- if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
- $new_row[$col_index] = 'X';
- }
- else {
- $new_row[$col_index] = '';
- }
- $col_index++;
- }
- # Incriment the $row_index before saving the present row to the table.
- $table[++$row_index] = \@new_row;
- }
- # Print as a CSV. This will be pretty straight-forward because we've already
- # created the spreadsheet in the @table, but we need to print a delimiting
- # characters along the way. This is just going to STDOUT for simplicity.
- foreach my $row (@table) {
- foreach my $col (@{$row}) {
- print "$col,";
- }
- print "\n";
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement