SHOW:
|
|
- or go back to the newest paste.
| 1 | #!/usr/bin/perl | |
| 2 | # | |
| 3 | # A perl script to generate an character/scene breakdown from | |
| 4 | # an MIT Shakespeare file. | |
| 5 | # | |
| 6 | # Tony Tambasco | |
| 7 | # | |
| 8 | # GPL v.2 &c. | |
| 9 | ## 29 Dec. 2012 | |
| 10 | - | # 29 Dec. 2012 |
| 10 | + | |
| 11 | # Last Revised 16 Jan. 2014. | |
| 12 | - | # Last Revised 30 Jan. 2013. |
| 12 | + | |
| 13 | use strict; | |
| 14 | use warnings; | |
| 15 | use Data::Dumper; | |
| 16 | ||
| 17 | my %all_chars; # storing these as a hash will eliminnate duplicates. | |
| 18 | my %breakdown; # A 2-D hash to hold actors by scene. | |
| 19 | ||
| 20 | # Vars for calculating the act and scene. | |
| 21 | my $act = 1; | |
| 22 | my $scene = 1; | |
| 23 | my $act_and_scene = "$act\.$scene"; | |
| 24 | ||
| 25 | # A list of all acts and scenes. | |
| 26 | my @all_acts_and_scenes; | |
| 27 | ||
| 28 | # A buffer for each char in a scene. | |
| 29 | my @chars; | |
| 30 | ||
| 31 | # Get our file. | |
| 32 | ||
| 33 | my $mit_shakes = pop @ARGV; | |
| 34 | ||
| 35 | # Open a file or die trying. | |
| 36 | open PLAY, $mit_shakes or die "Could not open file: $!\n"; | |
| 37 | ||
| 38 | # Traverse the play until EOF | |
| 39 | ||
| 40 | while(my $line = <PLAY>) {
| |
| 41 | ||
| 42 | # Capture the act number. | |
| 43 | if ($line =~ m/<h3>act (\w+)/i) {
| |
| 44 | - | if ($line =~ m/<h3>ACT (\w+)/) {
|
| 44 | + | |
| 45 | # Something to translate Roman numerals into numbers. | |
| 46 | ||
| 47 | if ($1 eq 'I') {
| |
| 48 | $act = 1; | |
| 49 | } elsif ($1 eq 'II') {
| |
| 50 | $act = 2; | |
| 51 | } elsif ($1 eq 'III') {
| |
| 52 | $act = 3; | |
| 53 | } elsif ($1 eq 'IV') {
| |
| 54 | $act = 4; | |
| 55 | } elsif ($1 eq 'V') {
| |
| 56 | $act = 5; | |
| 57 | } elsif (/^\d+$/) {
| |
| 58 | # Just in case it's presented numerically | |
| 59 | $act = $1; | |
| 60 | } | |
| 61 | ||
| 62 | # Start with scene 1. | |
| 63 | $scene = 1; | |
| 64 | ||
| 65 | $act_and_scene = "$act\.$scene"; | |
| 66 | # print "$act_and_scene\n"; | |
| 67 | - | |
| 67 | + | |
| 68 | ||
| 69 | # Capture the scene number. | |
| 70 | elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) {
| |
| 71 | - | elsif ($line =~ m/.*<h3>SCENE (\w+)\..*/) {
|
| 71 | + | |
| 72 | # When we start a new scene, write the contents of the | |
| 73 | # @char buffer to the %breakdown hash and clear the | |
| 74 | # @char buffer. | |
| 75 | foreach my $char (@chars) {
| |
| 76 | $breakdown{$act_and_scene}{$char} = 1;
| |
| 77 | } | |
| 78 | ||
| 79 | @chars = (); | |
| 80 | ||
| 81 | # Something to translate Roman numerals into numbers. | |
| 82 | if ($1 eq 'I') {
| |
| 83 | $scene = 1; | |
| 84 | } elsif ($1 eq 'II') {
| |
| 85 | $scene = 2; | |
| 86 | } elsif ($1 eq 'III') {
| |
| 87 | $scene = 3; | |
| 88 | } elsif ($1 eq 'IV') {
| |
| 89 | $scene = 4; | |
| 90 | } elsif ($1 eq 'V') {
| |
| 91 | $scene = 5; | |
| 92 | } elsif ($1 eq 'VI') {
| |
| 93 | $scene = 6; | |
| 94 | } elsif ($1 eq 'VII') {
| |
| 95 | $scene = 7; | |
| 96 | } elsif ($1 eq 'VIII') {
| |
| 97 | $scene = 8; | |
| 98 | } elsif ($1 eq 'IX') {
| |
| 99 | $scene = 9; | |
| 100 | } elsif ($1 eq 'X') {
| |
| 101 | $scene = 10; | |
| 102 | } elsif ($1 eq 'XI') {
| |
| 103 | $scene = 11; | |
| 104 | } elsif ($1 eq 'XII') {
| |
| 105 | $scene = 12; | |
| 106 | } elsif ($1 eq 'XIII') {
| |
| 107 | $scene = 13; | |
| 108 | } elsif ($1 eq 'XIV') {
| |
| 109 | $scene = 14; | |
| 110 | } elsif ($1 eq 'XV') {
| |
| 111 | $scene = 15; | |
| 112 | } elsif ($1 eq 'XVI') {
| |
| 113 | $scene = 16; | |
| 114 | } elsif ($1 eq 'XVII') {
| |
| 115 | $scene = 17; | |
| 116 | } elsif ($1 eq 'XVIII') {
| |
| 117 | $scene = 18; | |
| 118 | } elsif ($1 eq 'XIX') {
| |
| 119 | $scene = 19; | |
| 120 | } elsif ($1 eq 'XX') {
| |
| 121 | $scene = 20; | |
| 122 | } elsif ($1 eq 'XXI') {
| |
| 123 | $scene = 21; | |
| 124 | } elsif ($1 eq 'XXII') {
| |
| 125 | $scene = 22; | |
| 126 | } elsif ($1 eq 'XXIII') {
| |
| 127 | $scene = 23; | |
| 128 | } elsif ($1 eq 'XXIV') {
| |
| 129 | $scene = 24; | |
| 130 | } elsif ($1 eq 'XXV') {
| |
| 131 | $scene = 25; | |
| 132 | } elsif (/^\d+$/) {
| |
| 133 | # Just in case it's presented numerically | |
| 134 | $scene = $1; | |
| 135 | } | |
| 136 | ||
| 137 | $act_and_scene = "$act\.$scene"; | |
| 138 | # print "$act_and_scene\n"; | |
| 139 | ||
| 140 | - | # Add the act and scene to the list of all acts and scenes. |
| 140 | + | # Add the act and scene to the list of all acts and scenes. |
| 141 | push @all_acts_and_scenes, $act_and_scene; | |
| 142 | ||
| 143 | ||
| 144 | } | |
| 145 | ||
| 146 | elsif ($line =~ m/.*<h3>scene (\d+).*/i) {
| |
| 147 | # Or maybe we're dealing with our own format, and | |
| 148 | # the scene numbers are actual numbers | |
| 149 | ||
| 150 | # When we start a new scene, write the contents of the | |
| 151 | # @char buffer to the %breakdown hash and clear the | |
| 152 | # @char buffer. | |
| 153 | foreach my $char (@chars) {
| |
| 154 | $breakdown{$act_and_scene}{$char} = 1;
| |
| 155 | } | |
| 156 | ||
| 157 | $act_and_scene = $1; | |
| 158 | ||
| 159 | # print "$act_and_scene\n"; | |
| 160 | ||
| 161 | @chars = (); | |
| 162 | ||
| 163 | # Add the act and scene to the list of all acts and scenes. | |
| 164 | push @all_acts_and_scenes, $act_and_scene; | |
| 165 | ||
| 166 | } | |
| 167 | ||
| 168 | elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
| |
| 169 | ||
| 170 | # Add the character to the characters list. | |
| 171 | ||
| 172 | push @chars, $1; | |
| 173 | ||
| 174 | # Add the character to the hash of all characters. The value | |
| 175 | # is just a place holder. | |
| 176 | ||
| 177 | $all_chars{$1} = 1;
| |
| 178 | ||
| 179 | } | |
| 180 | } | |
| 181 | ||
| 182 | # At the end of the play, write the @char buffer one last time to | |
| 183 | # get the last scene. | |
| 184 | ||
| 185 | foreach my $char (@chars) {
| |
| 186 | $breakdown{$act_and_scene}{$char} = 1;
| |
| 187 | } | |
| 188 | ||
| 189 | # Uncomment for debugging: | |
| 190 | # print Dumper{%breakdown};
| |
| 191 | ||
| 192 | # The @all_chars list will serve as the header of the spreadsheet | |
| 193 | # we'll create, so first get all of the characters we kept track | |
| 194 | # of into it, and then put a basic column heading for act and scene | |
| 195 | # numbers as the first cell. | |
| 196 | ||
| 197 | my @all_chars = keys (%all_chars); | |
| 198 | unshift @all_chars, 'Act.Scene'; | |
| 199 | ||
| 200 | # Create the table that will serve as the buffer for our spreadsheet, | |
| 201 | # and store the header as the first row. | |
| 202 | push my @table, \@all_chars; | |
| 203 | ||
| 204 | # Next we'll create a new row for every act and scene, and | |
| 205 | # if a character is present in that act and scene, we will | |
| 206 | # print a list at that chars index value. | |
| 207 | ||
| 208 | my $row_index = 0; | |
| 209 | ||
| 210 | while ($row_index <= $#all_acts_and_scenes) {
| |
| 211 | ||
| 212 | # Create a new row, and add the present row we're working on | |
| 213 | # as the first column in that row. | |
| 214 | unshift my @new_row, $all_acts_and_scenes[$row_index]; | |
| 215 | ||
| 216 | # We want to start in the second column since the first is just | |
| 217 | # going to be the act.scene. | |
| 218 | ||
| 219 | my $col_index = 1; | |
| 220 | ||
| 221 | # Do this for every entry in @all_chars, keeping track of the | |
| 222 | # $col_index as we go. | |
| 223 | ||
| 224 | while ($col_index <= $#all_chars) {
| |
| 225 | # If the character in the column occurs in the scene of the row, mark | |
| 226 | # the coordinate with an X. Otherwise, leave it empty. | |
| 227 | if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
| |
| 228 | $new_row[$col_index] = 'X'; | |
| 229 | } | |
| 230 | else {
| |
| 231 | $new_row[$col_index] = ''; | |
| 232 | } | |
| 233 | ||
| 234 | $col_index++; | |
| 235 | } | |
| 236 | ||
| 237 | # Incriment the $row_index before saving the present row to the table. | |
| 238 | $table[++$row_index] = \@new_row; | |
| 239 | ||
| 240 | } | |
| 241 | ||
| 242 | # Print as a CSV. This will be pretty straight-forward because we've already | |
| 243 | # created the spreadsheet in the @table, but we need to print a delimiting | |
| 244 | # characters along the way. This is just going to STDOUT for simplicity. | |
| 245 | ||
| 246 | foreach my $row (@table) {
| |
| 247 | foreach my $col (@{$row}) {
| |
| 248 | print "$col,"; | |
| 249 | } | |
| 250 | print "\n"; | |
| 251 | } |