View difference between Paste ID: PLqnNeQ0 and CvFMpKLe
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/perl
2
#
3
# A perl script to generate an character/scene breakdown from
4
# an MIT Shakespeare file.
5
#
6
# Tony Tambasco
7
#
8
# GPL v.2 &c. 
9
## 29 Dec. 2012
10-
# 29 Dec. 2012
10+
11
# Last Revised 16 Jan. 2014.
12-
# Last Revised 30 Jan. 2013.
12+
13
use strict;
14
use warnings;
15
use Data::Dumper;
16
17
my %all_chars; # storing these as a hash will eliminnate duplicates.
18
my %breakdown; # A 2-D hash to hold actors by scene.
19
20
# Vars for calculating the act and scene. 
21
my $act = 1;
22
my $scene = 1;
23
my $act_and_scene = "$act\.$scene";
24
25
# A list of all acts and scenes. 
26
my @all_acts_and_scenes;
27
28
# A buffer for each char in a scene. 
29
my @chars;
30
31
# Get our file. 
32
33
my $mit_shakes = pop @ARGV;
34
35
# Open a file or die trying. 
36
open PLAY, $mit_shakes or die "Could not open file: $!\n";
37
38
# Traverse the play until EOF
39
40
while(my $line = <PLAY>) {
41
  
42
  # Capture the act number. 
43
  if ($line =~ m/<h3>act (\w+)/i) {
44-
  if ($line =~ m/<h3>ACT (\w+)/) {
44+
45
    # Something to translate Roman numerals into numbers.
46
47
    if ($1 eq 'I') {
48
      $act = 1;
49
    } elsif ($1 eq 'II') {
50
      $act = 2;
51
    } elsif ($1 eq 'III') {
52
      $act = 3;
53
    } elsif ($1 eq 'IV') {
54
      $act = 4;
55
    } elsif ($1 eq 'V') {
56
      $act = 5;
57
    }  elsif (/^\d+$/) {
58
      # Just in case it's presented numerically
59
      $act = $1;
60
    }
61
    
62
    # Start with scene 1.
63
    $scene = 1;
64
65
    $act_and_scene = "$act\.$scene";
66
#    print "$act_and_scene\n";   
67-
   
67+
68
69
  # Capture the scene number. 
70
  elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) {
71-
  elsif ($line =~ m/.*<h3>SCENE (\w+)\..*/) {
71+
72
    # When we start a new scene, write the contents of the
73
    # @char buffer to the %breakdown hash and clear the
74
    # @char buffer. 
75
    foreach my $char (@chars) {      
76
      $breakdown{$act_and_scene}{$char} = 1;
77
    }
78
	
79
    @chars = ();
80
    
81
    # Something to translate Roman numerals into numbers.
82
    if ($1 eq 'I') {
83
      $scene = 1;
84
    } elsif ($1 eq 'II') {
85
      $scene = 2;
86
    } elsif ($1 eq 'III') {
87
      $scene = 3;
88
    } elsif ($1 eq 'IV') {
89
      $scene = 4;
90
    } elsif ($1 eq 'V') {
91
      $scene = 5;
92
    }  elsif ($1 eq 'VI') {
93
      $scene = 6;
94
    } elsif ($1 eq 'VII') {
95
      $scene = 7;
96
    } elsif ($1 eq 'VIII') {
97
      $scene = 8;
98
    } elsif ($1 eq 'IX') {
99
      $scene = 9;
100
    } elsif ($1 eq 'X') {
101
      $scene = 10;
102
    } elsif ($1 eq 'XI') {
103
      $scene = 11;
104
    } elsif ($1 eq 'XII') {
105
      $scene = 12;
106
    } elsif ($1 eq 'XIII') {
107
      $scene = 13;
108
    }  elsif ($1 eq 'XIV') {
109
      $scene = 14;
110
    } elsif ($1 eq 'XV') {
111
      $scene = 15;
112
    } elsif ($1 eq 'XVI') {
113
      $scene = 16;
114
    } elsif ($1 eq 'XVII') {
115
      $scene = 17;
116
    } elsif ($1 eq 'XVIII') {
117
      $scene = 18;
118
    } elsif ($1 eq 'XIX') {
119
      $scene = 19;
120
    } elsif ($1 eq 'XX') {
121
      $scene = 20;
122
    } elsif ($1 eq 'XXI') {
123
      $scene = 21;
124
    }  elsif ($1 eq 'XXII') {
125
      $scene = 22;
126
    } elsif ($1 eq 'XXIII') {
127
      $scene = 23;
128
    } elsif ($1 eq 'XXIV') {
129
      $scene = 24;
130
    } elsif ($1 eq 'XXV') {
131
      $scene = 25;
132
    } elsif (/^\d+$/) {
133
      # Just in case it's presented numerically
134
      $scene = $1;
135
    }
136
    
137
    $act_and_scene = "$act\.$scene";
138
    # print "$act_and_scene\n";
139
140-
    # Add the act and scene to the list of all acts and scenes. 
140+
    # Add the act and scene to the list of all acts and scenes.
141
    push @all_acts_and_scenes, $act_and_scene;
142
143
    
144
  }
145
146
  elsif ($line =~ m/.*<h3>scene (\d+).*/i) {
147
    # Or maybe we're dealing with our own format, and
148
    # the scene numbers are actual numbers
149
150
    # When we start a new scene, write the contents of the
151
    # @char buffer to the %breakdown hash and clear the
152
    # @char buffer. 
153
    foreach my $char (@chars) {      
154
      $breakdown{$act_and_scene}{$char} = 1;
155
    }
156
157
    $act_and_scene = $1;
158
159
    # print "$act_and_scene\n";
160
    
161
    @chars = ();
162
  
163
  # Add the act and scene to the list of all acts and scenes. 
164
  push @all_acts_and_scenes, $act_and_scene;
165
    
166
  }
167
  
168
  elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) {
169
    
170
    # Add the character to the characters list. 
171
    
172
    push @chars, $1;
173
174
    # Add the character to the hash of all characters. The value
175
    # is just a place holder.
176
177
    $all_chars{$1} = 1;
178
    
179
  }
180
}
181
182
# At the end of the play, write the @char buffer one last time to
183
# get the last scene.
184
185
foreach my $char (@chars) {      
186
      $breakdown{$act_and_scene}{$char} = 1;
187
    }
188
189
# Uncomment for debugging:
190
# print Dumper{%breakdown};
191
192
# The @all_chars list will serve as the header of the spreadsheet
193
# we'll create, so first get all of the characters we kept track
194
# of into it, and then put a basic column heading for act and scene
195
# numbers as the first cell. 
196
197
my @all_chars = keys (%all_chars);
198
unshift @all_chars, 'Act.Scene';
199
200
# Create the table that will serve as the buffer for our spreadsheet,
201
# and store the header as the first row. 
202
push my @table, \@all_chars;
203
204
# Next we'll create a new row for every act and scene, and
205
# if a character is present in that act and scene, we will
206
# print a list at that chars index value. 
207
208
my $row_index = 0;
209
210
while ($row_index <= $#all_acts_and_scenes) {
211
  
212
  # Create a new row, and add the present row we're working on
213
  # as the first column in that row. 
214
  unshift my @new_row, $all_acts_and_scenes[$row_index];
215
216
  # We want to start in the second column since the first is just
217
  # going to be the act.scene. 
218
  
219
  my $col_index = 1;
220
221
  # Do this for every entry in @all_chars, keeping track of the
222
  # $col_index as we go. 
223
  
224
  while ($col_index <= $#all_chars) {
225
    # If the character in the column occurs in the scene of the row, mark
226
    # the coordinate with an X. Otherwise, leave it empty. 
227
    if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) {
228
      $new_row[$col_index] = 'X';
229
    }
230
    else {
231
      $new_row[$col_index] = '';
232
    }
233
234
    $col_index++;
235
  }
236
237
  # Incriment the $row_index before saving the present row to the table. 
238
  $table[++$row_index] = \@new_row;
239
240
}
241
242
# Print as a CSV. This will be pretty straight-forward because we've already
243
# created the spreadsheet in the @table, but we need to print a delimiting
244
# characters along the way. This is just going to STDOUT for simplicity. 
245
246
foreach my $row (@table) {
247
  foreach my $col (@{$row}) {
248
    print "$col,";
249
  }
250
  print "\n";
251
}