SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/perl | |
2 | # | |
3 | # A perl script to generate an character/scene breakdown from | |
4 | # an MIT Shakespeare file. | |
5 | # | |
6 | # Tony Tambasco | |
7 | # | |
8 | # GPL v.2 &c. | |
9 | ## 29 Dec. 2012 | |
10 | - | # 29 Dec. 2012 |
10 | + | |
11 | # Last Revised 16 Jan. 2014. | |
12 | - | # Last Revised 30 Jan. 2013. |
12 | + | |
13 | use strict; | |
14 | use warnings; | |
15 | use Data::Dumper; | |
16 | ||
17 | my %all_chars; # storing these as a hash will eliminnate duplicates. | |
18 | my %breakdown; # A 2-D hash to hold actors by scene. | |
19 | ||
20 | # Vars for calculating the act and scene. | |
21 | my $act = 1; | |
22 | my $scene = 1; | |
23 | my $act_and_scene = "$act\.$scene"; | |
24 | ||
25 | # A list of all acts and scenes. | |
26 | my @all_acts_and_scenes; | |
27 | ||
28 | # A buffer for each char in a scene. | |
29 | my @chars; | |
30 | ||
31 | # Get our file. | |
32 | ||
33 | my $mit_shakes = pop @ARGV; | |
34 | ||
35 | # Open a file or die trying. | |
36 | open PLAY, $mit_shakes or die "Could not open file: $!\n"; | |
37 | ||
38 | # Traverse the play until EOF | |
39 | ||
40 | while(my $line = <PLAY>) { | |
41 | ||
42 | # Capture the act number. | |
43 | if ($line =~ m/<h3>act (\w+)/i) { | |
44 | - | if ($line =~ m/<h3>ACT (\w+)/) { |
44 | + | |
45 | # Something to translate Roman numerals into numbers. | |
46 | ||
47 | if ($1 eq 'I') { | |
48 | $act = 1; | |
49 | } elsif ($1 eq 'II') { | |
50 | $act = 2; | |
51 | } elsif ($1 eq 'III') { | |
52 | $act = 3; | |
53 | } elsif ($1 eq 'IV') { | |
54 | $act = 4; | |
55 | } elsif ($1 eq 'V') { | |
56 | $act = 5; | |
57 | } elsif (/^\d+$/) { | |
58 | # Just in case it's presented numerically | |
59 | $act = $1; | |
60 | } | |
61 | ||
62 | # Start with scene 1. | |
63 | $scene = 1; | |
64 | ||
65 | $act_and_scene = "$act\.$scene"; | |
66 | # print "$act_and_scene\n"; | |
67 | - | |
67 | + | |
68 | ||
69 | # Capture the scene number. | |
70 | elsif ($line =~ m/.*<h3>scene (\w+)\..*/i) { | |
71 | - | elsif ($line =~ m/.*<h3>SCENE (\w+)\..*/) { |
71 | + | |
72 | # When we start a new scene, write the contents of the | |
73 | # @char buffer to the %breakdown hash and clear the | |
74 | # @char buffer. | |
75 | foreach my $char (@chars) { | |
76 | $breakdown{$act_and_scene}{$char} = 1; | |
77 | } | |
78 | ||
79 | @chars = (); | |
80 | ||
81 | # Something to translate Roman numerals into numbers. | |
82 | if ($1 eq 'I') { | |
83 | $scene = 1; | |
84 | } elsif ($1 eq 'II') { | |
85 | $scene = 2; | |
86 | } elsif ($1 eq 'III') { | |
87 | $scene = 3; | |
88 | } elsif ($1 eq 'IV') { | |
89 | $scene = 4; | |
90 | } elsif ($1 eq 'V') { | |
91 | $scene = 5; | |
92 | } elsif ($1 eq 'VI') { | |
93 | $scene = 6; | |
94 | } elsif ($1 eq 'VII') { | |
95 | $scene = 7; | |
96 | } elsif ($1 eq 'VIII') { | |
97 | $scene = 8; | |
98 | } elsif ($1 eq 'IX') { | |
99 | $scene = 9; | |
100 | } elsif ($1 eq 'X') { | |
101 | $scene = 10; | |
102 | } elsif ($1 eq 'XI') { | |
103 | $scene = 11; | |
104 | } elsif ($1 eq 'XII') { | |
105 | $scene = 12; | |
106 | } elsif ($1 eq 'XIII') { | |
107 | $scene = 13; | |
108 | } elsif ($1 eq 'XIV') { | |
109 | $scene = 14; | |
110 | } elsif ($1 eq 'XV') { | |
111 | $scene = 15; | |
112 | } elsif ($1 eq 'XVI') { | |
113 | $scene = 16; | |
114 | } elsif ($1 eq 'XVII') { | |
115 | $scene = 17; | |
116 | } elsif ($1 eq 'XVIII') { | |
117 | $scene = 18; | |
118 | } elsif ($1 eq 'XIX') { | |
119 | $scene = 19; | |
120 | } elsif ($1 eq 'XX') { | |
121 | $scene = 20; | |
122 | } elsif ($1 eq 'XXI') { | |
123 | $scene = 21; | |
124 | } elsif ($1 eq 'XXII') { | |
125 | $scene = 22; | |
126 | } elsif ($1 eq 'XXIII') { | |
127 | $scene = 23; | |
128 | } elsif ($1 eq 'XXIV') { | |
129 | $scene = 24; | |
130 | } elsif ($1 eq 'XXV') { | |
131 | $scene = 25; | |
132 | } elsif (/^\d+$/) { | |
133 | # Just in case it's presented numerically | |
134 | $scene = $1; | |
135 | } | |
136 | ||
137 | $act_and_scene = "$act\.$scene"; | |
138 | # print "$act_and_scene\n"; | |
139 | ||
140 | - | # Add the act and scene to the list of all acts and scenes. |
140 | + | # Add the act and scene to the list of all acts and scenes. |
141 | push @all_acts_and_scenes, $act_and_scene; | |
142 | ||
143 | ||
144 | } | |
145 | ||
146 | elsif ($line =~ m/.*<h3>scene (\d+).*/i) { | |
147 | # Or maybe we're dealing with our own format, and | |
148 | # the scene numbers are actual numbers | |
149 | ||
150 | # When we start a new scene, write the contents of the | |
151 | # @char buffer to the %breakdown hash and clear the | |
152 | # @char buffer. | |
153 | foreach my $char (@chars) { | |
154 | $breakdown{$act_and_scene}{$char} = 1; | |
155 | } | |
156 | ||
157 | $act_and_scene = $1; | |
158 | ||
159 | # print "$act_and_scene\n"; | |
160 | ||
161 | @chars = (); | |
162 | ||
163 | # Add the act and scene to the list of all acts and scenes. | |
164 | push @all_acts_and_scenes, $act_and_scene; | |
165 | ||
166 | } | |
167 | ||
168 | elsif ($line =~ m/^<a name="speech\d+"><b>(.*)<\/b>/i) { | |
169 | ||
170 | # Add the character to the characters list. | |
171 | ||
172 | push @chars, $1; | |
173 | ||
174 | # Add the character to the hash of all characters. The value | |
175 | # is just a place holder. | |
176 | ||
177 | $all_chars{$1} = 1; | |
178 | ||
179 | } | |
180 | } | |
181 | ||
182 | # At the end of the play, write the @char buffer one last time to | |
183 | # get the last scene. | |
184 | ||
185 | foreach my $char (@chars) { | |
186 | $breakdown{$act_and_scene}{$char} = 1; | |
187 | } | |
188 | ||
189 | # Uncomment for debugging: | |
190 | # print Dumper{%breakdown}; | |
191 | ||
192 | # The @all_chars list will serve as the header of the spreadsheet | |
193 | # we'll create, so first get all of the characters we kept track | |
194 | # of into it, and then put a basic column heading for act and scene | |
195 | # numbers as the first cell. | |
196 | ||
197 | my @all_chars = keys (%all_chars); | |
198 | unshift @all_chars, 'Act.Scene'; | |
199 | ||
200 | # Create the table that will serve as the buffer for our spreadsheet, | |
201 | # and store the header as the first row. | |
202 | push my @table, \@all_chars; | |
203 | ||
204 | # Next we'll create a new row for every act and scene, and | |
205 | # if a character is present in that act and scene, we will | |
206 | # print a list at that chars index value. | |
207 | ||
208 | my $row_index = 0; | |
209 | ||
210 | while ($row_index <= $#all_acts_and_scenes) { | |
211 | ||
212 | # Create a new row, and add the present row we're working on | |
213 | # as the first column in that row. | |
214 | unshift my @new_row, $all_acts_and_scenes[$row_index]; | |
215 | ||
216 | # We want to start in the second column since the first is just | |
217 | # going to be the act.scene. | |
218 | ||
219 | my $col_index = 1; | |
220 | ||
221 | # Do this for every entry in @all_chars, keeping track of the | |
222 | # $col_index as we go. | |
223 | ||
224 | while ($col_index <= $#all_chars) { | |
225 | # If the character in the column occurs in the scene of the row, mark | |
226 | # the coordinate with an X. Otherwise, leave it empty. | |
227 | if ($breakdown{$all_acts_and_scenes[$row_index]}{$all_chars[$col_index]}) { | |
228 | $new_row[$col_index] = 'X'; | |
229 | } | |
230 | else { | |
231 | $new_row[$col_index] = ''; | |
232 | } | |
233 | ||
234 | $col_index++; | |
235 | } | |
236 | ||
237 | # Incriment the $row_index before saving the present row to the table. | |
238 | $table[++$row_index] = \@new_row; | |
239 | ||
240 | } | |
241 | ||
242 | # Print as a CSV. This will be pretty straight-forward because we've already | |
243 | # created the spreadsheet in the @table, but we need to print a delimiting | |
244 | # characters along the way. This is just going to STDOUT for simplicity. | |
245 | ||
246 | foreach my $row (@table) { | |
247 | foreach my $col (@{$row}) { | |
248 | print "$col,"; | |
249 | } | |
250 | print "\n"; | |
251 | } |