cueScripGen.pl

#!/usr/bin/perl
#
# cueScriptGen
#
# Generate a cue script from an MIT Shakespeare File
#
# By Tony Tambasco
#
# Released under the GPL 2.0 and all that jazz.
#
# This is just a prototype. A more polished version will need further
# testing and development.
#
# Last Revised 15 Jan. 2013 11 PM

use strict;
use warnings;

# A valid invocation names one or more characters and then the script
# file, so anything shorter than two arguments cannot be right: show a
# short usage message and stop.

die "usage is \"cueScriptGen character_1 [character_2 &c] file_name\"\n"
  unless @ARGV >= 2;

# We should edit this to either accept a file name or a URL.
# If we force a URL, we can explore transformations based on
# alternative methods of formatting.

# The script file is always the LAST argument, so pop it off; what is left
# in @ARGV is then nothing but character names.
my $mit_shakes = pop @ARGV;

# State carried from line to line while the play is scanned.
my $cue_line      = "";     # most recent candidate cue line
my $prev_cue_line = "";     # the candidate cue line before that one
my $act           = "";     # last act heading read
my $scene         = "";     # last scene heading read
my $new_act       = undef;  # true while $act is waiting to be printed
my $new_scene     = undef;  # true while $scene is waiting to be printed
my $line          = "";     # line of the file currently being examined
my $title_of_play = "";     # contents of the file's <title> element

# Open the play for reading, or quit with a basic message if we can't.
# The three-argument form treats $mit_shakes strictly as a file name; the
# two-argument form would interpret a leading '>' or a trailing '|' in the
# argument as an open mode or a command to run. The bareword handle PLAY is
# kept because the rest of the script reads from it by that name.
open PLAY, '<', $mit_shakes or die "Could not open file: $!\n";


  # Step 0.5: Print basic html header info from the file, and
  # then some heading info so the reader knows what they're
  # looking at.

# Copy the source file's <head> section to the output line by line, noting
# the play's title on the way. $line starts out empty, so the first pass
# prints nothing and simply reads the first line of the file. Tags are
# matched case-insensitively because HTML tag names are case-insensitive.
# The line holding the closing </head> tag is not echoed here; the script
# prints its own closing tag afterwards.
until ($line =~ m{</head>}i) {

  # Only a title that opens and closes on a single line is captured.
  if ($line =~ m{<title>(.*?)</title>}i) {
    $title_of_play = $1;
  }

  print $line;
  $line = <PLAY>;

  # At end-of-file the read returns undef. Without this check a file that
  # has no </head> tag would keep the loop spinning forever.
  defined $line
    or die "Could not find </head> in $mit_shakes; is it an HTML file?\n";
}

# Close the copied header, open the body, and print the page headings:
# the title of the play, then the characters this cue script covers.
print "</head>\n<body>\n", "<h1>$title_of_play</h1>\n";

# The character names are joined with a comma and a space, so the first
# name has no separator in front of it.
print '<h2>Cue Script for ', join(', ', @ARGV), ". </h2>\n";


# Traverse the rest of the file until EOF, printing each requested
# character's speeches together with the cue that precedes them.

# Anchor that opens a line of spoken text, numbered act.scene.line.
my $text_anchor = qr/^<a name="\d+\.\d+\.\d+"/;

# A cue with fewer words than this is printed together with the line that
# came before it.
# NOTE(review): the earlier comment here said "less than three words" while
# the code compared the word count against 2. The code's behaviour is kept;
# confirm which one was intended.
my $min_cue_words = 2;

LINE:
while (defined($line = <PLAY>)) {

  # The editor may have cut text by commenting it out. Comment delimiters
  # are treated exactly as if everything between them, markup included, had
  # been deleted, so a comment may join speeches by several characters into
  # a single speech.
  if ($line =~ m/<!--/) {

    # Everything before the opening delimiter is text we want to print.
    my ($before_comment) = split /<!--/, $line, 2;
    $before_comment = "" unless defined $before_comment;
    print "$before_comment\n";
    $cue_line = $before_comment;

    # Discard lines until one holds the closing delimiter. A comment that
    # is still open at end-of-file hides the rest of the play, so stop
    # scanning instead of reading past EOF forever.
    until ($line =~ m/-->/) {
      $line = <PLAY>;
      last LINE unless defined $line;
    }

    # Keep what follows the closing delimiter. It is empty when the line
    # ends right at the delimiter.
    my (undef, $after_comment) = split /-->/, $line, 2;
    $after_comment = "" unless defined $after_comment;
    print "$after_comment\n";
    $prev_cue_line = $cue_line;
    $cue_line      = $after_comment;
  }

  # Step 1: Is this line the heading of a speech by any character we were
  # asked for? The name is quoted with \Q...\E so that punctuation in it
  # (e.g. a full stop) is matched literally rather than as a regex
  # metacharacter.
  my $is_wanted_speech = grep {
    $line =~ m{^<a name="speech\d+"><b>\Q$_\E</b>}i
  } @ARGV;

  if ($is_wanted_speech) {

    # Print the act heading if we haven't yet.
    if ($new_act) {
      print "$act\n";
      $new_act = undef;
    }

    # Print the scene heading if we haven't yet.
    if ($new_scene) {
      print "$scene\n";
      $new_scene = undef;
    }

    # Step 2: Print the cue line. Count its words only when it really is a
    # line of spoken text; otherwise the count stays at zero and the
    # previous line is printed as well. Splitting on ' ' ignores leading
    # whitespace, so indentation is not counted as a word.
    my $cue_word_count = 0;
    if ($cue_line =~ m/${text_anchor}>(.*)</) {
      my @cue_words = split ' ', $1;
      $cue_word_count = @cue_words;
    }

    if ($cue_word_count < $min_cue_words) {
      print "<b>Cue:</b> $prev_cue_line\n$cue_line\n\n";
    }
    else {
      print "<b>Cue:</b> $cue_line\n\n";
    }

    # Step 3: Print the entire speech block; it ends at the closing
    # blockquote (or at end-of-file if the block is never closed).
    while (defined $line && $line !~ m{</blockquote>}) {
      print "$line\n";

      # Keep tracking the cue line, in case the cue script is for an actor
      # who plays two roles with back to back speeches.
      $prev_cue_line = $cue_line;
      $cue_line      = $line;

      $line = <PLAY>;
    }

    # Step 4: Print the closing blockquote.
    print "</blockquote>\n";

    last LINE unless defined $line;
  }

  # Step 1.5: Not a speech heading we want. If it is a line of spoken text,
  # remember it as a possible cue. This runs once per line of the file, not
  # once per requested character, so the previous cue line is not
  # overwritten by the current one.
  elsif ($line =~ $text_anchor) {
    $prev_cue_line = $cue_line;
    $cue_line      = $line;
  }

  # Step 1.6: Remember act headings; one is printed before the next speech.
  elsif ($line =~ m/<h3>ACT \w+/) {
    $act     = $line;
    $new_act = "true";
  }

  # Step 1.7: Remember scene headings in the same way.
  elsif ($line =~ m/<h3>SCENE \w+/) {
    $scene     = $line;
    $new_scene = "true";
  }
}

# We are done reading the play.
close PLAY;

# Last piece of cleanup, close off our HTML elements

print "\n</body>\n</html>\n";