#!/usr/bin/perl
#
# macbethsearch.pl

#################
## Searching MacBeth for text
## MacBeth text is here: http://pastebin.com/PV5YUBuA
#################

# Using the CGI.pm module in a non-OO manner
use CGI qw(:standard);

#If the form was posted, grab the form variables
if ($ENV{'REQUEST_METHOD'} eq "POST") {
    $search = param('search');
    $showall = param('showall');
}

# For Testing via perl on the commandline
# $search="toil";

# Print the standard content type for the page
print header;

# Print the HTML header
print start_html("Search Macbeth! - Coding 101 Episode 27");
print h1("William Shakespeare's Macbeth");

# Print out the elements of the search form
print start_form, strong("What do you want to search for in MacBeth?"),
    textfield(-name => "search", -default => $search), p,
    checkbox(-name => "showall", -value => "YES", -label => "Show Entire Document"),
    p, submit, end_form, hr;

# Run the search only when a non-empty term was submitted. Testing the
# length (rather than plain truthiness) means a term such as "0" is
# searched for instead of being silently ignored.
if (defined $search && length $search) {

    # This is the Macbeth text file
    my $file = "macbeth.txt";

    # The term is user input, so quote it with \Q...\E: it is then matched
    # literally and regex metacharacters in it (an unbalanced "(", ".*", ...)
    # can neither abort the script nor be used to build an expensive pattern.
    # The same compiled pattern is used for counting and for highlighting so
    # the two can never disagree. Matching is case-insensitive.
    my $term_re = qr/\Q$search\E/i;

    # Markup wrapped around every highlighted occurrence of the term
    my $mark_open  = "<span style='font-weight: bold;background-color: #ffff42;'>";
    my $mark_close = "</span>";

    # True when the "Show Entire Document" checkbox was ticked
    my $show_everything = defined $showall && $showall eq "YES";

    # Search results header
    print h2("Search Results (bolded):");

    # Open the file, or stop with a message that includes the system error
    open my $play_fh, "<", $file or die "Could not open \"$file\": $!";

    # Read the whole file into one string, turning the newline at the end of
    # each line into an HTML <br> tag
    my $fulltext = "";
    while (my $line = <$play_fh>) {
        $line =~ s/\n$/<br>/;
        $fulltext .= $line;
    }

    close $play_fh;

    # Keep only what lies between the ---BEGIN--- and ---END--- markers,
    # dropping the Project Gutenberg pre and post text. If the markers are
    # missing the text is left untouched.
    $fulltext =~ s/.*---BEGIN---(.*)---END---.*/$1/s;

    # The file wraps each speech over several lines and separates speeches
    # with blank lines. Split on every run of blank lines (a <br> followed by
    # one or more <br>s with nothing but whitespace, e.g. a carriage return,
    # in between) so each speech becomes one paragraph. The group must be
    # non-capturing: split() would otherwise insert the captured separator
    # text into the result as extra bogus "paragraphs".
    my @paragraphs = split /<br>(?:\s*<br>)+/, $fulltext;

    # Total number of occurrences of the search term in the document
    my $matchcount = 0;

    for my $paragraph (@paragraphs) {
        # Nothing to show for fragments that contain no text at all
        next unless $paragraph =~ /\S/;

        # Count occurrences in the visible text only. Each tag is replaced by
        # a newline in a scratch copy so that terms like "br" do not match
        # the <br> markup and text on either side of a tag is not glued
        # together into a false match.
        (my $visible = $paragraph) =~ s/<[^>]*>/\n/g;
        my $count = () = $visible =~ /$term_re/g;
        $matchcount += $count;

        if ($count > 0) {
            # Underline the name of the character speaking: a run of capital
            # letters/spaces followed by a period.
            $paragraph =~ s/([A-Z\ ]+)\./<u>$1<\/u> /g;

            # Highlight the term. The paragraph is cut into tags and text
            # (the capturing group here is deliberate: it keeps the tags in
            # the list) and only the text pieces are rewritten, so <br> and
            # <u> tags are never broken by the inserted markup.
            $paragraph = join "", map {
                my $piece = $_;
                $piece =~ s/($term_re)/$mark_open$1$mark_close/g
                    unless $piece =~ /\A<[^>]*>\z/;
                $piece;
            } split /(<[^>]*>)/, $paragraph;

            # Print the result followed by an HTML horizontal rule
            print p($paragraph), hr();
        }
        elsif ($show_everything) {
            # The "show all" box was ticked: print non-matching paragraphs too
            print p($paragraph);
        }
    }

    # If no matches were found AND the Show all checkbox is empty, print a message
    if ($matchcount < 1 && !$show_everything) {
        print p("Sorry. No matches.");
    }

}

# End the HTML
print end_html;
exit;