Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
#################
## Searching MacBeth for text
## MacBeth text is here: http://pastebin.com/PV5YUBuA
#################
use strict;
use warnings;

# The Macbeth text file. The path is relative, so it is resolved against
# the directory the script is run from.
my $MACBETH_FILE = 'macbeth.txt';

# parse_query_string($qs)
#
# Turn a raw CGI query string such as "search=toil&count=5" into a hash of
# decoded name/value pairs (like a dictionary in Python).
# Returns an empty list when $qs is undefined or empty.
sub parse_query_string {
    my ($qs) = @_;
    my %form;
    return %form unless defined($qs) && length($qs);

    # First split at the & mark: "search=toil&count=5" becomes
    # ("search=toil", "count=5").
    foreach my $nvpair (split /&/, $qs) {
        # Split on the FIRST = sign only, so a value that itself contains
        # an = sign is kept whole.
        my ($name, $value) = split /=/, $nvpair, 2;
        next unless defined($name) && length($name);

        # A pair without any = sign has no value; treat it as empty.
        $value = '' unless defined $value;

        # Standard HTML form decoding. Browsers submit "to be" as "to+be",
        # so plusses become spaces first. A literal plus sign arrives as
        # %2B and is restored by the next step.
        $value =~ tr/+/ /;

        # %XX escapes become the character they stand for, e.g.
        # "that%27s" becomes "that's".
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

        $form{$name} = $value;
    }
    return %form;
}

# split_paragraphs($text)
#
# Split the raw file text into paragraphs. The file has line breaks inside
# each piece of dialog and blank lines between them, so a paragraph is
# everything between runs of blank (whitespace-only) lines.
# Works for both CRLF and LF line endings. Returns a list of plain-text
# paragraphs whose internal line breaks are "\n".
sub split_paragraphs {
    my ($text) = @_;
    return () unless defined $text;

    # Normalise CRLF (and lone CR) line endings to LF.
    $text =~ s/\r\n?/\n/g;

    # Drop blank lines at the very start and whitespace at the very end so
    # they do not produce empty or ragged paragraphs.
    $text =~ s/\A(?:[ \t]*\n)+//;
    $text =~ s/\s+\z//;

    # No capturing group here: split would otherwise return the captured
    # separator text as extra list elements.
    return split /\n(?:[ \t]*\n)+/, $text;
}

# format_match($paragraph, $term)
#
# If the plain-text $paragraph contains $term (case-insensitive, matched
# literally), return the paragraph as HTML: line breaks become <br>, the
# speaking character's name is underlined and every occurrence of the
# search term is wrapped in <strong>. Returns undef when the paragraph
# does not contain the term or the term is empty.
#
# The text is assumed to come from the trusted Macbeth file and to contain
# no HTML markup of its own.
sub format_match {
    my ($paragraph, $term) = @_;
    return unless defined($paragraph) && defined($term) && length($term);

    # \Q...\E makes the user's input a literal string, so characters such
    # as "(" or "." cannot break or change the pattern. The test runs on
    # the plain text, before any tags are added, so a search for "br" or
    # "u" cannot match the markup generated below.
    return unless $paragraph =~ /\Q$term\E/i;

    # Replace the line breaks inside the paragraph with HTML <br> tags.
    (my $html = $paragraph) =~ s/\n/<br>/g;

    # Format the text so that the name of the character speaking
    # (capital letters followed by a period) is underlined.
    $html =~ s/([A-Z\ ]+)\./<u>$1<\/u> /g;

    # Bold the search term. Tags are matched first and copied through
    # unchanged so the term is only ever highlighted in the visible text.
    $html =~ s{(<[^>]*>)|(\Q$term\E)}{ defined $1 ? $1 : "<strong>$2</strong>" }gie;

    return $html;
}

# Set the content type for the page
print "Content-type: text/html\n\n";

# Set the HTML header and default form
print <<'EOF';
<HTML>
<HEAD>
<TITLE>Search Macbeth! - Coding 101 Episode 26</TITLE>
</HEAD>
<BODY>
<H1>Search Macbeth</H1>
<FORM METHOD="GET" ACTION="textsearch.pl">
<STRONG>What do you want to search for in MacBeth?</STRONG>
<INPUT TYPE="TEXT" NAME="search"><br />
<INPUT TYPE="SUBMIT">
</FORM>
EOF

# Get the querystring and check if there's anything there
my $qs = $ENV{'QUERY_STRING'};
# For Testing via perl on the commandline
#$qs="search=toil";

# With no querystring there is nothing to do. This is not an error; the
# page simply shows the empty form.
if (defined($qs) && length($qs)) {
    my %form = parse_query_string($qs);

    if (!exists $form{'search'}) {
        # The query received was not the one that was expected.
        # Conclusion is that someone is trying to hack the program.
        print "<h1><blink>STOP HACKING MY PROGRAM</blink></h1>";
    }
    elsif (length $form{'search'}) {
        # There is a search term (checked by length so that a search for
        # "0" still counts). An empty "search=" from submitting the blank
        # form falls through and is treated like no query at all.
        print "<hr><h2>Search Results:</h2><hr>";

        # Open the file or give an error message
        open my $fh, '<', $MACBETH_FILE
            or die "Could not open \"$MACBETH_FILE\": $!";
        # Read the whole file in one go ($/ undefined means "no line
        # separator"), then close it.
        my $fulltext = do { local $/; <$fh> };
        close $fh;
        $fulltext = '' unless defined $fulltext;

        # Print every paragraph that contains the search term, each one
        # bordered by an HTML hard rule line.
        foreach my $paragraph (split_paragraphs($fulltext)) {
            my $html = format_match($paragraph, $form{'search'});
            next unless defined $html;
            print "$html \n<hr>\n";
        }
    }
}

# End the HTML
print " </BODY>\n</HTML>";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement