Advertisement
dougllio

textsearch.pl

Jul 20th, 2014
343
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 5.48 KB | None | 0 0
  #!/usr/bin/perl

  #################
  ## Searching MacBeth for text
  ## MacBeth text is here: http://pastebin.com/PV5YUBuA
  #################
  #
  # CGI script. Always prints the search form; when the query string carries
  # a non-empty "search" field it also prints every paragraph of the Macbeth
  # text file that contains the search text (case-insensitively), with runs
  # of capitals ending in a period underlined and the matched text in bold.

  use strict;
  use warnings;

  # This is the Macbeth text file (opened relative to the working directory).
  my $file = "macbeth.txt";

  # Set the content type for the page
  print "Content-type: text/html\n\n";

  # Set the HTML header and default form
  print map { "$_\n" } (
      '<HTML>',
      ' <HEAD>',
      '  <TITLE>Search Macbeth! - Coding 101 Episode 26</TITLE>',
      ' </HEAD>',
      ' <BODY>',
      '  <H1>Search Macbeth</H1>',
      '  <FORM METHOD="GET" ACTION="textsearch.pl">',
      '    <STRONG>What do you want to search for in MacBeth?</STRONG>',
      '    <INPUT TYPE="TEXT" NAME="search"><br />',
      '    <INPUT TYPE="SUBMIT">',
      '  </FORM>',
  );

  # Get the query string and check if there's anything there
  my $qs = $ENV{'QUERY_STRING'};

  # For testing via perl on the command line
  #$qs = "search=toil";

  # With no query string there is nothing to do; that is not an error.
  if (defined $qs && length $qs) {

      # Turn "search=toil&count=5" into (search => "toil", count => "5").
      my %form = parse_query_string($qs);
      my $term = $form{'search'};

      # Test defined/length rather than truth so a search for "0" is honoured.
      if (defined $term && length $term) {
          print "<hr><h2>Search Results:</h2><hr>";

          # \Q...\E makes the user's text match literally. Interpolating it
          # raw would let a stray "(" abort the script and would let a visitor
          # run arbitrary (possibly very slow) patterns.
          my $match_re = qr/\Q$term\E/i;

          # The highlighter works on HTML-escaped text, so it needs the term
          # escaped the same way.
          my $escaped_term = escape_html($term);
          my $highlight_re = qr/\Q$escaped_term\E/i;

          for my $paragraph (read_paragraphs($file)) {

              # Decide on the raw text, before any markup is added, so that
              # tag names such as "br" can never count as a hit.
              next unless $paragraph =~ $match_re;

              # Print the result bordered by an HTML hard rule line
              print format_paragraph($paragraph, $highlight_re), " \n<hr>\n";
          }
      } else {
          # We're here because the query received was not the one that was
          # expected (no "search" field, or an empty one).
          print "<h1><blink>STOP HACKING MY PROGRAM</blink></h1>";
      }
  }

  # End the HTML
  print " </BODY>\n</HTML>";

  exit;

  # Decode one URL-encoded form component: "+" becomes a space and each %XX
  # escape becomes the byte it names (so "that%27s" becomes "that's").
  # A "+" typed by the user arrives as %2B and is restored by the second step.
  sub url_decode {
      my ($text) = @_;
      $text =~ tr/+/ /;
      $text =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
      return $text;
  }

  # Parse a query string into a list of name/value pairs suitable for
  # assigning to a hash. A field with no value gets the empty string; a
  # later duplicate of a name overrides an earlier one.
  sub parse_query_string {
      my ($query) = @_;
      my %form;
      for my $pair (split /&/, $query) {
          # Limit of 2 keeps any further "=" characters inside the value.
          my ($name, $value) = split /=/, $pair, 2;
          next unless defined $name && length $name;
          $value = '' unless defined $value;
          $form{ url_decode($name) } = url_decode($value);
      }
      return %form;
  }

  # Read the file at $path and return its paragraphs as a list of strings.
  # Paragraphs are separated by one or more blank (or whitespace-only) lines;
  # lines inside a paragraph stay separated by "\n". Dies if the file cannot
  # be opened.
  sub read_paragraphs {
      my ($path) = @_;

      open my $fh, '<', $path or die "Could not open \"$path\": $!";
      my $text = do { local $/; <$fh> };    # slurp the whole file
      close $fh;
      $text = '' unless defined $text;

      # Normalise CRLF / CR line endings so blank-line detection works
      # whichever platform the text file was saved on.
      $text =~ s/\r\n?/\n/g;

      # The separator group is non-capturing: split returns captured
      # separator text as extra list elements, which is not wanted here.
      my @paragraphs = split /\n(?:[ \t]*\n)+/, $text;
      s/\A\s+|\s+\z//g for @paragraphs;
      return grep { length } @paragraphs;
  }

  # Replace the characters that are special in HTML with their entities.
  sub escape_html {
      my ($text) = @_;
      my %entity_for = (
          '&' => '&amp;',
          '<' => '&lt;',
          '>' => '&gt;',
          '"' => '&quot;',
      );
      $text =~ s/([&<>"])/$entity_for{$1}/g;
      return $text;
  }

  # Turn one raw paragraph into the HTML fragment to print.
  #   $paragraph    - raw paragraph text, lines separated by "\n"
  #   $highlight_re - compiled pattern matching the HTML-escaped search term
  # Returns the escaped paragraph with speaker names underlined, matches
  # wrapped in <strong>, and line breaks turned into <br>.
  sub format_paragraph {
      my ($paragraph, $highlight_re) = @_;

      my $html = escape_html($paragraph);

      # Format the text so that the name of the character speaking is
      # underlined: a run of capitals/spaces followed by a period. The
      # period itself is replaced by a space.
      $html =~ s/([A-Z ]+)\./<u>$1<\/u> /g;

      # Bold the search term. The second alternative swallows whole tags and
      # entities untouched, so a search for "u" or "amp" cannot rewrite the
      # markup added above; the term is tried first so an escaped term such
      # as "&amp;" is still highlighted.
      $html =~ s{($highlight_re)|(<[^>]*>|&(?:amp|lt|gt|quot);)}
                {defined $1 ? "<strong>$1</strong>" : $2}ge;

      # Keep the original line structure of the play in the rendered page.
      $html =~ s/\n/<br>\n/g;

      return $html;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement