# Shout Box Search 0.1

'''HTML searching for Funny Pro Elites shoutbox
Author: Joe McBobski
Should be in same folder as Parser.py
Call from any shell with support for python with "cd (foldername)", and then "python wordsearch.py".
Python 2 must be installed on your computer to run this script.
Attribution:
Zed Shaw (Writer of Learn Python the Hard Way) taught me how to do it.
Also the user xperroni on stackoverflow for Parser.py'''
### IF YOU FIND ANY ERRORS OR BUGS, PLEASE REPORT THEM TO JOE ###

#Imports necessary modules
import Parser
from urllib import urlopen
import sys

def clearscreen(numlines=100):
    '''Clear the terminal window.

    Runs the shell's own clear command when os.name is recognised
    ('clear' for "posix", 'CLS' for "nt", "dos" and "ce"). On any other
    platform it prints numlines blank lines to push old output out of view.

    numlines: how many newlines to print in the fallback case.
    '''
    import os

    # Shell command that clears the screen, keyed by os.name.
    clear_commands = {
        "posix": 'clear',   # Unix/Linux/MacOS/BSD/etc
        "nt": 'CLS',        # DOS/Windows
        "dos": 'CLS',
        "ce": 'CLS',
    }

    command = clear_commands.get(os.name)
    if command is None:
        # Fallback for other operating systems.
        print('\n' * numlines)
    else:
        os.system(command)

#Search is now a function.
def wordsearch():
    '''Interactively search the Funny Pro Elites shoutbox for a term.

    Asks for a search term and a number of pages, downloads that many
    shoutbox pages, and prints every shout whose text contains the term
    (case-sensitive), grouped under the page it was found on. Afterwards
    the user may run another search or exit.

    This function does not return normally: it ends by raising SystemExit
    through sys.exit(), either on request or after an invalid menu choice.
    Errors raised by urlopen while downloading a page are not caught here.
    '''
    #A loop rather than a recursive call, so repeated searches do not
    #pile up stack frames.
    while True:
        print("What term to look for?")
        term = raw_input("> ")
        print("How many pages back?")
        num_pages = _ask_page_count() #Please do not set pages too high.

        results = _search_pages(term, num_pages)
        _print_report(term, num_pages, results)

        print("What would you like to do?\n1: try another search\n2: exit")
        try:
            choice = int(raw_input("> "))
        except ValueError:
            #Non-numeric input is handled like any other invalid choice.
            choice = None
        if choice == 1:
            continue
        if choice != 2:
            print("INVALID. EXITING")
        sys.exit()


#Marker that identifies a line of page HTML as holding shout rows.
_SHOUT_ROW_MARKER = "<tr id='"
#Separator between the username and the date in a de-HTML'd shout.
_DATE_SEPARATOR = " - "
#Width of the date field, counted from the start of the separator.
_DATE_WIDTH = 18
#Address of one page of the full shoutbox; takes the page number.
_SHOUTBOX_URL = "http://funnyproelites.com/index.php?action=full_shoutbox&page=%d"


def _ask_page_count():
    '''Prompt until a whole number is typed; return it, never below zero.'''
    while True:
        try:
            return max(0, int(raw_input("> ")))
        except ValueError:
            print("Please enter a whole number.")


def _fetch_shout_lines(pagenum):
    '''Download one shoutbox page and return its lines that hold shout rows.'''
    page = urlopen(_SHOUTBOX_URL % pagenum)
    try:
        lines = page.readlines()
    finally:
        #Release the connection even if reading fails part-way.
        page.close()
    return [line for line in lines if _SHOUT_ROW_MARKER in line]


def _parse_shout(text):
    '''Split de-HTML'd row text into [username, date, shout].

    Returns None when the text has no " - " separator and so cannot be
    split into the three parts.
    '''
    start = text.find(_DATE_SEPARATOR)
    if start == -1:
        return None
    end = start + _DATE_WIDTH
    #A plain slice is empty when the text ends inside the date field.
    return [text[:start], text[start:end], text[end:]]


def _parse_shouts(lines):
    '''Turn raw HTML lines into a list of [username, date, shout] entries.'''
    shouts = []
    for line in lines:
        #A single line may carry several rows, so break it at each row end.
        for piece in line.split("</tr>"):
            if not piece.strip():
                #Whatever follows the last "</tr>" on a line is not a row.
                continue
            parsed = _parse_shout(Parser.dehtml(piece + "</tr>"))
            if parsed is not None:
                shouts.append(parsed)
    return shouts


def _search_pages(term, num_pages):
    '''Fetch pages 1..num_pages and collect the shouts containing term.

    Returns a list of (page number, matching shouts) pairs, one per page,
    in page order. A page with no matches has an empty list.
    '''
    results = []
    for pagenum in range(1, num_pages + 1):
        clearscreen()
        print("Loading page: %d" % pagenum)
        shouts = _parse_shouts(_fetch_shout_lines(pagenum))
        matches = [shout for shout in shouts if term in shout[2]]
        results.append((pagenum, matches))
    return results


def _print_report(term, num_pages, results):
    '''Print the search summary followed by the matches grouped by page.'''
    total = sum(len(matches) for _, matches in results)
    print("You searched: %s" % term)
    print("Pages searched: %d" % num_pages)
    print("Words matching search term: %d" % total)
    print("Words:")
    for pagenum, matches in results:
        if not matches:
            #Pages without a match get no header.
            continue
        print("Page %d" % pagenum)
        for username, date, shout in matches:
            print("%s %s %s" % (username, date, shout))
#Start the interactive search only when this file is run as a script,
#so that importing the module does not prompt for input or exit.
if __name__ == "__main__":
    wordsearch()