View difference between Paste ID: <a href="/h0hL2qLz">h0hL2qLz</a> and <a href="/WmMR6HcC">WmMR6HcC</a>

'''HTML searching for Funny Pro Elites shoutbox
1		'''HTML searching for Funny Pro Elites shoutbox
2		Author: Joe McBobski
3		Should be in same folder as Parser.py
4		Call from any shell with support for python with "cd (foldername)", and then "python wordsearch.py".
5		Python must be downloaded on your computer to run.
6		Attribution:
7		Zed Shaw (Writer of Learn Python the Hard Way) taught me how to do it.
8		Also the user xperroni on stackoverflow for Parser.py'''
9		### IF YOU FIND ANY ERRORS OR BUGS, PLEASE REPORT THEM TO JOE ###
10
11		#Imports necessary modules
12		import Parser
13		from urllib import urlopen
14		import sys
15
def clearscreen(numlines=100):
    '''Wipes the terminal so the next output starts on a clean screen.

    The operating system's own clear command is run when os.name is one of
    the recognised values; otherwise numlines newline characters are printed
    to push the old output out of view.
    '''
    import os

    # Shell command that clears the screen, keyed by the value of os.name.
    clear_commands = {
        "posix": 'clear',  # Unix/Linux/MacOS/BSD/etc
        "nt": 'CLS',       # DOS/Windows
        "dos": 'CLS',
        "ce": 'CLS',
    }

    command = clear_commands.get(os.name)
    if command is None:
        # Fallback for other operating systems.
        print('\n' * numlines)
    else:
        os.system(command)
27
28		#Search is now a function.
# Address of one shoutbox page; %d is replaced by the page number.
_SHOUTBOX_URL = "http://funnyproelites.com/index.php?action=full_shoutbox&page=%d"
# Only page lines containing this marker are treated as shout data.
_SHOUT_ROW_MARKER = "<tr id='"
# The first occurrence of this text marks where the date part of a shout starts.
_DATE_SEPARATOR = " - "
# Number of characters (separator included) sliced off as the date part.
_DATE_FIELD_WIDTH = 18


def _ask_for_int(prompt):
    '''Keeps asking with the given prompt until a whole number is typed.'''
    while True:
        answer = raw_input(prompt)
        try:
            return int(answer)
        except ValueError:
            print("Please enter a whole number.")


def _fetch_shout_lines(page_numbers):
    '''Downloads every requested page and keeps only the lines holding shouts.

    Returns one flat list: for each page a "Page N" label (N is the real
    page number) followed by that page's lines containing the shout marker.
    '''
    collected = []
    for page_number in page_numbers:
        clearscreen()
        print("Loading page: %d" % page_number)
        response = urlopen(_SHOUTBOX_URL % page_number)
        try:
            lines = response.readlines()
        finally:
            # Close the connection even when reading fails.
            response.close()
        collected.append("Page %d" % page_number)  # adds a note for the page number
        collected.extend(line for line in lines if _SHOUT_ROW_MARKER in line)
    return collected


def _split_rows(raw_entries):
    '''Breaks the collected lines into one "...</tr>" fragment per table row.

    Page labels are passed through unchanged.
    '''
    fragments = []
    for entry in raw_entries:
        if _SHOUT_ROW_MARKER in entry:
            # split() drops the closing tag, so it is put back on every piece.
            fragments.extend(piece + "</tr>" for piece in entry.split("</tr>"))
        elif "Page" in entry:
            fragments.append(entry)
    return fragments


def _parse_shout(text):
    '''Turns tag-free shout text into [username, date, message].

    Text without the date separator (page labels, leftovers) is returned
    unchanged as a plain string.
    '''
    if _DATE_SEPARATOR not in text:
        return text
    date_start = text.index(_DATE_SEPARATOR)
    date_end = date_start + _DATE_FIELD_WIDTH
    return [text[:date_start], text[date_start:date_end], text[date_end:]]


def wordsearch():
    '''Searches the shoutbox for a term and prints every matching shout.

    Asks for the search term, the first page and the last page, downloads
    that page range, strips the HTML with Parser.dehtml and prints the shouts
    whose message contains the term, together with the page labels.
    Afterwards the user can start another search or quit; the function
    leaves through sys.exit() and therefore never returns normally.
    '''
    while True:
        # asks you what to look for and which pages to cover.
        search = raw_input("What term to look for?\n> ")
        first_page = _ask_for_int("Start?\n> ")
        last_page = _ask_for_int("How far back?\n> ")  # Please do not set pages too high.
        page_numbers = range(first_page, last_page + 1)

        # extracts shout data from pages and splits it into its three parts.
        raw_entries = _fetch_shout_lines(page_numbers)
        parsed = [_parse_shout(Parser.dehtml(fragment))
                  for fragment in _split_rows(raw_entries)]

        # keeps every plain-text entry plus the shouts containing the term.
        results = []
        termcount = 0
        for entry in parsed:
            if not isinstance(entry, list):
                results.append(entry)
            elif search in entry[2]:
                termcount += 1
                results.append(entry)

        print("You searched: %s" % search)
        print("Pages searched: %d" % len(page_numbers))
        print("Words matching search term: %d" % termcount)
        print("Words:")
        for position, entry in enumerate(results):
            if isinstance(entry, list):
                print("%s %s %s" % tuple(entry))
                continue
            # A plain entry is shown unless it, or the entry right after it,
            # is empty. The final entry has no successor and is judged on
            # its own text.
            has_next = position + 1 < len(results)
            following = results[position + 1] if has_next else None
            if entry != "" and following != "":
                print(entry)

        print("What would you like to do?\n1: try another search\n2: exit")
        try:
            choice = int(raw_input("> "))  # converts to integer
        except ValueError:
            choice = None  # anything that is not a number counts as invalid
        if choice == 1:
            continue
        if choice != 2:
            print("INVALID. EXITING")
        sys.exit()
# Start the interactive search as soon as this file is executed.
wordsearch()