View difference between Paste ID: <a href="/WmMR6HcC">WmMR6HcC</a> and <a href="/mWgxd2g1">mWgxd2g1</a>

'''HTML searching for Funny Pro Elites shoutbox
1		'''HTML searching for Funny Pro Elites shoutbox
2		Author: Joe McBobski
3		Should be in same folder as Parser.py
4		Run it from any shell that has Python available: "cd (foldername)", then "python wordsearch.py".
5		Python must be installed on your computer to run this script.
6		Attribution:
7		Zed Shaw (Writer of Learn Python the Hard Way) taught me how to do it.
8		Also the user xperroni on stackoverflow for Parser.py'''
9		### IF YOU FIND ANY ERRORS OR BUGS, PLEASE REPORT THEM TO JOE ###
10
11		#Imports necessary modules
12		import Parser
13		from urllib import urlopen
14		import sys
15
def clearscreen(numlines=100):
    '''Clear the console.

    On "posix" systems this runs the shell command ``clear``; on "nt",
    "dos" and "ce" it runs ``CLS``.  On any other value of os.name it
    falls back to printing blank lines.

    numlines -- how many newline characters to print in the fallback
                case; unused when a clear command is available.

    Returns None.
    '''
    import os
    if os.name == "posix":
        # Unix/Linux/MacOS/BSD/etc
        os.system('clear')
    elif os.name in ("nt", "dos", "ce"):
        # DOS/Windows
        os.system('CLS')
    else:
        # Fallback for other operating systems.
        # Parenthesised so the same line works as a Python 2 print
        # statement and as a Python 3 print() call.
        print('\n' * numlines)
28		#Search is now a function.
# wordsearch: prompts for a search term and a page count, downloads every URL
# in URLS with urlopen, keeps the lines containing "<tr id='", strips the HTML
# with Parser.dehtml, prints the collected entries, then asks whether to search
# again or exit.
# NOTE(review): this text is a rendered diff, not runnable Python. The leading
# numbers and the -/+ markers are diff residue, and indentation has been lost,
# so nesting below has to be inferred.
29		def wordsearch():
30		'''Searches for the word.'''
31
32		#sets up variable for words to go in. Also sets up term and page count.
33		WORDS = []
34		PAGENUM = 0
35
36		#asks you what to look for, places them in search.
37		print "What term to look for?"
38		SEARCH = raw_input("> ")
39		print "How many pages back?" #Might be removed later, or just do it yourself.
40		PAGES = raw_input("> ") #Please do not set pages too high.
41
42	-	PAGES = 0
42	+
43		URLS = []
# NOTE(review): no body is visible for the loop below; the statement that
# fills URLS appears to be missing from this view. Confirm against the
# original paste before relying on it.
44		for pagenum in range(1, (int(PAGES) + 1)):
45	-	print PAGES
45	+
46	-	print
46	+
47	-	UNPARSED.append( "Page %d" % (URLS.index(url) + 1)) # adds a note for the page number
47	+
48	-	for word in URL_OPENED.readlines(): #begins a loop and performs it on each line in URL_OPENED
48	+
49		#extracts shout data from pages
50		UNPARSED = []
51		for url in URLS:
52		URL_OPENED = urlopen(url) #stores the page data in URL_OPENED
53		LINES = URL_OPENED.readlines()
54		URL_OPENED.close()
# PAGES is rebound here: it no longer holds the user's input but the 1-based
# position of the current url in URLS.
55		PAGES = URLS.index(url) + 1
56		clearscreen()
57		print "Loading page:", PAGES
58		UNPARSED.append( "Page %d" % (PAGES)) # adds a note for the page number
59		for word in LINES: #begins a loop and performs it on each line in URL_OPENED
60		#MAKE THIS A VARIABLE:
61		if "<tr id=\'" in word: #Makes sure you're getting just the shouts
62		UNPARSED.append(word) #adds the data to UNPARSED
63		else:
64		pass #AKA do nothing
65
# NOTE(review): this increment changes the value later printed as
# "Pages searched:". Whether it sits inside or after the url loop cannot be
# determined here because indentation is lost.
66		PAGES +=1
67		#OPERABLE!
68
69		SHOUTS = [] #create new empty set
70		for arg in UNPARSED:
71		if "<tr id=\'" in arg: #make sure it only tries this on actual shout data
72		unbroken = arg.split("</tr>") #breaks up every term
# The inner loop reuses the name `arg`, shadowing the outer loop variable.
73		for arg in unbroken: #adds them together again to make the shouts.
74		shout = "".join([arg, "</tr>"])
75		SHOUTS.append(shout)
76		elif "Page" in arg: #exception for page numbers
77		SHOUTS.append(arg)
78		else: #do nothing
79		pass
80
81		THREEPARTSHOUTS = []
82		SHOUTNUM = 0
83		WORDS_PARSED = []
84		for x in SHOUTS:
85		WORDS_PARSED.append(Parser.dehtml(x))
# Entries containing " - " are split into [username, date, shout]: the date is
# the 18 characters starting at the first " - ", the username is everything
# before it, and the shout is everything after it. Other entries are kept as
# plain strings.
86		for x in WORDS_PARSED:
87		if " - " in x:
88		startdate = x.index(" - ")
89		date = x[startdate:(startdate + 18)]
90		di = x.index(date)
91		username = x[0:di]
92		shout = x[(di + 18):-1] + x[-1]
93		THREEPARTSHOUTS.append([username, date, shout])
94		else:
95		THREEPARTSHOUTS.append(x)
96	-	YESTERMS = FALSE
96	+
97
98	-	if isinstance(word, str) and word != "\n" and (YESTERMS or word == "Page 1"):
98	+	TERMCOUNT = 0
# NOTE(review): the conditions that select between the two WORDS.append calls
# below (and that the trailing `else` belongs to) are not visible in this view.
# SEARCH is never referenced in the visible lines of this loop.
99		for shout in THREEPARTSHOUTS: #Takes this all on
100	-	NEWPAGE = True
100	+
101		WORDS.append(shout)
102	-	else:
102	+
103		TERMCOUNT += 1
104		WORDS.append(shout) #adds shout to WORDS.
105		else:
106		pass
107
108
109		#Returns words, with another one of those fancy "message" things.
110		print "You searched:", SEARCH
111		print "Pages searched:", PAGES
112		print "Words matching search term:", TERMCOUNT
113		print "Words:"
114		YESTERMS = False
115		for word in WORDS:
116		if isinstance(word, list):
117		print word[0], word[1], word[2]
118		YESTERMS = True
# NOTE(review): WORDS.index(word) + 1 is one past the end when `word` is the
# last element, which would raise IndexError; index() also returns the first
# equal entry, not necessarily the current one.
119		elif word != "" and WORDS[(WORDS.index(word) + 1)] != "":
120		print word
121		else:
122		pass
123		print "What would you like to do?\n1: try another search\n2: exit"
124		NEXT = int(raw_input("> ")) #converts to integer
# Choosing 1 restarts the search by calling wordsearch() recursively. The
# `else` below pairs with `if NEXT == 2`, not with `if NEXT == 1`.
125		if NEXT == 1:
126		wordsearch()
127		if NEXT == 2:
128		exit()
129		else:
130		print "INVALID. EXITING"
131		exit()
# Presumably the module-level entry call (indentation is lost in this view):
# starts the first search as soon as the script is run.
132		wordsearch()