View difference between Paste ID: HA6SZDCB and vXVc0Ad4
SHOW: | | - or go back to the newest paste.
1
from bs4 import BeautifulSoup
2
import requests
3
from requests import session
4
5
# Login form endpoint, and the post-history listing for one user; the page
# number is appended to `url` when each page is requested.
# NOTE(review): both URLs are plain http, so the credentials below would be
# posted unencrypted -- confirm whether the site accepts https.
login_page = "http://forums.somethingawful.com/account.php?action=loginform#form"
url = "http://forums.somethingawful.com/query.php?action=posthistory&userid=35304&page="

# Forum credentials. Left blank here; fill in before running.
USERNAME = ''
PASSWORD = ''

# Form fields sent with the login POST to authenticate with the forums.
payload = {
    'action': 'login',
    'username': USERNAME,
    'password': PASSWORD,
}
def checkDiv(string):
    """Return the post text to write, or a placeholder for filtered posts.

    Used to filter out empty posts and posts that contain quotes.

    Args:
        string: Text of one post. May be ``None`` or empty; the caller passes
            BeautifulSoup's ``Tag.string``, which is documented to be ``None``
            when the tag does not hold exactly one string.

    Returns:
        ``"its u"`` when ``string`` is missing/empty or contains ``"[quote="``;
        otherwise ``string`` unchanged.
    """
    # `not string` covers None, "" and False; the original `string is False`
    # test never matched a missing post, so None fell through to the `in`
    # check and raised TypeError.
    if not string or "[quote=" in string:
        return "its u"
    return string
# Log in once, then walk the user's post-history pages and append every post
# (after filtering through checkDiv) to tori.txt, one per line.
# Both the HTTP session and the output file are closed automatically when the
# `with` block exits, even on error. The original wrote to `my_file` without
# ever opening it.
# NOTE(review): encoding is pinned to utf-8 so non-ASCII post text does not
# depend on the platform default -- confirm this matches any existing tori.txt.
with session() as c, open("tori.txt", "a", encoding="utf-8") as my_file:
    c.post(login_page, data=payload)
    # Page numbers of the post history; per the author, 1-100 is what is available.
    for x in range(1, 101):
        response = c.get(url + str(x))  # fetch the HTML of this history page
        soup = BeautifulSoup(response.text, 'html.parser')
        # Each post body is taken from a <div class="blurb">.
        mydivs = soup.findAll("div", {"class": "blurb"})
        for div in mydivs:
            my_file.write(checkDiv(div.string) + "\n")