Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- from requests import session
# Login form endpoint; credentials are POSTed here to authenticate the session.
login_page = "http://forums.somethingawful.com/account.php?action=loginform#form"
# Post-history listing for one user; the page number is appended to this prefix.
url = "http://forums.somethingawful.com/query.php?action=posthistory&userid=35304&page="

# Forum credentials. Fill these in locally before running; keep real values
# out of any shared copy of this script.
USERNAME = ''
PASSWORD = ''

# Form fields sent with the login request.
payload = {
    'action': 'login',
    'username': USERNAME,
    'password': PASSWORD,
}
def checkDiv(string):
    """Return the post text, or the placeholder ``"its u"`` for unusable posts.

    A post is unusable when it is missing or empty (``None``, ``False`` or
    ``""``) or when it contains a ``[quote=`` tag.

    Args:
        string: The text of a post, or a falsy value when there is none.

    Returns:
        ``string`` unchanged when it is usable, otherwise ``"its u"``.
    """
    # Truthiness is used instead of an identity test against False so that
    # None and "" are caught too; this also keeps the `in` test below from
    # ever running on a non-string.
    if not string or "[quote=" in string:
        return "its u"
    return string
# Log in once, then walk the post-history pages and append every post's text
# to tori.txt, one line per post.
with session() as c:  # the session is closed automatically when the block exits
    # Authenticate first; the same session object is reused for the page
    # requests below.
    c.post(login_page, data=payload)

    # Open the output file a single time for the whole run. The context
    # manager closes it even if a request or parse step raises, and the
    # explicit encoding keeps non-ASCII post text from failing on platforms
    # whose default encoding is not UTF-8.
    with open("tori.txt", "a", encoding="utf-8") as my_file:
        for x in range(1, 101):  # post-history pages 1-100 are what is available
            response = c.get(url + str(x))  # fetch the HTML of page x
            soup = BeautifulSoup(response.text, 'html.parser')
            # Collect every <div class="blurb"> on the page.
            for div in soup.find_all("div", {"class": "blurb"}):
                # div.string is None when the div has more than one child
                # node, so checkDiv receives either text or None here.
                my_file.write(checkDiv(div.string) + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement