Advertisement
Guest User

toribot

a guest
Apr 12th, 2016
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import requests
from bs4 import BeautifulSoup
from requests import session

  5. login_page = "http://forums.somethingawful.com/account.php?action=loginform#form"
  6. url = "http://forums.somethingawful.com/query.php?action=posthistory&userid=35304&page="
  7. USERNAME = ''
  8. PASSWORD = ''
  9. payload = { #stores our log in info to authenticate with the forums
  10.     'action': 'login',
  11.     'username': USERNAME,
  12.     'password': PASSWORD
  13. }
  14.  
  15. def checkDiv(string): #this is what im trying to use to filter out empty posts and posts that contain quotes
  16.     toriPost = string
  17.     if string is False:
  18.         toriPost = "its u"
  19.     elif "[quote=" is in string:
  20.         toriPost = "its u"
  21.     else:
  22.         toriPost = string
  23.     return toriPost
  24.        
  25. with session() as c: #so i think what this does is create a session and closes that session automatically?
  26.     c.post(login_page, data = payload)
  27.     for x in range(1,101): #generating the page number of post history. 1-100 is what is available
  28.         response = c.get(url + str(x)) #grabs the html? of the page + post number generated
  29.         soup = BeautifulSoup(response.text, 'html.parser') #parse with BS4
  30.         mydivs = soup.findAll("div", {"class" : "blurb"})
  31.         my_file = open("tori.txt", "a")
  32.         for div in mydivs:
  33.             my_file.write(checkDiv(div.string) + "\n")
  34.         my_file.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement