Advertisement
Guest User

Untitled

a guest
May 16th, 2021
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.78 KB | None | 0 0
  1. import bs4
  2. import mechanize
  3. import re
  4. import shelve
  5.  
  6. if __name__ == '__main__':
  7.  
  8.     agent = [('User-agent', 'Mozilla/5.0 (X11;U;Linux 2.4.2.-2 i586; en-us;m18) Gecko/200010131 Netscape6/6.01')]
  9.     browser = mechanize.Browser()
  10.     browser.set_handle_robots(False)
  11.     browser.addheaders=agent
  12.     browser.open('https://forums.somethingawful.com/account.php?action=loginform')
  13.     browser.form = browser.forms()[0]
  14.     browser['username'] = ''
  15.     browser['password'] = ''
  16.     response = browser.submit()
  17.     response = browser.open('https://forums.somethingawful.com/showthread.php?threadid=3904417&userid=171559&perpage=40&pagenumber=1')
  18.     html_text = str(response.read())
  19.     page_count_element = re.compile('class="pages top".*?div>')
  20.     text = page_count_element.findall(html_text)
  21.     page_count = re.compile('option value="([0-9]+)"')
  22.     number = page_count.finditer(str(text))
  23.     pages = []
  24.     for i in number:
  25.         pages.append(int(i.group(1)))
  26.     pages.sort()
  27.     last_page = pages.pop()
  28.     #print(pages)
  29.     last_page_url = 'https://forums.somethingawful.com/showthread.php?threadid=3904417&userid=171559&perpage=40&pagenumber=%d' %(last_page)
  30.     #print(last_page_url)
  31.     response = browser.open(last_page_url)
  32.     html_text = response.read()
  33.     page = bs4.BeautifulSoup(html_text, 'html.parser')
  34.     posts = page.find_all(class_="post")
  35.     with shelve.open('xlol') as lol_store:
  36.         for i in posts:
  37.             pid = str(i["id"])
  38.             if pid in lol_store.keys():
  39.                 if str(i) != lol_store[pid][len(lol_store[pid])-1]:
  40.                     lol_store[pid].append(str(i))
  41.             else:
  42.                 lol_store[pid] = [str(i)]
  43.  #   with shelve.open('xlol') as lol_store:
  44.  #       for i in lol_store.keys():
  45.  #           print(i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement