Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
import time
import traceback
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
# Matches date strings that do NOT belong to 2017:
#   * "12mon" .. "19mon"  (presumably a relative age in months -- TODO confirm
#     against the site's timestamp format),
#   * "200" or "201" followed by any character other than "7",
#   * "2007" explicitly -- the "[^7]" above would otherwise let it slip
#     through and be reported as the current year.
_NOT_CURRENT_YEAR_RE = re.compile(r"(1[2-9]mon)|(20(0|1)[^7])|(2007)")


def isCurrentYear(dateStr):
    """Return True when *dateStr* shows no sign of a year other than 2017.

    The check is purely textual: the string is treated as "current year"
    unless it contains one of the markers described on
    ``_NOT_CURRENT_YEAR_RE``. Strings without any year information (including
    the empty string) are therefore reported as current.

    Args:
        dateStr: Timestamp text scraped from a page.

    Returns:
        bool: False if a non-2017 marker is found, True otherwise.
    """
    return _NOT_CURRENT_YEAR_RE.search(dateStr) is None
def writePostCount(postCount, filename):
    """Write the post counts to *filename*, one ``name:count`` line per entry.

    The file is overwritten if it exists. It is written as UTF-8 so that
    names containing non-ASCII characters do not fail on platforms whose
    default encoding cannot represent them.

    Args:
        postCount: Mapping of username to number of posts.
        filename: Path of the file to (over)write.

    Returns:
        The *filename* that was passed in.
    """
    # The context manager guarantees the handle is closed even if a write
    # raises part-way through.
    with open(filename, "w", encoding="utf-8") as output_file:
        output_file.writelines(
            "{}:{}\n".format(username, count)
            for username, count in postCount.items()
        )
    return filename
# Base address of the site being scraped.
BASE_URL = "https://epicmafia.com"

# Retry policy for page downloads: total attempts per URL and the pause
# between two attempts.
MAX_FETCH_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 2.0


def _fetch_page(url):
    """Download *url* and return the raw response body.

    A response that raises ``urllib.error.HTTPError`` is retried after
    ``RETRY_DELAY_SECONDS``, for at most ``MAX_FETCH_ATTEMPTS`` attempts in
    total; the last ``HTTPError`` is re-raised if every attempt fails. Any
    other exception propagates immediately.
    """
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            with urllib.request.urlopen(url) as response:
                return response.read()
        except urllib.error.HTTPError as err:
            if attempt == MAX_FETCH_ATTEMPTS:
                raise
            print("HTTP error {} for {}, retrying...".format(err.code, url))
            # Pause instead of hammering the server in a tight loop.
            time.sleep(RETRY_DELAY_SECONDS)
    raise ValueError("MAX_FETCH_ATTEMPTS must be at least 1")


def _count_thread_posts(thread_href, post_count):
    """Add the current-year posts of one thread to *post_count*.

    The thread is first requested without a ``page`` parameter. Paging then
    walks backwards from whichever page the navigation bar marks as selected
    down to page 1 (presumably the bare URL shows the newest page -- TODO
    confirm against the site).

    Args:
        thread_href: Site-relative link to the thread.
        post_count: Dict of username -> post count, updated in place.
    """
    page_num = None  # None means "request the thread URL without ?page="
    while True:
        thread_urlstr = BASE_URL + thread_href
        if page_num is not None:
            thread_urlstr += "?page=" + str(page_num)
        thread_soup = BeautifulSoup(_fetch_page(thread_urlstr), "html.parser")
        print("Scanning "+thread_urlstr+"...")

        for post in thread_soup.find_all(class_="postuser"):
            userlink = post.find(name="a", class_="tt")
            created_at = post.find(class_="created_at").text
            if not isCurrentYear(created_at):
                continue
            if userlink is None:  # deleted user
                continue
            username = userlink.text
            post_count[username] = post_count.get(username, 0) + 1

        pagenav = thread_soup.find(class_="pagenav")
        if pagenav is None:
            # No navigation bar: the thread has a single page.
            break
        selected_page = int(pagenav.find(class_="selected").contents[0].text)
        if selected_page == 1:
            break
        page_num = selected_page - 1


def _scan_forum(forum, post_count):
    """Scan the listing pages of one forum and count posts in its threads.

    Scanning of the forum stops at the first thread whose last-post date is
    not in the current year (this relies on the listing being ordered by last
    post -- TODO confirm), or when a listing page contains no threads at all,
    which would otherwise make the page counter run forever.

    Args:
        forum: Forum identifier as used in the site's URL.
        post_count: Dict of username -> post count, updated in place.
    """
    page_num = 1
    while True:
        forum_urlstr = BASE_URL + "/forum/" + forum + "?page=" + str(page_num)
        print("Scanning "+forum_urlstr+"...")
        forum_soup = BeautifulSoup(_fetch_page(forum_urlstr), "html.parser")
        threads = forum_soup.find_all(name="a", class_="topic-title")
        if not threads:
            # Ran past the last listing page.
            return
        for thread in threads:
            # The last-post cell is looked up from the thread link's
            # great-grandparent element.
            row = thread.parent.parent.parent
            lastpost = row.find(class_="lastpost").find(class_="sg").text
            if not isCurrentYear(lastpost):
                return
            _count_thread_posts(thread.get("href"), post_count)
        page_num += 1


# Username -> number of current-year posts, accumulated over all forums.
postCount = {}
# Identifiers of the forums to scan.
forums = ["590", "29", "642", "665", "44", "538"]
for forum in forums:
    _scan_forum(forum, postCount)
writePostCount(postCount, "test.txt")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement