Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import re
- from tqdm import tqdm
- from bs4 import BeautifulSoup, element
- from collections import Counter
def decoded_poster(poster):
    """Return the poster name held by *poster* as a plain string.

    Only the first child of ``poster`` is inspected. When that child is a
    tag (the Cloudflare email-protection wrapper), its ``data-cfemail``
    attribute is decoded: the first hex byte is the XOR key and every
    following hex byte is one character XOR-ed with that key. Any other
    first child is simply converted with ``str``.
    """
    first_child = poster.contents[0]
    if not isinstance(first_child, element.Tag):
        return str(first_child)

    encoded = first_child['data-cfemail']
    key = int(encoded[:2], 16)
    # Stop one short of the end so a dangling odd hex digit is never decoded.
    offsets = range(2, len(encoded) - 1, 2)
    return ''.join(chr(int(encoded[pos:pos + 2], 16) ^ key) for pos in offsets)
# Tally of poster names collected across every scraped forum page.
all_killers = Counter()
# Matches the id of each thread's status-icon <img>.
status_icon = re.compile('thread_statusicon')
# Matches only icon sources containing neither "lock" nor "moved".
to_keep = re.compile('^((?!lock|moved).)*$')
# Raw string: '\.' inside a normal literal is an invalid escape sequence.
# Compiled once here instead of on every loop iteration.
member_link = re.compile(r'member\.php')

FORUM_URL = 'https://netgamers.it/forumdisplay.php?f=14&order=desc&page='
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# A single session reuses the underlying connection for all page requests.
with requests.Session() as session:
    for i in tqdm(range(2, 1379)):
        try:
            page = session.get(FORUM_URL + str(i), timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable page should not discard the whole tally.
            tqdm.write('page {}: request failed ({}), skipped'.format(i, exc))
            continue

        threads = BeautifulSoup(page.text, 'html.parser').find(id='threadslist')
        if threads is None:
            # Error pages or layout changes carry no thread list to scan.
            tqdm.write('page {}: no threadslist element, skipped'.format(i))
            continue

        # Keep the grandparent element of every status icon that passes the
        # lock/moved filter, then re-parse just those fragments.
        threads = ''.join(
            str(img.parent.parent)
            for img in threads.find_all('img', id=status_icon, src=to_keep)
        )
        threads = BeautifulSoup(threads, 'html.parser')
        page_killers = [
            decoded_poster(pb)
            for pb in threads.find_all('a', rel='nofollow', href=member_link)
        ]
        # NOTE(review): the first three matches on each page are dropped;
        # presumably they belong to pinned threads repeated on every page --
        # confirm against the forum markup.
        all_killers.update(page_killers[3:])

print(all_killers.most_common(10))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement