Advertisement
drunkenduck

Untitled

May 17th, 2019
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.19 KB | None | 0 0
  1. import requests
  2. import re
  3. from tqdm import tqdm
  4. from bs4 import BeautifulSoup, element
  5. from collections import Counter
  6.  
  7.  
  8. def decoded_poster(poster):
  9.     """ Decode emails (cloudfare protection) """
  10.     poster = poster.contents[0]
  11.     if isinstance(poster, element.Tag):
  12.         poster = poster['data-cfemail']
  13.         decoded = ""
  14.         k = int(poster[:2], 16)
  15.         for ii in range(2, len(poster)-1, 2):
  16.             decoded += chr(int(poster[ii:ii+2], 16)^k)
  17.         poster = decoded
  18.     return str(poster)
  19.  
  20.  
  21. all_killers = Counter()
  22. status_icon = re.compile('thread_statusicon')
  23. to_keep = re.compile('^((?!lock|moved).)*$')
  24. for i in tqdm(range(2, 1379)):
  25.     page = requests.get('https://netgamers.it/forumdisplay.php?f=14&order=desc&page=' + str(i))
  26.     threads = BeautifulSoup(page.text, 'html.parser').find(id='threadslist')
  27.     threads = ''.join([str(img.parent.parent) for img in threads.find_all('img', id=status_icon, src=to_keep)])
  28.     threads = BeautifulSoup(threads, 'html.parser')
  29.     page_killers = [decoded_poster(pb) for pb in threads.find_all('a', rel='nofollow', href=re.compile('member\.php'))]
  30.     all_killers.update(page_killers[3:])
  31. print(all_killers.most_common(10))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement