Hellerick_Ferlibay

Creating Toman anthology

Mar 14th, 2016
356
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.53 KB | None | 0 0
  1. #! python3
  2. # -*- coding: utf-8 -*-
  3.  
  4. import requests, re, os
  5. from hashlib import sha1
  6. from datetime import datetime, timedelta
  7.  
  8. login = "Hellerick"
  9. password = "не скажу"
  10. user=1366
  11.  
  12. LF_folder = {
  13.     r'C:\Users\OTK':r'd:\HCF\Data\LF',
  14.     r'/home/hellerick':r'/home/hellerick/Documents/LF',
  15.     }[os.environ['HOME']]
  16.  
def main():
    """Script entry point: run the download of all messages."""
    downloadallmessages()
  19.  
  20. def downloadallmessages():
  21.     code = getpage(0)
  22.     username = re.search(r'<meta name="description" content="Show Posts - ([^"]+)" />',code).group(1)
  23.     print('Username:', username)
  24.     userfolder = os.path.join(LF_folder, username)
  25.     if not os.path.exists(userfolder):
  26.         os.makedirs(userfolder)
  27.     pageq = re.search(r'\.\.\. </span><a class="navPages"[^>]+>(\d+)',code).group(1)
  28.     print('Number of pages:', pageq)
  29.     for pagen in range(int(pageq)):
  30.         code = getpage(pagen)
  31.         print('Page Nr', pagen)
  32.         m = getmessages(code)
  33.         for i in m:
  34.             msgid = re.search(r'\AMSG:(\d+)\n',i).group(1)
  35.             print('MSGID:', msgid)
  36.             date = re.search(r'\nDATE:([0-9-]+)',i).group(1)
  37.             datefolder = os.path.join(userfolder, date)
  38.             if not os.path.exists(datefolder):
  39.                 os.makedirs(datefolder)
  40.             with open(os.path.join(datefolder, msgid+'.txt'), mode='wt', encoding='utf-8') as f:
  41.                 f.write(i)
  42.  
  43. def getmessages(code):
  44.     code = re.sub(r'\n','#LBR#',code)
  45.     code = re.sub(r'\A.*?<div class="topic">','',code)
  46.     code = re.sub(r'<div class="pagesection" style="margin-bottom: 0;">.*\Z','',code)
  47.     code = re.sub(
  48.         r'<strong>Today</strong> at',
  49.         datetime.strftime(datetime.now()+timedelta(hours=-0),"%Y-%m-%d")+',',
  50.         code)
  51.     code = re.sub(
  52.         r'<strong>Yesterday</strong> at',
  53.         datetime.strftime(datetime.now()+timedelta(hours=-24),"%Y-%m-%d")+',',
  54.         code)
  55.  
  56.     m = re.split('<div class="topic">', code)
  57.     m = [re.sub('\A.*?#msg(\d+)">([^<]+)</a>.+?</strong> ([^<]+)&nbsp;.+<div class="list_posts">#LBR#\t*(.+)#LBR#\t\t\t\t\t</div>.*\Z',r'MSG:\1\nSUBJ:\2\nDATE:\3\nBODY:\4',i) for i in m]
  58.     return m
  59.  
  60. def getpage(n):
  61.     session = authorize(login, password)
  62.     p = session.get('http://lingvoforum.net/index.php?action=profile;u={};area=showposts;start={}'.format(user,n*25)).text
  63.     return p
  64.  
  65. def authorize(login, password):
  66.     session = requests.Session()
  67.     index = session.get("http://lingvoforum.net/")
  68.     session_id = re.search("hashLoginPassword\(this, '(.+?)'\)", index.text).group(1)
  69.     hash_password = hashLoginPassword(login, password, session_id)
  70.     login_response = session.post("http://lingvoforum.net/index.php?action=login2", data = {
  71.         "user": login, "passwrd": "", "coockielength": -1, "hash_passwrd": hash_password
  72.     })
  73.     return session
  74.  
  75. def hashLoginPassword(login, password, session_id):
  76.     return sha1(sha1(php_to8bit(login).lower() + php_to8bit(password)).hexdigest().encode("ascii")
  77.                 + session_id.encode("ascii")).hexdigest()
  78.  
  79. def php_to8bit(string):
  80.     res = b""
  81.     byte = lambda i: bytes((i,))
  82.     for n in (ord(c) for c in string):
  83.         if n < 128:
  84.             res += byte(n)
  85.         elif n < 2048:
  86.             res += byte(192 | n >> 6) + byte(128 | n & 63)
  87.         elif n < 65536:
  88.             res += byte(224 | n >> 12) + byte(128 | n >> 6 & 63) + byte(128 | n & 63)
  89.         else:
  90.             res += byte(240 | n >> 18) + byte(128 | n >> 12 & 63) + byte(128 | n >> 6 & 63) + byte(128 | n & 63)
  91.     return res
  92.  
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment