Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! python3
- # -*- coding: utf-8 -*-
- import requests, re, os
- from hashlib import sha1
- from datetime import datetime, timedelta
# Forum credentials used to log in.  The password literal is a placeholder
# (Russian for "won't tell") and has to be replaced with the real one.
login = "Hellerick"
password = "не скажу"
# Numeric id of the forum member whose posts are listed (the ``u=`` query
# parameter of the profile URL).
user = 1366
# Root folder of the local archive, selected by the current home directory.
# HOME is used when it is set; otherwise fall back to expanduser('~'),
# because HOME is normally undefined on Windows and a bare
# os.environ['HOME'] lookup raises KeyError there.  A home directory that is
# not listed below still raises KeyError.
LF_folder = {
    r'C:\Users\OTK': r'd:\HCF\Data\LF',
    r'/home/hellerick': r'/home/hellerick/Documents/LF',
}[os.environ.get('HOME') or os.path.expanduser('~')]
def main():
    """Script entry point: run the complete download."""
    downloadallmessages()
def downloadallmessages():
    """Download every listed post of the configured user into ``LF_folder``.

    The first listing page supplies the user name and the page count.  Each
    parsed message is written to
    ``<LF_folder>/<username>/<date>/<message id>.txt`` as UTF-8 text.

    Raises:
        AttributeError: if the user name cannot be found on the first page.
    """
    code = getpage(0)
    username = re.search(
        r'<meta name="description" content="Show Posts - ([^"]+)" />',
        code).group(1)
    print('Username:', username)
    userfolder = os.path.join(LF_folder, username)
    os.makedirs(userfolder, exist_ok=True)
    pageq = _count_pages(code)
    print('Number of pages:', pageq)
    for pagen in range(pageq):
        # Page 0 is already in ``code``; fetching it again would cost an
        # extra login and request for the same content.
        if pagen > 0:
            code = getpage(pagen)
        print('Page Nr', pagen)
        for message in getmessages(code):
            _save_message(userfolder, message)


def _count_pages(code):
    """Return the number of listing pages advertised by a listing page."""
    last = re.search(r'\.\.\. </span><a class="navPages"[^>]+>(\d+)', code)
    if last is not None:
        return int(last.group(1))
    # NOTE(review): presumably short listings show their page links without
    # the "..." separator; take the highest linked page number, or assume a
    # single page when there are no page links at all -- confirm against
    # the live markup.
    numbers = re.findall(r'<a class="navPages"[^>]+>(\d+)', code)
    return max((int(number) for number in numbers), default=1)


def _save_message(userfolder, message):
    """Write one parsed message to ``<userfolder>/<date>/<msgid>.txt``."""
    msgid = re.search(r'\AMSG:(\d+)\n', message)
    date = re.search(r'\nDATE:([0-9-]+)', message)
    if msgid is None or date is None:
        # getmessages() hands back chunks it could not parse unchanged;
        # skip them instead of aborting the whole download.
        print('Skipped a chunk that could not be parsed')
        return
    print('MSGID:', msgid.group(1))
    datefolder = os.path.join(userfolder, date.group(1))
    os.makedirs(datefolder, exist_ok=True)
    target = os.path.join(datefolder, msgid.group(1) + '.txt')
    with open(target, mode='wt', encoding='utf-8') as f:
        f.write(message)
def getmessages(code):
    """Split the HTML of one listing page into plain-text message records.

    Args:
        code: HTML text of a listing page.

    Returns:
        A list of strings, one per ``<div class="topic">`` section.  A
        section that matches the expected layout is rewritten to::

            MSG:<id>\\nSUBJ:<subject>\\nDATE:<date>\\nBODY:<body>

        where line breaks inside the body appear as the marker ``#LBR#``.
        A section that does not match is returned unchanged (apart from
        the ``#LBR#`` substitution).
    """
    # '.' does not match a newline, so line breaks are replaced by a marker
    # to let the patterns below span what used to be several lines.
    code = code.replace('\n', '#LBR#')
    # Drop everything before the first message and after the last one.
    code = re.sub(r'\A.*?<div class="topic">', '', code)
    code = re.sub(
        r'<div class="pagesection" style="margin-bottom: 0;">.*\Z', '', code)
    # Turn relative dates into absolute ones.  The clock is read once so
    # that "Today" and "Yesterday" are always exactly one day apart.
    now = datetime.now()
    relative_days = (
        ('<strong>Today</strong> at', now),
        ('<strong>Yesterday</strong> at', now - timedelta(hours=24)),
    )
    for marker, day in relative_days:
        code = code.replace(marker, day.strftime("%Y-%m-%d") + ',')
    # Raw string: \A, \d and \Z are regex escapes, not string escapes.
    message_re = re.compile(
        r'\A.*?#msg(\d+)">([^<]+)</a>.+?</strong> ([^<]+) .+<div class="list_posts">#LBR#\t*(.+)#LBR#\t\t\t\t\t</div>.*\Z')
    return [
        message_re.sub(r'MSG:\1\nSUBJ:\2\nDATE:\3\nBODY:\4', chunk)
        for chunk in code.split('<div class="topic">')
    ]
def getpage(n):
    """Log in and return the HTML text of listing page ``n`` (0-based).

    The ``start`` query parameter is ``n * 25``.
    """
    url_template = 'http://lingvoforum.net/index.php?action=profile;u={};area=showposts;start={}'
    offset = n * 25
    forum = authorize(login, password)
    response = forum.get(url_template.format(user, offset))
    return response.text
def authorize(login, password):
    """Open a ``requests`` session and submit the forum login form.

    Args:
        login: forum user name.
        password: plain-text password; only a hash of it is posted.

    Returns:
        The ``requests.Session`` used for the login request.  The login
        response is not inspected, so a rejected login is not detected here.

    Raises:
        AttributeError: if the session id cannot be found on the index page.
    """
    session = requests.Session()
    index = session.get("http://lingvoforum.net/")
    # The session id is embedded in the page as an argument of a
    # hashLoginPassword(this, '...') call.  Raw string: \( and \) are regex
    # escapes, not string escapes.
    session_id = re.search(
        r"hashLoginPassword\(this, '(.+?)'\)", index.text).group(1)
    hash_password = hashLoginPassword(login, password, session_id)
    # NOTE(review): "coockielength" looks like a misspelling of the form
    # field name; kept byte-for-byte because changing it would alter the
    # request sent to the server -- confirm against the login form.
    session.post("http://lingvoforum.net/index.php?action=login2", data={
        "user": login, "passwrd": "", "coockielength": -1,
        "hash_passwrd": hash_password,
    })
    return session
def hashLoginPassword(login, password, session_id):
    """Return the salted SHA-1 login hash as a hex string.

    The value is ``sha1(sha1(lower(login) + password) + session_id)``, where
    the inner digest is taken in its hexadecimal text form and login and
    password are first converted to bytes with php_to8bit().
    """
    credentials = php_to8bit(login).lower() + php_to8bit(password)
    inner_hex = sha1(credentials).hexdigest()
    salted = inner_hex.encode("ascii") + session_id.encode("ascii")
    return sha1(salted).hexdigest()
def php_to8bit(string):
    """Return ``string`` encoded as UTF-8 bytes.

    Args:
        string: text to encode.

    Returns:
        The UTF-8 byte sequence of ``string``.  Lone surrogate code points
        are encoded as ordinary three-byte sequences instead of raising,
        which is what the ``surrogatepass`` error handler provides.
    """
    # The codec does in one pass what a per-character loop with repeated
    # bytes concatenation would do in quadratic time.
    return string.encode('utf-8', 'surrogatepass')
# Start the download only when the file is executed as a script, not when
# it is imported as a module.
if '__main__' == __name__:
    main()
Advertisement
Add Comment
Please, Sign In to add comment