Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 8 19:29:06 2018
@author: Mauro

Download the embed page of every message id from ``last_message_id`` down
to 1 under https://t.me/Scienza/ and pickle each HTTP 200 response into
``dump_folder`` as ``mid_<id>.pickle``.

Ids whose dump file already exists are skipped, so an interrupted run can
simply be started again. Any non-200 response aborts the run.
"""
import os
import pickle
import re
import ssl
import time

import requests

last_message_id = 462254
dump_folder = "./messages/"

# Seconds to wait for the server on a single request. Without a timeout a
# stalled connection would block the whole run indefinitely; with it, the
# stall surfaces as an exception that is printed and re-raised below.
REQUEST_TIMEOUT = 30
# Pause after an SSL failure before the same message id is requested again.
SSL_RETRY_DELAY = 10
# Upper bound on SSL retries per message id, so a permanently failing id
# cannot trap the script in an endless loop.
MAX_SSL_ATTEMPTS = 5
# Pause between consecutive message ids.
REQUEST_INTERVAL = 0.11

# exist_ok avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(dump_folder, exist_ok=True)

req_counter = 0  # number of requests that returned a response so far

for mid in range(last_message_id, 0, -1):
    print("scraping message id ", mid, "...")

    filename = "mid_" + str(mid) + ".pickle"
    dump_path = os.path.join(dump_folder, filename)
    if os.path.isfile(dump_path):
        # Already downloaded by an earlier run.
        continue

    url = "https://t.me/Scienza/" + str(mid) + "?embed=1"
    r = None
    for _attempt in range(MAX_SSL_ATTEMPTS):
        try:
            r = requests.get(url, timeout=REQUEST_TIMEOUT)
            req_counter += 1
            break
        except (ssl.SSLError, requests.exceptions.SSLError) as ssle:
            # requests wraps TLS failures in its own SSLError class; the raw
            # ssl.SSLError is still caught as the original script did.
            print(ssle)
            print("waiting...")
            time.sleep(SSL_RETRY_DELAY)
            print("restarting...")
        except Exception as e:
            print(e)
            raise  # bare raise keeps the original traceback intact
        finally:
            print("requests till now", req_counter)

    if r is None:
        # Every attempt hit an SSL error. Stay best-effort: leave this id
        # without a dump file so that a later run picks it up again.
        print("giving up on message id ", mid, "for this run")
    elif r.status_code == 200:
        # Write to a temporary name and rename afterwards: resuming relies
        # on the final file's existence, so a half-written pickle must never
        # be visible under that name.
        tmp_path = dump_path + ".tmp"
        with open(tmp_path, "wb") as f:
            pickle.dump(r, f)
        os.replace(tmp_path, dump_path)
    else:
        print(r.status_code)
        print("requests till now", req_counter)
        raise RuntimeError(
            "unexpected HTTP status {} for message id {}".format(
                r.status_code, mid
            )
        )

    time.sleep(REQUEST_INTERVAL)
Add Comment
Please, Sign In to add comment