Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import os
- import time
- from typing import List
- import requests
- from bs4 import BeautifulSoup
- from requests import Session, Response
class JonnySPScraper(object):
    """Scrape the IRD index from jonnysp.bplaced.net and download every file.

    Instantiating the class does all the work: it fetches the JSON index,
    extracts each IRD download link from the HTML snippet in the index, and
    streams every file into ``files_root``.
    """

    base_url: str = "http://jonnysp.bplaced.net/"
    # BUG FIX: the original template read "_=()" with only one "{}", so the
    # cache-busting timestamp passed to format() was silently discarded.
    url: str = "{}data.php?draw=1&_={}&start=0&length=100000&columns%5B0%5D%5Bd%5D".format(
        base_url, int(time.time() * 1000)
    )

    def __init__(self, files_root: str):
        """Fetch the index and download all IRD files into *files_root*.

        :param files_root: directory the downloaded files are written to
            (assumed to already exist -- TODO confirm with caller).
        """
        self.ird_urls: List[str] = []
        self.files_root: str = files_root
        with Session() as s:
            # BUG FIX: json.loads() no longer accepts an ``encoding`` keyword
            # (removed in Python 3.9); Response.json() decodes correctly.
            data = s.get(self.url, timeout=30).json()
        # Each entry's 'filename' field is an HTML snippet whose <a> href is
        # the site-relative download path of one IRD file.
        for ird in data['data']:
            soup: BeautifulSoup = BeautifulSoup(ird['filename'], 'lxml')
            ird_url: str = self.base_url + soup.find('a')['href']
            self.ird_urls.append(ird_url)
        for ird_url in self.ird_urls:
            self.download_ird(ird_url)

    def download_ird(self, ird_url: str):
        """Stream one IRD file into ``self.files_root``.

        The local file name is the last path component of *ird_url*.
        """
        file_name: str = ird_url.split('/')[-1]
        # BUG FIX: the original used a 512 MiB chunk size (buffering whole
        # files in memory) and never closed the response; stream in 64 KiB
        # chunks inside a context manager, and fail loudly on HTTP errors
        # instead of writing an error page to disk.
        with requests.get(ird_url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(os.path.join(self.files_root, file_name), 'wb') as f:
                for chunk in r.iter_content(chunk_size=64 * 1024):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        print(ird_url + " has been downloaded!")
if __name__ == '__main__':
    # Constructing the scraper kicks off the full fetch-and-download run.
    scraper = JonnySPScraper(files_root="H:/IRD/")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement