Advertisement
Nicba1010

JonnySP

Dec 28th, 2018
153
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.38 KB | None | 0 0
  1. import json
  2. import os
  3. import time
  4. from typing import List
  5.  
  6. import requests
  7. from bs4 import BeautifulSoup
  8. from requests import Session, Response
  9.  
  10.  
  11. class JonnySPScraper(object):
  12.     base_url: str = "http://jonnysp.bplaced.net/"
  13.     url: str = "{}data.php?draw=1&_=()&start=0&length=100000&columns%5B0%5D%5Bd%5D".format(
  14.         base_url, int(time.time() * 1000)
  15.     )
  16.  
  17.     def __init__(self, files_root: str):
  18.         self.ird_urls: List[str] = []
  19.         self.files_root: str = files_root
  20.         with Session() as s:
  21.             data = json.loads(s.get(self.url).content, encoding='UTF-8')
  22.             for ird in data['data']:
  23.                 soup: BeautifulSoup = BeautifulSoup(ird['filename'], 'lxml')
  24.                 ird_url: str = self.base_url + soup.find('a')['href']
  25.                 self.ird_urls.append(ird_url)
  26.  
  27.         for ird_url in self.ird_urls:
  28.             self.download_ird(ird_url)
  29.  
  30.     def download_ird(self, ird_url: str):
  31.         file_name: str = ird_url.split('/')[-1]
  32.         r: Response = requests.get(ird_url, stream=True)
  33.         with open(os.path.join(self.files_root, file_name), 'wb') as f:
  34.             for chunk in r.iter_content(chunk_size=512 * 1024 * 1024):
  35.                 if chunk:
  36.                     f.write(chunk)
  37.         print(ird_url + " has been downloaded!")
  38.  
  39.  
  40. if __name__ == '__main__':
  41.     JonnySPScraper(files_root="H:/IRD/")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement