Advertisement
Morgan_iv

Untitled

May 21st, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. imgs = []
  2. for a in tqdm(arts):
  3.     so = BeautifulSoup(a['text'], 'html5lib')
  4.     for t in so('img'):
  5.         imgs.append(t['src'])
  6.  
  7. imgs = list(map(lambda i: re.sub(r'^/pictures/enc_mathematics/(\d+-\d+\.jpg)$', r'\1', i), imgs))
  8. json.dump(imgs, open('imgs.json', 'w'), indent=4)
  9.  
  10. imgs = json.load(open('imgs.json', 'r'))
  11.  
  12. def dl_image(name):
  13.     pic_url = 'https://dic.academic.ru/pictures/enc_mathematics/{}'.format(name)
  14.     filename = './pics/{}'.format(name)
  15.     with open(filename, 'wb') as handle:
  16.         try:
  17.             response = requests.get(pic_url, stream=True)
  18.         except Exception:
  19.             print (name)
  20.             return name
  21.  
  22.         if not response.ok:
  23.             print (name)
  24.             return name
  25.  
  26.         for block in response.iter_content(1024):
  27.             if not block:
  28.                 print (name)
  29.                 return name
  30.  
  31.             handle.write(block)
  32.            
  33.         return 'ok'
  34.  
  35. with Pool(8) as p:
  36.     r = list(tqdm(p.imap(dl_image, imgs), total=len(imgs)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement