Guest User

Untitled

a guest
Jul 15th, 2021
1,064
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.42 KB | None | 0 0
  1. import time
  2. import logging
  3. import concurrent.futures
  4. import urllib.request
  5. from bs4 import BeautifulSoup
  6. from tqdm import tqdm
  7.  
  8. userid=576203327 #id vk
  9. docid=574850052 #start doc number
  10. num_page=1000 #number scan
  11. URLS = [f"https://vk.com/doc{userid}_{doc_number}" for doc_number in range(docid,docid+num_page)]
  12. logging.basicConfig(filename='app.log', filemode='w',
  13.  format='%(message)s', level=logging.INFO)  
  14.  
  15. def load_url(url, timeout):
  16.     with urllib.request.urlopen(url, timeout=timeout) as conn:
  17.         url= ''
  18.         bs = BeautifulSoup(conn, 'html.parser')
  19.         images = bs.find_all('img', class_="can_zoom")
  20.         if images:
  21.             url=images[0].get('src')
  22.             logging.info(url)
  23.         return url
  24.  
  25.  
  26. start=time.time()
  27. with tqdm(total=num_page) as pbar:
  28.     with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
  29.             future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
  30.             for future in concurrent.futures.as_completed(future_to_url):
  31.                 pbar.update(1)
  32.                 url = future_to_url[future]
  33.                 try:
  34.                     data = future.result()
  35.                 except Exception as exc:
  36.                     print('%r generated an exception: %s' % (url, exc))
  37.                 else:
  38.                     if data!='':
  39.                         print(f'{data}')
  40. end=time.time()
  41. print(end-start)
  42. input()
  43.  
Advertisement
Add Comment
Please, Sign In to add comment