import concurrent.futures
import logging
import time
import urllib.request

from bs4 import BeautifulSoup
from tqdm import tqdm

userid = 576203327   # VK user id (the document owner)
docid = 574850052    # first document id to check
num_page = 1000      # how many consecutive document ids to scan

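# VK document pages have the form https://vk.com/doc<userid>_<doc_number>;
# the script probes num_page consecutive ids starting at docid.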
URLS = [f"https://vk.com/doc{userid}_{doc_number}"
        for doc_number in range(docid, docid + num_page)]

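# Found image URLs are mirrored to app.log; filemode='w' truncates the log
# at the start of every run, so it only holds the most recent scan.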
logging.basicConfig(filename='app.log', filemode='w',
                    format='%(message)s', level=logging.INFO)

def load_url(url, timeout):
    """Fetch one document page and return its image URL, or '' if none."""
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        img_url = ''
        bs = BeautifulSoup(conn, 'html.parser')
        # The VK document viewer embeds the scan as an <img class="can_zoom">.
        images = bs.find_all('img', class_='can_zoom')
        if images:
            img_url = images[0].get('src')
            logging.info(img_url)
        return img_url

start = time.time()
with tqdm(total=num_page) as pbar:
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
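        # as_completed() yields futures in the order they finish, so matches
        # are printed as soon as each page has been fetched, not in URL order.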
        for future in concurrent.futures.as_completed(future_to_url):
            pbar.update(1)
            url = future_to_url[future]
            try:
                data = future.result()
            except Exception as exc:
                print(f'{url!r} generated an exception: {exc}')
            else:
                if data != '':
                    print(data)

end = time.time()
print(end - start)  # total scan time in seconds
input()             # keep the console window open until Enter is pressed