from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import requests
import ssl
import OpenSSL
import asyncio
import aiohttp
from aiosocksy import Socks5Auth
from aiosocksy.connector import ProxyConnector, ProxyClientRequest

proxies = []
headers = {}
cities = ['city1', 'city2', 'city3']
categories = ['cat1', 'cat2', 'cat3']
targets = []

# Build request headers with a random Chrome user agent
def get_headers():
    global headers
    headers = {'User-Agent': str(ua.chrome)}
    return headers

# Fetch a fresh list of SOCKS5 proxies
def get_proxy():
    global proxies
    r = requests.get('https://api.proxyscrape.com/?request=getproxies&proxytype=socks5&timeout=10000&country=all&uptime=0')
    rows = r.text.split(u'\r\n')
    for row in rows:
        if row and row not in proxies:
            proxies.append(row)
    print('Number of proxies:', len(proxies))

# Pick a proxy from the list, validating it against ya.ru
def set_proxy():
    global proxy, proxies
    if len(proxies) < 5:
        get_proxy()
        print('REQUESTED NEW PROXIES', len(proxies))
    proxy = proxies[0]
    try:
        r = requests.get('https://ya.ru',
                         headers=get_headers(),
                         proxies={'https': 'socks5h://' + proxy, 'http': 'socks5h://' + proxy},
                         timeout=3)
        soup = BeautifulSoup(r.content, 'html.parser')
        title = soup.find('title')
        if '— Яндекс' in title.text:
            return proxy
        else:
            # Proxy answered but not with the expected page: discard it and retry
            proxies.remove(proxy)
            return set_proxy()
    except Exception:
        # Proxy failed or timed out: discard it and retry
        proxies.remove(proxy)
        return set_proxy()

get_proxy()
ua = UserAgent()

async def fetch(client, city):
    # Walk the category pages, follow listing links two levels deep and
    # collect the info blocks (URLs here are placeholders; see build_url below)
    for category in categories:
        async with client.get(category, headers=get_headers(),
                              proxy='socks5://' + str(set_proxy())) as a:
            soup = BeautifulSoup(await a.read(), 'html.parser')
            new_links = soup.find_all('a', class_='mylink')
            for new_link in new_links:
                async with client.get(new_link['href'], headers=get_headers(),
                                      proxy='socks5://' + str(set_proxy())) as b:
                    soup = BeautifulSoup(await b.read(), 'html.parser')
                    new_new_links = soup.find_all('a', class_='mylink2')
                    for new_new_link in new_new_links:
                        async with client.get(new_new_link['href'], headers=get_headers(),
                                              proxy='socks5://' + str(set_proxy())) as c:
                            soup = BeautifulSoup(await c.read(), 'html.parser')
                            info = soup.find('span', class_='myinfo')
                            targets.append(info)
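# Hypothetical sketch: the 'city1'/'cat1' strings above are placeholders and the
# real URL scheme is not shown, so this helper only illustrates one way fetch()
# could turn a city/category pair into a request URL, assuming a layout like
# https://example.com/<city>/<category>/ (base URL and path are assumptions).
def build_url(city, category, base='https://example.com'):
    # e.g. build_url('city1', 'cat1') -> 'https://example.com/city1/cat1/'
    return '{}/{}/{}/'.format(base, city, category)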
async def main():
    """
    Creates a group of coroutines and waits for them all to finish.
    """
    # create a client session whose connector can route requests through SOCKS proxies
    connector = ProxyConnector()
    async with aiohttp.ClientSession(connector=connector, request_class=ProxyClientRequest) as client:
        # create one coroutine per city
        coroutines = [fetch(client, city) for city in cities]
        # wait until every coroutine has completed
        completed, pending = await asyncio.wait(coroutines)
        # iterate over the completed results
        for item in completed:
            print('City done')

if __name__ == '__main__':
    # get the event loop instance
    event_loop = asyncio.get_event_loop()
    try:
        # run the event loop until main() completes
        event_loop.run_until_complete(main())
    finally:
        # always close the loop
        event_loop.close()