Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- from multiprocessing import Pool
- import re
- import time
- import random
- import math
- from fake_useragent import UserAgent
# Proxy mapping handed to every ``requests`` call; all entries point at a
# SOCKS5 endpoint on localhost port 9050.
# NOTE(review): ``requests`` looks proxies up by URL scheme, so the "socks5"
# key is presumably never consulted -- kept for backward compatibility.
proxies = {
    "http": "socks5://127.0.0.1:9050",
    "https": "socks5://127.0.0.1:9050",
    "socks5": "socks5://127.0.0.1:9050",
}

# Base search URL for each region; a category path from ``categories_list``
# is appended directly to one of these strings.
regions_list = [
    'https://synapsenet.ru/search/categoryinregion/bryanskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/ivanovskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/kurskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/orlovskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/tambovskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/yaroslavskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/vladimirskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/kaluzhskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/lipeckaya-obl',
    'https://synapsenet.ru/search/categoryinregion/ryazanskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/tverskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/belgorodskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/voronezhskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/kostromskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/smolenskaya-obl',
    'https://synapsenet.ru/search/categoryinregion/tulskaya-obl',
]

# ``[url_path, label]`` pairs.  The path is concatenated onto a region URL,
# so every path must start with "/"; the label is written in front of each
# scraped description.
categories_list = [
    ['/bitovaya-elektrotehnika', '_label_0'],
    # NOTE(review): the only entry with an extra "/category" segment --
    # confirm against the site before changing it.
    ['/category/zhkh', '_label_1'],
    ['/klining-utilizaciya-i-himchistka', '_label_2'],
    # The leading slash was missing here, which glued the category name
    # onto the region name in the resulting URL.
    ['/kompyuteri-mebel-i-kanctovari', '_label_3'],
    ['/les-i-pilomaterili', '_label_4'],
    ['/materiali-sire-i-polufabrikati', '_label_5'],
    ['/medicina-i-farmakologiya', '_label_6'],
    ['/metall', '_label_7'],
    ['/nedvizhimost', '_label_8'],
    ['/neft-i-gaz', '_label_9'],
    ['/obuchenie-i-obrazovanie', '_label_10'],
    ['/odezhda-i-hoztovari', '_label_11'],
    ['/ohrana-bezopasnost-i-signalizaciya', '_label_12'],
    ['/perevozka-transportnie-uslugi', '_label_13'],
    ['/produkti', '_label_14'],
    ['/proektirovanie-i-inzhenernie-uslugi', '_label_15'],
    ['/promishlennoe-oborudovanie', '_label_16'],
    ['/reklama', '_label_17'],
    ['/svyaz-i-kommunikacii', '_label_18'],
    ['/selskoe-hozyajstvo', '_label_19'],
    ['/strahovanie-i-yuridicheskie-uslugi', '_label_20'],
    ['/stroitelnie-materiali-i-oborudovanie', '_label_21'],
    ['/stroitelstvo-i-remont', '_label_22'],
    ['/transport-i-spectehnika', '_label_23'],
    ['/uslugi-dlya-naseleniya', '_label_24'],
    ['/himiya', '_label_25'],
    ['/energetika', '_label_26'],
]
# Number of tenders the page-count computation assumes per result page.
_TENDERS_PER_PAGE = 20

# Everything that is not a decimal digit; used to clean the tender counter.
_NON_DIGITS = re.compile(r'\D')

# Everything except Cyrillic letters in the range а-я / А-Я and spaces.
_NON_CYRILLIC = re.compile('[^а-яА-Я ]')


def _fetch_with_retry(url, user_agent):
    """Return the response for ``url``, retrying until a request succeeds.

    Each attempt sends a freshly randomised User-Agent header through the
    module-level ``proxies`` with a 6 second timeout.  The session is
    closed before returning; the response body has already been read by
    then, so ``.text`` and ``.status_code`` stay usable.
    """
    first_failure = True
    with requests.Session() as session:
        while True:
            try:
                return session.get(
                    url,
                    headers={'User-Agent': str(user_agent.random)},
                    proxies=proxies,
                    timeout=6
                )
            except Exception as exc:
                # Deliberately broad: the scraper is best-effort and keeps
                # retrying whatever the transport raises.
                if first_failure:
                    print("Cоединение оборвалось, попытка его востановить")
                    first_failure = False
                else:
                    print(exc)
                    print("Попытка востновить соединение")
                # Pause so a dead connection is not retried in a tight loop.
                time.sleep(random.uniform(1, 1.5))


def _parse_pages_count(html):
    """Return the number of result pages announced in ``html``, or ``None``.

    ``None`` means the ``#searchV2-tenders-count`` element is missing or
    contains no digits.
    """
    counters = BeautifulSoup(html, "html.parser").select("#searchV2-tenders-count")
    if not counters:
        return None
    digits = _NON_DIGITS.sub('', counters[0].getText())
    if not digits:
        return None
    # Integer ceiling division: a partially filled last page still counts.
    return (int(digits) + _TENDERS_PER_PAGE - 1) // _TENDERS_PER_PAGE


def _append_descriptions(page_soup, label):
    """Append one ``label text`` line per description found in ``page_soup``."""
    for item in page_soup.select(".searchV2-found-in"):
        nested_div = item.find("div")
        if nested_div is not None:
            # Drop the first nested <div> so its text is not written out.
            nested_div.decompose()
        line = label + " " + _NON_CYRILLIC.sub('', item.getText()) + '\n'
        # The file is reopened for every line on purpose: several worker
        # processes append to it, and short open/write/close cycles keep
        # each record a single small write.
        with open("fortest.txt", "a", encoding='utf8') as f:
            f.write(line)


def bot(regions_list):
    """Scrape tender descriptions for one region into ``fortest.txt``.

    Despite its name, ``regions_list`` is a single region base URL (a
    string): the entry point maps this function over the module-level
    list, and each category path is concatenated onto the argument.

    For every entry of ``categories_list`` the function reads the tender
    counter, walks result pages 1..N (inclusive) and appends each
    description, prefixed with the category label and reduced to Cyrillic
    letters and spaces, to ``fortest.txt``.  A category is abandoned when
    the counter cannot be parsed, when the "searchV2-pressed-text" marker
    appears, or when a page contains no "tender-money" element.

    Returns ``None``; results are only written to the file and stdout.
    """
    region_url = regions_list
    # Build the User-Agent source once; ``.random`` yields a new value on
    # every access.
    user_agent = UserAgent()

    for category_path, label in categories_list:
        category_url = region_url + category_path

        count_response = _fetch_with_retry(category_url, user_agent)
        pages_count = _parse_pages_count(count_response.text)
        if pages_count is None:
            print("при парсинге возникла ошибка")
            continue

        # Upper bound is inclusive so the last page is fetched as well.
        for page in range(1, pages_count + 1):
            page_response = _fetch_with_retry(
                category_url + "?page=" + str(page), user_agent
            )
            print(page_response.status_code)

            page_soup = BeautifulSoup(page_response.text, "html.parser")
            if page_soup.find("div", class_="searchV2-pressed-text") is not None:
                print("тендеры в данной категории закончились")
                break
            if page_soup.find("div", class_="tender-money") is None:
                # No tenders on this page: treat it as a parsing failure.
                print("при парсинге возникла ошибка")
                break

            _append_descriptions(page_soup, label)
            # Randomised pause between page requests.
            time.sleep(random.uniform(1, 1.5))
if __name__ == "__main__":
    # One worker process per region.  ``map`` blocks until every region has
    # been processed; the context manager then shuts the pool down, also
    # when a worker raised and ``map`` re-raises that error here.
    with Pool(len(regions_list)) as pool:
        pool.map(bot, regions_list)
- The above exception was the direct cause of the following exception:
- Traceback (most recent call last):
- File "BotMainPart.py", line 73, in <module>
- p.map(bot, regions_list)
- File "C:\Users\user\AppData\Local\Programs\Python\Python35\lib\multiprocessing\pool.py", line 266, in map
- return self._map_async(func, iterable, mapstar, chunksize).get()
- File "C:\Users\user\AppData\Local\Programs\Python\Python35\lib\multiprocessing\pool.py", line 644, in get
- raise self._value
- UnboundLocalError: local variable 'reqouest_to_get_tenders_descriptions_list_html' referenced before assignment
Add Comment
Please, Sign In to add comment