Guest User

Untitled

a guest
Dec 18th, 2018
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.54 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. from multiprocessing import Pool
  4. import re
  5. import time
  6. import random
  7. import math
  8. from fake_useragent import UserAgent
  9.  
  10.  
  11. proxies = {"http" : "socks5://127.0.0.1:9050", "https" : "socks5://127.0.0.1:9050", "socks5" : "socks5://127.0.0.1:9050"}
  12.  
  13. regions_list = ['https://synapsenet.ru/search/categoryinregion/bryanskaya-obl', 'https://synapsenet.ru/search/categoryinregion/ivanovskaya-obl', 'https://synapsenet.ru/search/categoryinregion/kurskaya-obl', 'https://synapsenet.ru/search/categoryinregion/orlovskaya-obl', 'https://synapsenet.ru/search/categoryinregion/tambovskaya-obl', 'https://synapsenet.ru/search/categoryinregion/yaroslavskaya-obl', 'https://synapsenet.ru/search/categoryinregion/vladimirskaya-obl', 'https://synapsenet.ru/search/categoryinregion/kaluzhskaya-obl', 'https://synapsenet.ru/search/categoryinregion/lipeckaya-obl', 'https://synapsenet.ru/search/categoryinregion/ryazanskaya-obl', 'https://synapsenet.ru/search/categoryinregion/tverskaya-obl', 'https://synapsenet.ru/search/categoryinregion/belgorodskaya-obl', 'https://synapsenet.ru/search/categoryinregion/voronezhskaya-obl', 'https://synapsenet.ru/search/categoryinregion/kostromskaya-obl', 'https://synapsenet.ru/search/categoryinregion/smolenskaya-obl', 'https://synapsenet.ru/search/categoryinregion/tulskaya-obl']
  14. categories_list = [['/bitovaya-elektrotehnika', '_label_0'], ['/category/zhkh', '_label_1'], ['/klining-utilizaciya-i-himchistka', '_label_2'], ['kompyuteri-mebel-i-kanctovari', '_label_3'], ['/les-i-pilomaterili', '_label_4'], ['/materiali-sire-i-polufabrikati', '_label_5'], ['/medicina-i-farmakologiya', '_label_6'], ['/metall', '_label_7'], ['/nedvizhimost', '_label_8'], ['/neft-i-gaz', '_label_9'], ['/obuchenie-i-obrazovanie', '_label_10'], ['/odezhda-i-hoztovari', '_label_11'], ['/ohrana-bezopasnost-i-signalizaciya', '_label_12'], ['/perevozka-transportnie-uslugi', '_label_13'], ['/produkti', '_label_14'], ['/proektirovanie-i-inzhenernie-uslugi', '_label_15'], ['/promishlennoe-oborudovanie', '_label_16'], ['/reklama', '_label_17'], ['/svyaz-i-kommunikacii', '_label_18'], ['/selskoe-hozyajstvo', '_label_19'], ['/strahovanie-i-yuridicheskie-uslugi', '_label_20'], ['/stroitelnie-materiali-i-oborudovanie', '_label_21'], ['/stroitelstvo-i-remont', '_label_22'], ['/transport-i-spectehnika', '_label_23'], ['/uslugi-dlya-naseleniya', '_label_24'], ['/himiya', '_label_25'], ['/energetika', '_label_26']]
  15.  
  16. def bot(regions_list):
  17. for a in range(len(categories_list)):
  18. get_request_to_get_tenders_count = requests.Session()
  19. try:
  20. get_request_to_get_tenders_count_html = get_request_to_get_tenders_count.get(regions_list + categories_list[a][0], headers={'User-Agent': str(UserAgent().random)}, proxies=proxies, timeout=6)
  21. except Exception:
  22. print("Cоединение оборвалось, попытка его востановить")
  23. while True:
  24. try:
  25. get_request_to_get_tenders_count_html = get_request_to_get_tenders_count.get(regions_list + categories_list[a][0], headers={'User-Agent': str(UserAgent().random)}, proxies=proxies, timeout=6)
  26. break
  27. except Exception as exc:
  28. print(exc)
  29. print("Попытка востновить соединение")
  30.  
  31. get_pages_count_item = BeautifulSoup(get_request_to_get_tenders_count_html.text, "html.parser")
  32. pages_count = int(re.sub(r'[^x00-x7f]', '', str(get_pages_count_item.select("#searchV2-tenders-count")[0].getText()))) / 20
  33.  
  34.  
  35. for b in range(1, int(math.ceil(pages_count))): # перебор категории тендеров
  36. headers_data = {'User-Agent': str(UserAgent().random)}
  37. reqouest_to_get_tenders_descriptions_list = requests.Session()
  38. try:
  39. reqouest_to_get_tenders_descriptions_list_html = reqouest_to_get_tenders_descriptions_list.get(regions_list + categories_list[a][0] + "?page=" + str(b), headers={'User-Agent': str(UserAgent().random)}, proxies=proxies, timeout=6)
  40. print(reqouest_to_get_tenders_descriptions_list_html.status_code)
  41. except Exception:
  42. print("Cоединение оборвалось, попытка его востановить")
  43. while True:
  44. try:
  45. reqouest_to_get_tenders_descriptions_list_html = reqouest_to_get_tenders_descriptions_list.get(regions_list + categories_list[a][0] + "?page=" + str(b), headers={'User-Agent': str(UserAgent().random)}, proxies=proxies, timeout=6)
  46. print(reqouest_to_get_tenders_descriptions_list_html.status_code)
  47. break
  48. except Exception as exc:
  49. print(exc)
  50. print("Попытка востновить соединение")
  51.  
  52. tenders_descriptions_list = BeautifulSoup(reqouest_to_get_tenders_descriptions_list_html.text, "html.parser")
  53.  
  54. if tenders_descriptions_list.find("div", class_="searchV2-pressed-text") != None:
  55. print("тендеры в данной категории закончились")
  56. break
  57. elif tenders_descriptions_list.find("div", class_="tender-money") == None: # проверка на существование тендеров на данной странице
  58. print("при парсинге возникла ошибка")
  59. break
  60.  
  61. for description_item in tenders_descriptions_list.select(".searchV2-found-in"):
  62. if description_item.find("div") != None:
  63. description_item.find("div").decompose()
  64. reg = re.compile('[^а-яА-Я ]')
  65. str_to_write = categories_list[a][1] + " " + str(reg.sub('', description_item.getText()) + 'n')
  66. with open("fortest.txt", "a", encoding='utf8') as f:
  67. f.write(str_to_write)
  68. time.sleep(random.uniform(1, 1.5))
  69.  
  70.  
  71. if __name__ == "__main__":
  72. p = Pool(len(regions_list))
  73. p.map(bot, regions_list)
  74. p.close()
  75. p.join()
  76.  
  77. The above exception was the direct cause of the following exception:
  78.  
  79. Traceback (most recent call last):
  80. File "BotMainPart.py", line 73, in <module>
  81. p.map(bot, regions_list)
  82. File "C:\Users\user\AppData\Local\Programs\Python\Python35\lib\multiprocessing\pool.py", line 266, in map
  83. return self._map_async(func, iterable, mapstar, chunksize).get()
  84. File "C:\Users\user\AppData\Local\Programs\Python\Python35\lib\multiprocessing\pool.py", line 644, in get
  85. raise self._value
  86. UnboundLocalError: local variable 'reqouest_to_get_tenders_descriptions_list_html' referenced before assignment
Add Comment
Please, Sign In to add comment