Advertisement
daniilak

Untitled

Oct 28th, 2021
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.85 KB | None | 0 0
  1. from requests import get
  2. from requests import Session
  3. from json import loads as json_loads, dumps as json_dumps
  4. from seleniumwire import webdriver
  5. from selenium.webdriver.chrome.options import Options
  6. from pickle import dump as pickle_dump, load as pickle_load
  7. from time import sleep
  8.  
  9. from conf import SEARCH_HEAD, proxy_options, executable_chrome_path, cookies_file, categories, anticaptcha
  10.  
  11. def work_chrome(category, is_captcha = 0):
  12. print('Open Browser')
  13. chrome_options = Options()
  14. chrome_options.add_extension(anticaptcha)
  15. d = webdriver.Chrome(executable_path=executable_chrome_path, options=chrome_options)
  16. d.get('https://www.ozon.ru'+category)
  17. # поставить проверку, что не капча
  18. if is_captcha == 1:
  19. sleep(30)
  20. pickle_dump(d.get_cookies(), open(cookies_file, "wb"))
  21. print("Close Browser")
  22. d.quit()
  23.  
  24. def debugging(text, name):
  25. f = open('jsons/'+str(name)+'.json', 'w+', encoding='utf-8')
  26. f.write(text)
  27. f.close()
  28.  
  29. def make_session():
  30. cookies = pickle_load(open(cookies_file, 'rb'))
  31. session = Session()
  32. for cookie in cookies:
  33. session.cookies.set(cookie['name'], cookie['value'])
  34. return session
  35.  
  36. def make_request(session, URL):
  37. res = session.get(URL, headers=SEARCH_HEAD)
  38. return res.text
  39.  
  40. def request_product(session, URL, category):
  41. df = json_loads(make_request(session, URL))
  42. if 'widgetStates' not in df:
  43. print(f"Need captcha in request_product() url={URL}")
  44. work_chrome(category, 1)
  45. session = make_session()
  46. df = json_loads(make_request(session, URL))
  47. df = df['widgetStates']
  48. for r in df:
  49. debugging(json_dumps(json_loads(df[r])), r)
  50. if 'addToFavorite' in r:
  51. info = json_loads(df[r])['cellTrackingInfo']['product']
  52. print(f"""Info product: \n URL={URL} \n ID={info['id']} \n title={info['title']} \n finalPrice={info['finalPrice']} \n price={info['price']} \n discount={info['discount']} \n brandName={info['brandName']} \n brandId={info['brandId']} \n categoryId={info['categoryId']} \n """)
  53. if 'webCharacteristics' in r:
  54. print("characteristics:")
  55. if 'characteristics' in df[r]:
  56. for char in json_loads(df[r])['characteristics'][0]['short']:
  57. print(f""" key={char['key']} \n name={char['name']} \n value={char['values'][0]['text']} \n """)
  58. if 'webGallery' in r:
  59. for image in json_loads(df[r])['images']:
  60. print(f"image={image['src']}")
  61. if 'webAspects' in r:
  62. asp = json_loads(df[r])['aspects']
  63. aspects_0 = asp[0]
  64. print(aspects_0['type']+':')
  65. for variant in aspects_0['variants']:
  66. print(f""" availability={variant['availability']} size={variant['data']['textRs'][0]['content']} coverImage={variant['data']['coverImage']}""")
  67. if len(asp) == 2:
  68. aspects_1 = asp[1]
  69. print(aspects_0['type']+':')
  70. for variant in aspects_1['variants']:
  71. print(f""" availability={variant['availability']} size={variant['data']['textRs'][0]['content']} coverImage={variant['data']['coverImage']}""")
  72. print("\n\n")
  73.  
  74. def request_category(category, index):
  75. URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?url="+category+"?page="+str(index)+"&page_changed=true"
  76. if index == 1:
  77. URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?page_changed=true&url="+category
  78.  
  79. session = make_session()
  80. df = json_loads(make_request(session, URL))
  81.  
  82. debugging(json_dumps(df), 'category_info')
  83.  
  84. if 'widgetStates' not in df:
  85. print(f"Need captcha in request_category() category={category} index={index}")
  86. work_chrome(category, 1)
  87. session = make_session()
  88. df = json_loads(make_request(session, URL))
  89. debugging(json_dumps(df), 'category_info')
  90.  
  91. df = df['widgetStates']
  92.  
  93. number = 0
  94.  
  95. for el in df:
  96. if 'searchResultsV2' not in el:
  97. continue
  98.  
  99. items = json_loads(df[el])['items']
  100. for item in items:
  101. link = 'https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=' + item['action']['link'].split('/?asb')[0]+'/'
  102. print(f"\n link product #{number}: {link}")
  103. number += 1
  104. # Если нужен перерыв между заходом в каждый товар, то ставим sleep
  105. # sleep(1)
  106. request_product(session, link, category)
  107. return number
  108.  
  109. number = 0
  110. for category in categories:
  111. # work_chrome(category)
  112. number += request_category(category, 1)
  113. number += request_category(category, 2)
  114. number += request_category(category, 3)
  115. number += request_category(category, 4)
  116. exit()
  117.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement