Advertisement
daniilak

Untitled

Oct 27th, 2021
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.51 KB | None | 0 0
  1. from requests import get
  2. from requests import Session
  3. from json import loads, dumps
  4. SEARCH_HEAD={
  5. 'Accept': 'application/json',
  6. 'Content-Type': 'application/json',
  7. 'DNT': '1',
  8. 'Referer': 'https://www.ozon.ru/',
  9. 'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
  10. 'sec-ch-ua-mobile': '?0',
  11. 'sec-ch-ua-platform': '"Windows"',
  12. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
  13. }
  14. from seleniumwire import webdriver
  15. from selenium.webdriver.chrome.options import Options
  16. import pickle
  17. import time
  18. chrome_options = Options()
  19. # options = {
  20.  
  21. # 'proxy': {
  22. # 'http': 'http://login:[email protected]:8000',
  23. # 'https': 'https://login:[email protected]:8000',
  24. # 'no_proxy': 'localhost,127.0.0.1'
  25. # }
  26. # }
  27. d = webdriver.Chrome(executable_path=r'C:\Users\biklu\chromedriver.exe', options=chrome_options)
  28. d.get('https://www.ozon.ru/category/bluzy-i-rubashki-zhenskie-7511/')
  29. time.sleep(200)
  30. pickle.dump(d.get_cookies(), open("cookies2.pkl", "wb"))
  31. d.quit()
  32. print("Close browser")
  33.  
  34. def getData(session, link):
  35. res = session.get(link, headers=SEARCH_HEAD)
  36. try:
  37. res = loads(res.text)['widgetStates']
  38. except:
  39. return False
  40. # index = 0
  41. for r in res:
  42. # f = open(str(index)+'.json', 'w+', encoding='utf-8')
  43. # f.write(res[r])
  44. # f.close()
  45. # index = index + 1
  46. if 'webCharacteristics' in r:
  47. if 'characteristics' in res[r]:
  48. print(loads(res[r])['characteristics'])
  49. if 'webGallery' in r:
  50. print(loads(res[r])['images'])
  51.  
  52. if 'webAspects' in r:
  53. print(loads(res[r])['aspects'])
  54. if 'addToFavorite' in r:
  55. print(loads(res[r])['cellTrackingInfo']['product'])
  56. # exit()
  57. # print(r)
  58. # items = loads(res[r])
  59. # print(items)
  60. # print(index)
  61. # print(link)
  62. # f = open(str(index)+'.json', 'w+', encoding='utf-8')
  63. # f.write(dumps(items))
  64. # f.close()
  65. # index = index + 1
  66. import json
  67. def get(pum, i):
  68. if i==1:
  69. URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?page_changed=true&url="+pum
  70. else:
  71. URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?url="+pum+"?page="+str(i)+"&page_changed=true"
  72. cookies = pickle.load(open("cookies2.pkl", 'rb'))
  73. session = Session()
  74. for cookie in cookies:
  75. session.cookies.set(cookie['name'], cookie['value'])
  76. res = session.get(URL, headers=SEARCH_HEAD)
  77. f = open('a.json', 'w+', encoding='utf-8')
  78. f.write(res.text)
  79. f.close()
  80. f = open('a.json', 'r', encoding='utf-8')
  81. df = loads(f.read())
  82. f.close()
  83. counter_num = 0
  84. try:
  85. widgetStates = df['widgetStates']
  86. except:
  87. widgetStates = None
  88. index = 0
  89. for el in widgetStates:
  90. if 'searchResultsV2' not in el:
  91. continue
  92. items = loads(widgetStates[el])
  93. items = items['items']
  94. for el_item in items:
  95. print("\n")
  96. counter_num += 1
  97. print("LINK: ", el_item['action']['link'])
  98. link = 'https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=' + el_item['action']['link'].split('/?asb')[0]+'/'
  99. getData(session, link)
  100. time.sleep(1)
  101. # print('isAdult:', el_item['isAdult'])
  102. # mainStates = el_item['mainState']
  103. # for mainState in mainStates:
  104. # atom = mainState['atom']
  105. # tp = atom['type']
  106. # if tp == 'price':
  107. # print('price:',atom['price']['price'])
  108. # try:
  109. # print('originalPrice:',atom['price']['originalPrice'])
  110. # except:
  111. # pass
  112. # if tp == 'textAtom':
  113. # print('textAtom:',atom['textAtom']['text'])
  114. # if tp == 'textVariants':
  115. # print('textVariants:',atom['textVariants']['items'])
  116. # print('images:', el_item['tileImage']['images'])
  117.  
  118. print('ВСЕГО ',counter_num)
  119. # return 'https://www.ozon.ru/api/composer-api.bx/page/json/v2?url='+df['nextPage']
  120.  
  121. d=1
  122. # это в цикл
  123. counter = 100
  124. while True:
  125. if counter == 0:
  126. exit()
  127. get("/category/bluzy-i-rubashki-zhenskie-7511/", d)
  128. d=d+1
  129. counter = counter - 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement