Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from requests import get
- from requests import Session
- from json import loads, dumps
# HTTP headers sent with every request to the ozon.ru JSON API. The values
# imitate a desktop Chrome 94 browser on Windows.
SEARCH_HEAD = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'DNT': '1',
    'Referer': 'https://www.ozon.ru/',
    'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
}
- from seleniumwire import webdriver
- from selenium.webdriver.chrome.options import Options
- import pickle
- import time
# --- One-off cookie capture -------------------------------------------------
# Opens a real Chrome window on the category page, waits so that cookies can
# be set in the browser, then stores them in cookies2.pkl for later requests.
chrome_options = Options()

# Example proxy settings kept from the original author (currently unused):
# options = {
#     'proxy': {
#         'http': 'http://login:password@HOST:8000',
#         'https': 'https://login:password@HOST:8000',
#         'no_proxy': 'localhost,127.0.0.1'
#     }
# }

d = webdriver.Chrome(executable_path=r'C:\Users\biklu\chromedriver.exe', options=chrome_options)
try:
    d.get('https://www.ozon.ru/category/bluzy-i-rubashki-zhenskie-7511/')
    # Long pause before the cookies are read.
    # NOTE(review): presumably time to pass anti-bot checks by hand - confirm.
    time.sleep(200)
    # Read the cookies before opening the file so a browser failure does not
    # leave a truncated cookies2.pkl behind.
    browser_cookies = d.get_cookies()
    with open("cookies2.pkl", "wb") as cookie_file:
        pickle.dump(browser_cookies, cookie_file)
finally:
    # Always shut the browser down, even if navigation or saving failed.
    d.quit()
    print("Close browser")
def getData(session, link):
    """Fetch one product page from the JSON API and print selected widgets.

    The response is expected to be a JSON object whose ``widgetStates`` member
    maps widget names to JSON-encoded strings. For every widget whose name
    contains one of the known markers, the matching field is printed:

    * ``webCharacteristics`` -> ``characteristics``
    * ``webGallery``         -> ``images``
    * ``webAspects``         -> ``aspects``
    * ``addToFavorite``      -> ``cellTrackingInfo.product``

    Widgets that are not valid JSON or lack the expected field are skipped, so
    one malformed widget does not abort the whole crawl.

    Args:
        session: Object with a ``get(url, headers=...)`` method returning a
            response that has a ``text`` attribute (e.g. ``requests.Session``).
        link: Full API URL of the product page.

    Returns:
        ``False`` when the response is not JSON or has no usable
        ``widgetStates`` mapping; ``None`` otherwise.
    """
    response = session.get(link, headers=SEARCH_HEAD)
    try:
        widget_states = loads(response.text)['widgetStates']
    except (ValueError, KeyError, TypeError):
        # Not JSON (ValueError covers JSONDecodeError), or no such member.
        return False
    if not isinstance(widget_states, dict):
        return False

    # (marker in the widget name, key path to print inside the widget state)
    wanted = (
        ('webCharacteristics', ('characteristics',)),
        ('webGallery', ('images',)),
        ('webAspects', ('aspects',)),
        ('addToFavorite', ('cellTrackingInfo', 'product')),
    )

    for widget_name, raw_state in widget_states.items():
        paths = [path for marker, path in wanted if marker in widget_name]
        if not paths:
            continue
        try:
            # Decode each widget once, however many markers match its name.
            state = loads(raw_state)
        except (ValueError, TypeError):
            continue
        for path in paths:
            value = state
            try:
                for key in path:
                    value = value[key]
            except (KeyError, TypeError, IndexError):
                # Expected field is absent from this widget; nothing to print.
                continue
            print(value)
- import json
def get(pum, i):
    """Fetch one category listing page and scrape every product found on it.

    NOTE: this function shadows ``get`` imported from ``requests`` at the top
    of the file.

    The raw response body is also written to ``a.json`` in the working
    directory (overwritten on every call).

    Args:
        pum: Site-relative category path, e.g. ``"/category/.../"``.
        i: Page number; page 1 uses a slightly different URL layout.

    Raises:
        ValueError: If the listing response body is not valid JSON.
        OSError: If ``cookies2.pkl`` cannot be read or ``a.json`` written.
    """
    if i == 1:
        URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?page_changed=true&url=" + pum
    else:
        URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=" + pum + "?page=" + str(i) + "&page_changed=true"

    # NOTE(security): unpickling runs arbitrary code; only load a cookies2.pkl
    # that this script produced itself.
    with open("cookies2.pkl", 'rb') as cookie_file:
        cookies = pickle.load(cookie_file)

    session = Session()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])

    res = session.get(URL, headers=SEARCH_HEAD)

    # Keep a copy of the last raw response on disk for inspection.
    with open('a.json', 'w', encoding='utf-8') as dump_file:
        dump_file.write(res.text)
    df = loads(res.text)

    # A response without a usable 'widgetStates' mapping means "no products".
    widget_states = df.get('widgetStates') if isinstance(df, dict) else None
    if not isinstance(widget_states, dict):
        widget_states = {}

    counter_num = 0
    for widget_name, raw_state in widget_states.items():
        if 'searchResultsV2' not in widget_name:
            continue
        items = loads(raw_state)['items']
        # NOTE(review): earlier commented-out code suggests each item also has
        # 'isAdult', 'mainState' (price / textAtom / textVariants atoms) and
        # 'tileImage' fields - unverified here.
        for el_item in items:
            print("\n")
            counter_num += 1
            product_link = el_item['action']['link']
            print("LINK: ", product_link)
            # Drop the '/?asb...' suffix before asking the API for the product.
            link = 'https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=' + product_link.split('/?asb')[0] + '/'
            getData(session, link)
            # Pause between product requests.
            time.sleep(1)

    # 'ВСЕГО' is Russian for 'TOTAL': number of products seen on this page.
    print('ВСЕГО ', counter_num)
# Crawl listing pages 1..100 of the category, then terminate the script.
for page in range(1, 101):
    get("/category/bluzy-i-rubashki-zhenskie-7511/", page)
# Same effect as the original exit(), without relying on the helper that the
# `site` module injects (absent under `python -S` or in frozen builds).
raise SystemExit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement