Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from json import loads as json_loads, dumps as json_dumps
from os import makedirs
from pickle import dump as pickle_dump, load as pickle_load
from time import sleep

from requests import get
from requests import Session
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver

from conf import SEARCH_HEAD, proxy_options, executable_chrome_path, cookies_file, categories
def work_chrome(category, is_captcha=0):
    """Open Chrome on an ozon.ru page and save the browser cookies to disk.

    The browser is started with the ``anitcaptcha_plugin.zip`` extension,
    navigates to ``https://www.ozon.ru`` + ``category`` and pickles the
    resulting cookies into ``cookies_file``.

    Args:
        category: URL path appended to ``https://www.ozon.ru``.
        is_captcha: When equal to 1, wait 30 seconds after the page loads
            before reading the cookies (presumably to give the captcha
            extension time to finish -- confirm).
    """
    print('Open Browser')
    chrome_options = Options()
    chrome_options.add_extension(r'anitcaptcha_plugin.zip')
    d = webdriver.Chrome(executable_path=executable_chrome_path, options=chrome_options)
    try:
        d.get('https://www.ozon.ru' + category)
        # TODO: add a check that the loaded page is not a captcha.
        if is_captcha == 1:
            sleep(30)
        # Close the cookie file deterministically instead of leaking the handle.
        with open(cookies_file, "wb") as cookie_fh:
            pickle_dump(d.get_cookies(), cookie_fh)
    finally:
        # Always shut the browser down, even if navigation or pickling failed.
        print("Close Browser")
        d.quit()
def debugging(text, name):
    """Write ``text`` to ``jsons/<name>.json`` for later inspection.

    Args:
        text: String content to store (written as UTF-8).
        name: File stem; converted with ``str()``, so non-string keys work.
    """
    # Create the dump directory on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    makedirs('jsons', exist_ok=True)
    # The context manager closes the file even if the write raises.
    with open('jsons/' + str(name) + '.json', 'w', encoding='utf-8') as f:
        f.write(text)
def make_session():
    """Build a ``requests`` session carrying the cookies stored in ``cookies_file``.

    Returns:
        A new ``Session`` whose cookie jar holds the name/value of every
        pickled cookie entry.
    """
    # NOTE(security): unpickling can execute arbitrary code. cookies_file must
    # only ever be a file produced locally (work_chrome writes it), never
    # something obtained from an untrusted source.
    with open(cookies_file, 'rb') as cookie_fh:
        cookies = pickle_load(cookie_fh)
    session = Session()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])
    return session
def make_request(session, URL, timeout=30):
    """GET ``URL`` through ``session`` and return the response body as text.

    Args:
        session: Object exposing ``get(url, headers=..., timeout=...)``.
        URL: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request can block forever on a stalled connection.

    Returns:
        The decoded response body (``res.text``).
    """
    res = session.get(URL, headers=SEARCH_HEAD, timeout=timeout)
    return res.text
def _print_product_info(widget, URL):
    """Print the product summary stored in an ``addToFavorite`` widget."""
    info = widget['cellTrackingInfo']['product']
    print(
        "Info product: \n"
        f" URL={URL} \n"
        f" ID={info['id']} \n"
        f" title={info['title']} \n"
        f" finalPrice={info['finalPrice']} \n"
        f" price={info['price']} \n"
        f" discount={info['discount']} \n"
        f" brandName={info['brandName']} \n"
        f" brandId={info['brandId']} \n"
        f" categoryId={info['categoryId']} \n "
    )


def _print_characteristics(widget):
    """Print the short characteristics of a ``webCharacteristics`` widget."""
    print("characteristics:")
    # Look the key up in the parsed widget rather than substring-matching the
    # raw JSON text, and tolerate a missing or empty list.
    groups = widget.get('characteristics')
    if not groups:
        return
    for char in groups[0]['short']:
        print(f" key={char['key']} \n name={char['name']} \n value={char['values'][0]['text']} \n ")


def _print_gallery(widget):
    """Print the source URL of every image in a ``webGallery`` widget."""
    for image in widget['images']:
        print(f"image={image['src']}")


def _print_aspects(widget):
    """Print every aspect of a ``webAspects`` widget with its variants."""
    # Iterate over all aspects: each one is printed under its own type header,
    # and zero, one, or more than two aspects are handled uniformly.
    for aspect in widget['aspects']:
        print(aspect['type'] + ':')
        for variant in aspect['variants']:
            print(f" availability={variant['availability']} size={variant['data']['textRs'][0]['content']} coverImage={variant['data']['coverImage']}")


def request_product(session, URL, category):
    """Fetch one product page as JSON and print the details found in it.

    Every widget under ``widgetStates`` is dumped through ``debugging`` and
    the widgets whose key contains ``addToFavorite``, ``webCharacteristics``,
    ``webGallery`` or ``webAspects`` are printed.

    Args:
        session: Session used for the request; its cookies are refreshed in
            place when the response has no ``widgetStates``.
        URL: Product API address to fetch.
        category: Category path handed to ``work_chrome`` when new cookies
            are needed.

    Raises:
        KeyError: If ``widgetStates`` is still missing after the retry.
    """
    df = json_loads(make_request(session, URL))
    if 'widgetStates' not in df:
        print(f"Need captcha in request_product() url={URL}")
        work_chrome(category, 1)
        # Refresh the caller's session in place: rebinding a local name would
        # leave the caller sending the stale cookies for every later product.
        session.cookies.clear()
        session.cookies.update(make_session().cookies)
        df = json_loads(make_request(session, URL))
    widgets = df['widgetStates']
    for key, raw in widgets.items():
        # Each widget state is itself a JSON document; parse it once.
        widget = json_loads(raw)
        debugging(json_dumps(widget), key)
        if 'addToFavorite' in key:
            _print_product_info(widget, URL)
        if 'webCharacteristics' in key:
            _print_characteristics(widget)
        if 'webGallery' in key:
            _print_gallery(widget)
        if 'webAspects' in key:
            _print_aspects(widget)
    print("\n\n")
def request_category(category, index):
    """Fetch one listing page of a category and process every product on it.

    Args:
        category: Category path placed in the ``url`` query parameter.
        index: Page number; page 1 uses a URL without the ``page`` parameter.

    Returns:
        The number of product links found on the page.
    """
    if index == 1:
        URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?page_changed=true&url="+category
    else:
        URL = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?url="+category+"?page="+str(index)+"&page_changed=true"

    session = make_session()
    payload = json_loads(make_request(session, URL))
    debugging(json_dumps(payload), 'category_info')

    if 'widgetStates' not in payload:
        # No widget data in the response: renew the cookies through the
        # browser and repeat the request once.
        print(f"Need captcha in request_category() category={category} index={index}")
        work_chrome(category, 1)
        session = make_session()
        payload = json_loads(make_request(session, URL))
        debugging(json_dumps(payload), 'category_info')

    widgets = payload['widgetStates']
    number = 0
    for widget_key in widgets:
        if 'searchResultsV2' in widget_key:
            for item in json_loads(widgets[widget_key])['items']:
                # Keep only the part of the link before the '/?asb' marker.
                product_path = item['action']['link'].split('/?asb')[0]
                link = 'https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=' + product_path + '/'
                print(f"\n link product #{number}: {link}")
                number += 1
                # If a pause is needed between product visits, add
                # sleep(1) here.
                request_product(session, link, category)
    return number
# Listing pages fetched for every category (pages 1 through 4).
PAGES = range(1, 5)

# Running total of product links processed across all categories and pages.
number = 0
for category in categories:
    # To renew the cookies before scraping a category, call
    # work_chrome(category) here.
    for page in PAGES:
        number += request_category(category, page)

# Terminate explicitly; unlike exit(), this does not rely on the helper that
# the site module installs for interactive sessions.
raise SystemExit
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement