Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import requests
- import json
- import sys
- from pprint import pprint
- from html_to_json import convert as cnv
- from random import randint
- from lxml.html.clean import Cleaner
- import lxml
- import re
- from bs4 import BeautifulSoup as bs
- from timeit import timeit
- import os
- from glob import glob
- headers = {
- 'authority': 'yandex.ru',
- 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
- 'device-memory': '8',
- 'rtt': '150',
- 'sec-ch-ua-mobile': '?0',
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
- 'viewport-width': '575',
- 'dpr': '1',
- 'downlink': '4.15',
- 'ect': '4g',
- 'sec-ch-ua-platform': '"Windows"',
- 'accept': 'application/json',
- 'sec-fetch-site': 'same-origin',
- 'sec-fetch-mode': 'cors',
- 'sec-fetch-dest': 'empty',
- 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6,zh;q=0.5'
- }
- def info(id):
- params = (
- ('docid', f'{id}'),
- ('lang', 'ru'),
- ('mt', '1'),
- ('family', '0'),
- ('pornowhitelist', '1'),
- ('ipnd', '1'),
- )
- response = requests.get('https://yandex.ru/images-apphost/rim',
- headers=headers, params=params).json()
- return response
- def load_image(byte):
- headers = {
- 'authority': 'yandex.ru',
- 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
- 'device-memory': '8',
- 'rtt': '200',
- 'sec-ch-ua-mobile': '?0',
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
- 'viewport-width': '794',
- 'content-type': 'image/jpeg',
- 'dpr': '1',
- 'downlink': '2.65',
- 'ect': '4g',
- 'sec-ch-ua-platform': '"Windows"',
- 'accept': '*/*',
- 'origin': 'https://yandex.ru',
- 'sec-fetch-site': 'same-origin',
- 'sec-fetch-mode': 'cors',
- 'sec-fetch-dest': 'empty',
- 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6,zh;q=0.5'
- }
- params = (
- ('cbird', '37'),
- ('images_avatars_size', 'preview'),
- ('images_avatars_namespace', 'images-cbir')
- )
- data = byte
- response = requests.post('https://yandex.ru/images-apphost/image-download',
- headers=headers, params=params, data=data).json()
- print(response)
- return response
- def getInfoImage(url):
- headers = {
- 'authority': 'yandex.ru',
- 'cache-control': 'max-age=0',
- 'device-memory': '8',
- 'dpr': '1',
- 'viewport-width': '1280',
- 'rtt': '200',
- 'downlink': '2.2',
- 'ect': '4g',
- 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
- 'sec-ch-ua-mobile': '?0',
- 'sec-ch-ua-platform': '"Windows"',
- 'upgrade-insecure-requests': '1',
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
- 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
- 'sec-fetch-site': 'same-origin',
- 'sec-fetch-mode': 'navigate',
- 'sec-fetch-user': '?1',
- 'sec-fetch-dest': 'document',
- 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6,zh;q=0.5'
- }
- if isinstance(dict, type(url)):
- params = (
- ('url', "".join(url['url'].split("/")[:-2]) + "orig"),
- ('cbir_id', url['url'].split("get-images-cbir/")
- [-1].split("/preview")[0]),
- ('cbir_page', 'similar'),
- ('rpt', 'imageview'),
- ('family', '0'),
- ('pornowhitelist', '1'),
- ('ipnd', '1'),
- )
- elif not url.startswith("http"):
- params = (
- ('text', url),
- ('from', 'tabbar'),
- ('family', '0'),
- ('pornowhitelist', '1'),
- ('ipnd', '1'),
- )
- else:
- params = (
- ('url', url),
- ('cbir_page', 'similar'),
- ('rpt', 'imageview'),
- ('family', '0'),
- ('pornowhitelist', '1'),
- ('ipnd', '1'),
- )
- response = requests.get('https://yandex.ru/images/search',
- headers=headers, params=params)
- print(response.url)
- root = lxml.html.fromstring(response.content)
- data = list(root.xpath('//*[@id]/@data-bem'))
- for i in data:
- i = json.loads(i)
- if "serp-item" in i:
- if "rimId" in i["serp-item"]:
- yield i["serp-item"]["rimId"]
- def sJson(response, name):
- with open(f"{name}.html", "w", encoding="utf-8")as f:
- cleaner = Cleaner(style=True, scripts=True, javascript=True, inline_style=True, links=True, add_nofollow=False,
- page_structure=True, safe_attrs_only=False)
- f.write(cleaner.clean_html(response))
- all_links = []
- def vldc(elem):
- try:
- requests.get(elem)
- except:
- return False
- def map_append(elem):
- all_links.append(elem["iu"])
- def get_from_dict(all_links, response):
- for num, i in enumerate(response["rld"]):
- infos = i["s"]
- map(map_append, infos)
- def Glob_matching(src):
- prt = glob("*.*")
- if src in prt:
- return True
- def links_yd(uri):
- if isinstance(str, type(uri)) and uri.startswith("C:") or Glob_matching(uri):
- with open(uri, "rb") as image:
- f = image.read()
- try:
- response = info(getInfoImage(load_image(f)))
- for l in response:
- for num, i in enumerate(response["rld"]):
- infos = i["s"]
- for i in infos:
- all_links.append(i["iu"])
- if "id" in i:
- get_from_dict(all_links, info(i["id"]))
- return all_links
- except:
- return None
- elif isinstance(bytes, type(uri)):
- f = uri
- response = info(getInfoImage(load_image(f)))
- for l in response:
- for num, i in enumerate(response["rld"]):
- infos = i["s"]
- for i in infos:
- all_links.append(i["iu"])
- if "id" in i:
- get_from_dict(all_links, info(i["id"]))
- return all_links
- else:
- response = map(info,getInfoImage(uri))
- for l in response:
- for num, i in enumerate(l["rld"]):
- infos = i["s"]
- for i in infos:
- all_links.append(i["iu"])
- if "id" in i:
- get_from_dict(all_links, info(i["id"]))
- return all_links
- print(links_yd("cats"))
Advertisement
Add Comment
Please sign in to add a comment.