Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- import base64
- # import pytesseract
- # from PIL import Image
- import time
class Avito:
    """Save the phone-number image of an Avito listing using a Chrome browser.

    The class owns one Selenium Chrome session, created in ``__init__`` and
    released with :meth:`browser_close`.
    """

    def __init__(self):
        """Start a new Chrome WebDriver session."""
        self.driver = webdriver.Chrome()

    def browser_close(self):
        """End the WebDriver session started in ``__init__``."""
        # quit() ends the whole session and closes every window; close() only
        # closes the current window and can leave the driver process running.
        self.driver.quit()

    def get_phone_img(self, url, path='.'):
        """Open the listing at *url* and save its phone image as a PNG file.

        The method loads the page, clicks the ``item-actions-line`` element
        (presumably the control that reveals the phone number), reads the
        item id from the page, decodes the base64 data URI found in the
        image's ``src`` attribute and writes it to ``<path>/<item_id>.png``.

        Args:
            url: Address of the listing page.
            path: Directory the PNG is written to. Defaults to the current
                working directory, which is also where ``img_to_text`` looks
                for the image.

        Returns:
            The path of the file that was written.

        Raises:
            ValueError: If the image ``src`` is missing, is not of the form
                ``<header>,<base64 payload>``, or the payload is not valid
                base64.
        """
        import os

        browser = self.driver
        browser.get(url)
        # Fixed pauses give the page and the revealed image time to render.
        time.sleep(4)
        # find_element("xpath", ...) is used instead of find_element_by_xpath,
        # which newer Selenium releases no longer provide.
        browser.find_element("xpath", "//div[@class='item-actions-line']").click()
        time.sleep(3)

        raw_id = browser.find_element(
            "xpath", "//span[@data-marker='item-view/item-id']").text
        item_id = str(raw_id).replace('№ ', '').strip()
        print(item_id)

        # NOTE(review): absolute XPath; it breaks as soon as the page layout
        # changes. Left as is because no stable marker is visible here.
        src = browser.find_element(
            "xpath", '//html/body/div[9]/div[2]/div/div/div/div[1]/img'
        ).get_attribute('src')

        # Decode before opening the file so a bad src leaves no empty PNG.
        image_bytes = self._decode_data_uri(src)
        target = os.path.join(path, item_id + '.png')
        with open(target, 'wb') as fw:
            fw.write(image_bytes)
        return target

    @staticmethod
    def _decode_data_uri(src):
        """Return the bytes encoded in the base64 part of a data URI.

        Everything after the first comma is treated as the base64 payload.

        Raises:
            ValueError: If *src* is empty, has no comma, has an empty payload
                or the payload is not valid base64.
        """
        _header, comma, payload = str(src or '').partition(',')
        payload = payload.strip()
        if not comma or not payload:
            raise ValueError('image src is not a base64 data URI: %r' % (src,))
        return base64.decodebytes(payload.encode())

    @staticmethod
    def img_to_text(self, item_id):
        """Run OCR on ``<item_id>.png`` and write the text to ``<item_id>.txt``.

        Both files are resolved relative to the current working directory.

        The method is declared static yet keeps a leading ``self`` parameter
        for compatibility with existing call sites
        (``obj.img_to_text(obj, item_id)``); that argument is not used.

        Args:
            self: Unused placeholder, see above.
            item_id: File name stem shared by the input PNG and output TXT.
        """
        # Imported here because the module-level imports of these packages
        # are commented out in this file.
        import pytesseract
        from PIL import Image

        pytesseract.pytesseract.tesseract_cmd = r'F:\Program Files\Tesseract-OCR\tesseract.exe'
        with Image.open(item_id + '.png') as image:
            # The whitelist restricts recognition to digits.
            text = pytesseract.image_to_string(
                image,
                config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
        with open(item_id + '.txt', 'w') as f:
            f.write(text)
if __name__ == '__main__':
    # Script entry point: save the phone image of one hard-coded listing.
    avitoBot = Avito()
    try:
        # The second argument is the output directory; the current directory
        # is passed explicitly because the call previously omitted it.
        avitoBot.get_phone_img(
            'https://www.avito.ru/novosibirsk/zemelnye_uchastki/uchastok_8_sot._izhs_1762958677',
            '.',
        )
    finally:
        # Release the browser even when scraping raises.
        avitoBot.browser_close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement