Advertisement
repente

Untitled

Nov 24th, 2019
265
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.63 KB | None | 0 0
  1. from selenium import webdriver
  2. import base64
  3. # import pytesseract
  4. # from PIL import Image
  5. import time
  6.  
  7.  
  8. class Avito:
  9.  
  10. def __init__(self):
  11. self.driver = webdriver.Chrome()
  12.  
  13. def browser_close(self):
  14. self.driver.close()
  15.  
  16. def get_phone_img(self, url, path):
  17. browser = self.driver
  18. browser.get(url)
  19. time.sleep(4)
  20. browser.find_element_by_xpath("//div[@class='item-actions-line']").click()
  21. time.sleep(3)
  22. item_id = browser.find_element_by_xpath("//span[@data-marker='item-view/item-id']").text
  23. item_id = str(item_id).replace('№ ', '').strip()
  24. print(item_id)
  25. base64Img = browser.find_element_by_xpath('//html/body/div[9]/div[2]/div/div/div/div[1]/img').get_attribute(
  26. 'src')
  27. base64Img = str(base64Img).partition(",")[2].strip()
  28. base64Img = base64Img.encode()
  29. with open(path + "/" + item_id + '.png', 'wb') as fw:
  30. fw.write(base64.decodebytes(base64Img))
  31.  
  32. # self.img_to_text(self, item_id)
  33.  
  34. @staticmethod
  35. def img_to_text(self, item_id):
  36. f = open(item_id + '.txt', 'w')
  37. pytesseract.pytesseract.tesseract_cmd = r'F:\Program Files\Tesseract-OCR\tesseract.exe'
  38. text = pytesseract.image_to_string(Image.open(item_id + '.png'),
  39. config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
  40. f.write(text)
  41.  
  42. if __name__ == '__main__':
  43. avitoBot = Avito()
  44. avitoBot.get_phone_img('https://www.avito.ru/novosibirsk/zemelnye_uchastki/uchastok_8_sot._izhs_1762958677')
  45. avitoBot.browser_close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement