Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# OCR a PDF: render its pages to images, run Tesseract on each image, and
# write the combined recognized text to a UTF-8 text file.
import pytesseract
from PIL import Image
import pdf2image

# Convert the PDF into images. The path is a placeholder (Romanian for
# "path to the PDF file") and must be replaced with a real file path.
images = pdf2image.convert_from_path('calea_catre_fisierul_pdf')

# Extract the text from each image and concatenate the results in order.
# The pieces are joined with no separator, matching what repeated `+=`
# would produce, but without rebuilding the string on every iteration.
text = "".join(pytesseract.image_to_string(img) for img in images)

# Save the text to a .txt file. The path is a placeholder (Romanian for
# "path to the txt file"); UTF-8 is set explicitly so the output encoding
# does not depend on the platform default.
with open('calea_catre_fisierul_txt', 'w', encoding='utf-8') as txt_file:
    txt_file.write(text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement