Advertisement
Guest User

Untitled

a guest
Jun 16th, 2019
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.52 KB | None | 0 0
  1. import io
  2. from PIL import Image
  3. import pytesseract
  4. from wand.image import Image as wi
  5.  
  6.  
  7.  
  8.  
  9. pdf = wi(filename = r"D:filesaba7d525-04b8-4474-a40d-e94f9656ed42.pdf", resolution = 300)
  10.  
  11. pdfImg = pdf.convert('jpeg')
  12.  
  13. imgBlobs = []
  14.  
  15. for img in pdfImg.sequence:
  16. page = wi(image = img)
  17. imgBlobs.append(page.make_blob('jpeg'))
  18.  
  19. extracted_text = []
  20.  
  21. for imgBlob in imgBlobs:
  22. im = Image.open(io.BytesIO(imgBlob))
  23. text = pytesseract.image_to_string(im, lang = 'eng')
  24. extracted_text.append(text)
  25.  
  26. print(extracted_text[0])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement