Advertisement
furas

Python - OCR text with tesseract

Mar 15th, 2017
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.06 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. #
  4. # Linux (Debian/Ubuntu/Mint) install:
  5. #
  6. #   apt install tesseract-ocr
  7. #
  8. # Python install:
  9. #
  10. #   pip install pillow
  11. #   pip install pytesseract
  12. #
  13. # Links:
  14. #   tesseract-ocr: https://github.com/tesseract-ocr/tesseract/wiki
  15. #   pytesseract: https://github.com/madmaze/pytesseract
  16. #   pillow: http://pillow.readthedocs.io
  17. #
  18.  
  19. from PIL import Image
  20. import pytesseract
  21. #import PIL.ImageOps
  22.  
  23. region_power = (880, 810, 880+170, 810+60)
  24.  
  25. # test on screenshots: 0.jpg ... 9.jpg
  26. for x in range(10):
  27.     # open image
  28.     img = Image.open('{}.jpg'.format(x))
  29.  
  30.     # get region
  31.     img = img.crop(region_power)
  32.  
  33.     # in one line
  34.     #img = Image.open('{}.jpg'.format(x)).crop(region_power)
  35.  
  36.     # invert colors - to get dark text on light background
  37.     #img = PIL.ImageOps.invert(img)
  38.  
  39.     # convert to grayscale
  40.     #img = img.convert('L')
  41.  
  42.     # in one line
  43.     #img = PIL.ImageOps.invert(img).convert('L')
  44.  
  45.     #img.save('region-{}.jpg''.format(x))
  46.  
  47.     result = pytesseract.image_to_string(img)
  48.  
  49.     print(result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement