Advertisement
Guest User

Untitled

a guest
Apr 8th, 2020
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.05 KB | None | 0 0
  1. import base64
  2. import cv2 as cv
  3. import os
  4. import pytesseract
  5. from PIL import Image
  6.  
  7. def create_encoding(filepath):
  8.     with open(filepath, "rb") as image_file:
  9.         encoded_string = base64.b64encode(image_file.read())
  10.     return encoded_string
  11.  
  12. def get_extension(filepath):
  13.     extension = os.path.splitext(filepath)[1]
  14.     return extension
  15.  
  16. def decode(string_b64,filename):
  17.     imgdata = base64.b64decode(string_b64)
  18.     with open(filename, 'wb') as f:
  19.         f.write(imgdata)
  20.  
  21. def denoise_image(filename):
  22.     img = cv.imread(filename)
  23.     clear_image = cv.fastNlMeansDenoisingColored(img, None, 43, 13, 7, 21)
  24.     cv.imwrite("ReadyToTesseract.png", clear_image)
  25.     cv.imshow("Denoised image", clear_image)
  26.  
  27. def extractText(filename):
  28.     text = pytesseract.image_to_string(Image.open(filename))
  29.     return text
  30.  
  31.  
  32. filepath="poza.png"
  33. extension=get_extension(filepath)
  34. filename = "Decoded" + extension
  35. string_b64=create_encoding(filepath)
  36. decode(string_b64,filename)
  37. denoise_image(filename)
  38.  
  39. print(extractText('ReadyToTesseract.png'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement