Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import base64
import os

import cv2 as cv
import pytesseract
from PIL import Image
def create_encoding(filepath) -> bytes:
    """Return the contents of the file at *filepath*, Base64-encoded.

    The file is opened in binary mode and read in full; the result is the
    ``bytes`` object produced by ``base64.b64encode`` (not a ``str``).
    """
    with open(filepath, "rb") as image_file:
        return base64.b64encode(image_file.read())
def get_extension(filepath):
    """Return the extension of *filepath*, including the leading dot.

    This is the second element of ``os.path.splitext(filepath)``, so a path
    without an extension yields the empty string.
    """
    return os.path.splitext(filepath)[1]
def decode(string_b64, filename):
    """Decode the Base64 payload *string_b64* and write it to *filename*.

    The destination is opened in binary write mode, so an existing file at
    that path is overwritten. Returns ``None``.
    """
    imgdata = base64.b64decode(string_b64)
    with open(filename, "wb") as f:
        f.write(imgdata)
def denoise_image(filename, output_path="ReadyToTesseract.png"):
    """Denoise the image at *filename*, save it, and display the result.

    Args:
        filename: Path of the image to load with ``cv.imread``.
        output_path: Where the denoised image is written. Defaults to
            ``"ReadyToTesseract.png"``, the name the rest of the script
            reads back.

    Raises:
        OSError: If the input image cannot be read or the output image
            cannot be written.
    """
    img = cv.imread(filename)
    # cv.imread signals failure by returning None rather than raising; fail
    # here with a clear message instead of deep inside the denoiser.
    if img is None:
        raise OSError(f"Could not read image file: {filename!r}")

    # Positional arguments after dst (None): h=43, hColor=13,
    # templateWindowSize=7, searchWindowSize=21.
    clear_image = cv.fastNlMeansDenoisingColored(img, None, 43, 13, 7, 21)

    # cv.imwrite reports failure through its boolean return value.
    if not cv.imwrite(output_path, clear_image):
        raise OSError(f"Could not write denoised image to: {output_path!r}")

    cv.imshow("Denoised image", clear_image)
    # imshow only queues the image; the HighGUI event loop must be pumped
    # for the window to be painted. A 1 ms wait does that without blocking.
    cv.waitKey(1)
def extractText(filename):
    """Run Tesseract OCR on the image at *filename* and return the text.

    The image is opened with Pillow inside a ``with`` block so the file
    handle is released as soon as OCR finishes.
    """
    with Image.open(filename) as image:
        return pytesseract.image_to_string(image)
# Driver script: round-trip the input image through Base64, denoise the
# decoded copy, then OCR the denoised result and print the extracted text.
filepath = "poza.png"
extension = get_extension(filepath)
filename = "Decoded" + extension

# Encode the source image and immediately decode it into "Decoded<ext>".
string_b64 = create_encoding(filepath)
decode(string_b64, filename)

# denoise_image writes its output to "ReadyToTesseract.png", which is the
# file handed to OCR below.
denoise_image(filename)
print(extractText("ReadyToTesseract.png"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement