# Untitled

import base64
import cv2 as cv
import os
import pytesseract
from PIL import Image

def create_encoding(filepath):
    """Return the contents of the file at ``filepath`` encoded as base64.

    The file is opened in binary mode and read in full; the return value is
    the ``bytes`` object produced by ``base64.b64encode``.
    """
    with open(filepath, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes)

def get_extension(filepath):
    """Return the extension of ``filepath`` as split off by ``os.path.splitext``.

    The result includes the leading dot (e.g. ``".png"``) and is an empty
    string when ``os.path.splitext`` finds no extension.
    """
    _root, suffix = os.path.splitext(filepath)
    return suffix

def decode(string_b64, filename):
    """Decode base64 data and write the resulting bytes to ``filename``.

    The payload is decoded before the target file is opened, so a decoding
    error leaves any existing file at ``filename`` untouched. The file is
    opened in binary write mode, which replaces previous contents.
    """
    decoded_bytes = base64.b64decode(string_b64)
    with open(filename, 'wb') as target:
        target.write(decoded_bytes)

def denoise_image(filename, *, output_path="ReadyToTesseract.png", show=True):
    """Denoise the colour image at ``filename`` and save the result.

    The image is read with ``cv.imread``, filtered with
    ``cv.fastNlMeansDenoisingColored`` and written to ``output_path``.

    Args:
        filename: Path of the image to read.
        output_path: Path the denoised image is written to. Defaults to
            ``"ReadyToTesseract.png"``.
        show: When true (the default), also display the denoised image in a
            window titled "Denoised image".

    Raises:
        OSError: If the input image cannot be read or the denoised image
            cannot be written.
    """
    img = cv.imread(filename)
    # cv.imread reports failure by returning None rather than raising; fail
    # here with a clear message instead of deep inside the denoising call.
    if img is None:
        raise OSError("Could not read image: {!r}".format(filename))

    # Positional arguments after dst: h=43, hColor=13,
    # templateWindowSize=7, searchWindowSize=21.
    clear_image = cv.fastNlMeansDenoisingColored(img, None, 43, 13, 7, 21)

    # cv.imwrite returns False when the image could not be saved.
    if not cv.imwrite(output_path, clear_image):
        raise OSError("Could not write image: {!r}".format(output_path))

    if show:
        cv.imshow("Denoised image", clear_image)
        # HighGUI only paints the window while its event loop is pumped, so
        # imshow must be followed by waitKey; 1 ms keeps the call effectively
        # non-blocking.
        cv.waitKey(1)

def extractText(filename):
    """Run Tesseract OCR on the image at ``filename`` and return its text.

    The image is opened with Pillow inside a ``with`` block so the underlying
    file handle is released as soon as OCR finishes, instead of whenever the
    image object happens to be garbage-collected.

    Args:
        filename: Path of the image file to read.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the image.
    """
    with Image.open(filename) as image:
        return pytesseract.image_to_string(image)


# Demo pipeline: round-trip an image through base64, denoise it, then OCR it.
filepath = "poza.png"

# Encode the source image, then write the decoded bytes back out under a new
# name that keeps the original extension.
string_b64 = create_encoding(filepath)
extension = get_extension(filepath)
filename = "Decoded" + extension
decode(string_b64, filename)

# denoise_image saves its result as "ReadyToTesseract.png", which is the file
# handed to the OCR step below.
denoise_image(filename)

print(extractText('ReadyToTesseract.png'))