# VisionAI_local_tesseract

import cv2
import pytesseract
from matplotlib import pyplot as plt

# show image function
def show_img(img):
    """Display an OpenCV image in a 16x8 inch matplotlib figure with axes off.

    Args:
        img: Image array as handled by OpenCV. A 2-D (single-channel) array
            is expanded from grayscale to RGB; any other array is treated as
            BGR and converted to RGB, which is the channel order matplotlib
            expects.
    """
    fig = plt.gcf()
    fig.set_size_inches(16, 8)
    plt.axis("off")
    if img.ndim == 2:
        # COLOR_BGR2RGB rejects single-channel input, so grayscale and
        # thresholded images need the GRAY2RGB conversion instead.
        rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.show()

def detect_text(image_file, text_area):
    """Read a number from a rectangular region of an image using Tesseract.

    The image is loaded, cropped to ``text_area``, converted to a binary
    image, contrast-adjusted, colour-inverted and passed to pytesseract.
    As a debugging aid the inverted image is also written to
    ``reversed_image.png`` in the current working directory.

    Args:
        image_file: Path of the image file to read.
        text_area: ``(left, top, width, height)`` of the crop region, in
            pixels.

    Returns:
        The recognised text converted to ``float``, or ``None`` when the
        image cannot be loaded, the crop region is empty, or the recognised
        text is not a valid number.
    """
    # cv2.imread does not raise for a missing or unreadable file; it returns
    # None, so the failure has to be detected explicitly.
    img = cv2.imread(image_file)
    if img is None:
        print(f"Error: File not found: {image_file}")
        return None

    # Rotate the image if needed (angle 0 and scale 1 leave it unchanged;
    # adjust the angle for skewed input).
    height, width = img.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), 0, 1)
    img_rotated = cv2.warpAffine(img, rotation_matrix, (width, height))
    # show_img(img_rotated)

    # Crop the image with given text_area=(left, top, width, height)
    (x, y, w, h) = text_area
    img_crop = img_rotated[y:y + h, x:x + w]
    if img_crop.size == 0:
        # A region entirely outside the image yields an empty array, which
        # the colour conversion below would reject.
        print(f"Error: text_area {text_area} does not overlap the image")
        return None
    # show_img(img_crop)

    # Convert the image to grayscale mode and apply thresholding
    img_gray = cv2.cvtColor(img_crop, cv2.COLOR_BGR2GRAY)
    _, img_bw = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
    # show_img(img_bw)

    # Reduce the brightness and enhance the contrast of the image if needed
    img_low_bright = cv2.addWeighted(img_bw, 1.5, img_bw, 0.0, -50)
    alpha = 3.0  # Contrast control (1.0-3.0)
    beta = 0     # Brightness control (0-100)
    img_contrast = cv2.convertScaleAbs(img_low_bright, alpha=alpha, beta=beta)
    # show_img(img_contrast)

    # Reverse the colors by subtracting the image from 255; needed when the
    # original digits are in white color.
    img_reversed = 255 - img_contrast
    # show_img(img_reversed)

    # Save the modified image for debugging purposes
    cv2.imwrite("reversed_image.png", img_reversed)

    # Use pytesseract to perform optical character recognition on the image.
    # NOTE(review): "outbase" looks like a typo of the commonly used
    # "outputbase digits" idiom; left unchanged -- confirm against Tesseract.
    custom_config = r'--oem 3 --psm 7 outbase digits'

    # Pass img_contrast instead of img_reversed if the original digits are
    # not white.
    text = pytesseract.image_to_string(img_reversed, config=custom_config)

    # Return the detected text as a float, or None if it is not a number
    # (float() itself ignores surrounding whitespace in the OCR output).
    try:
        return float(text)
    except ValueError:
        return None

if __name__ == "__main__":
    # Run the sample only when executed as a script, so importing this module
    # does not trigger OCR, and print the value instead of discarding it.
    # text_area=(left, top, width, height)
    print(detect_text("test.png", text_area=(200, 190, 410, 65)))