Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import cv2
import numpy as np
# One entry per processed sheet: "I" (Informatica) or "F" (Fizica), appended by
# get_selection(); the __main__ loop appends a hard-coded "A" for image 92.
global_warming = []
# NOTE(review): never written in this visible chunk — presumably populated
# elsewhere (or dead); confirm before relying on it.
global_warming_results = []
def split_image_and_save(image):
    """Crop an answer-sheet image into four regions and save each as a JPG.

    Writes TopSide.jpg, BotSideLeft.jpg, BotSideRight.jpg and
    BotSideRightChoice.jpg to the working directory; returns nothing.
    """
    height, width = image.shape[:2]

    # Top region: upper 35% of the sheet, left half.
    cropped_top = image[0:int(height * .35), 0:int(width * .5)]
    cv2.imwrite("TopSide.jpg", cropped_top)

    # Bottom band shared by both tables: rows 52%..87% of the sheet.
    # (Fixed: the original also assigned end_col = int(width) here, which was
    # dead — immediately overwritten by int(width * 0.9) below.)
    start_row = int(height * .52)
    end_row = int(height * 0.87)
    start_col = int(width * 0.1)
    end_col_left = int(width * .37)
    start_col_right = int(width * .63)
    end_col = int(width * 0.9)
    # The selection ("choice") area sits just above the right table:
    # rows 45% .. 52% (i.e. down to where the bottom band starts).
    choice_start_row = int(height * 0.45)
    choice_end_row = start_row

    cropped_bot_left = image[start_row:end_row, start_col:end_col_left]
    cropped_bot_right = image[start_row:end_row, start_col_right:end_col]
    cropped_bot_right_choice = image[choice_start_row:choice_end_row, start_col_right:end_col]
    cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
    cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
    cv2.imwrite("BotSideRightChoice.jpg", cropped_bot_right_choice)
def split_image(image):
    """Split the sheet into bottom-left, bottom-right and top-left crops.

    The right-side crop starts at 40% of the height (higher than the 50%
    midline used for the left side). Each crop is also saved as a JPG.
    Returns (bottom_left, bottom_right, top_left).
    """
    h, w = image.shape[:2]
    half_h = int(h * 0.5)
    right_start = int(h * 0.4)
    half_w = int(w * 0.5)

    bottom_left = image[half_h:h, 0:half_w]
    bottom_right = image[right_start:h, half_w:w]
    top_left = image[0:half_h, 0:half_w]

    for filename, crop in (("BotSideLeft.jpg", bottom_left),
                           ("BotSideRight.jpg", bottom_right),
                           ("TopSideLeft.jpg", top_left)):
        cv2.imwrite(filename, crop)
    return bottom_left, bottom_right, top_left
def split_image_less_custom(image):
    """Split the sheet exactly like `split_image`, discarding the crops.

    Kept for backward compatibility. The body used to be a line-for-line
    duplicate of `split_image` (minus the return); it now delegates, so the
    same three files (BotSideLeft.jpg, BotSideRight.jpg, TopSideLeft.jpg)
    are written and only one copy of the cropping logic has to be maintained.
    """
    split_image(image)
def remove_duplicates(lines):
    """Remove near-duplicate Hough lines (parallel lines within 10 px).

    Each element of `lines` wraps one segment [x1, y1, x2, y2]. Two
    horizontal (or two vertical) segments closer than 10 px apart are
    duplicates; the first occurrence is kept. Exact overlaps (distance 0)
    are intentionally kept, matching the original behavior.

    Fixes two defects in the original:
    * `diff is not 0` compared ints by identity (a SyntaxWarning and
      undefined behavior outside CPython's small-int cache) — now `0 < diff`.
    * `del lines[index]` mutated the list while both loops iterated it,
      skipping elements — now a new list is built instead.
    """
    kept = []
    for wrapper in lines:
        x1, y1, x2, y2 = wrapper[0][0], wrapper[0][1], wrapper[0][2], wrapper[0][3]
        is_duplicate = False
        for other in kept:
            x3, y3, x4, y4 = other[0][0], other[0][1], other[0][2], other[0][3]
            if y1 == y2 and y3 == y4:
                # Both horizontal: distance is the vertical gap.
                diff = abs(y1 - y3)
            elif x1 == x2 and x3 == x4:
                # Both vertical: distance is the horizontal gap.
                diff = abs(x1 - x3)
            else:
                diff = 0
            if 0 < diff < 10:
                is_duplicate = True
                break
        if not is_duplicate:
            kept.append(wrapper)
    return kept
def sort_line_list(lines):
    """Separate Hough segments into horizontal and vertical groups.

    Each element of `lines` wraps one segment [x1, y1, x2, y2]. Segments
    that are neither perfectly horizontal nor perfectly vertical are
    dropped. Returns (horizontal sorted by y, vertical sorted by x).
    """
    vertical = []
    horizontal = []
    for wrapper in lines:
        segment = wrapper[0]
        if segment[0] == segment[2]:
            vertical.append(segment)
        elif segment[1] == segment[3]:
            horizontal.append(segment)
    # Default sort orders verticals by x1 first; horizontals sort on y1.
    return sorted(horizontal, key=lambda seg: seg[1]), sorted(vertical)
def get_results_from_table(horizontal, vertical, thresh_image):
    """Read the marked answer (A-D) for each row of a normalized table grid.

    horizontal / vertical: line segments [x1, y1, x2, y2] from
    normalize_lines (horizontals sorted by y, verticals by x).
    thresh_image: binarized image where marked pixels are 0.
    Returns {question_number: "A"|"B"|"C"|"D"} with 1-based keys.

    NOTE: despite the names, `width` is the cell's vertical extent and
    `height` its horizontal extent; the pair is kept to match the original
    row/column slicing below.
    """
    rows = []
    # Fix: the original read tW/tH before any assignment when the very first
    # cell was also the last one (single vertical line) — initialize to 0.
    prev_width = prev_height = 0
    for i, h in enumerate(horizontal[:-1]):
        row = []
        for j, v in enumerate(vertical):
            if j < len(vertical) - 1:
                # Regular cell: size from the distance to the next grid line.
                width = horizontal[i + 1][1] - h[1]
                height = vertical[j + 1][0] - v[0]
            else:
                # Last cell: reuse the previous cell's size.
                width = prev_width
                height = prev_height
            prev_width, prev_height = width, height
            # Region of interest (the cell's pixels).
            roi = thresh_image[h[1]:h[1] + width, v[0]:v[0] + height]
            row.append(roi)
        row.pop()  # drop the trailing partial cell
        rows.append(row)

    results = dict()
    for i, row in enumerate(rows):
        # The first four cells hold the tick boxes for answers A-D.
        # (Renamed from `min_white_pixels`: the original name said "min"
        # while computing a maximum of black-pixel counts.)
        black_counts = [int(np.sum(cell == 0)) for cell in row[:4]]
        darkest = max(black_counts)
        # On a tie the later letter wins, matching the original if-cascade.
        for letter, count in zip("ABCD", black_counts):
            if count == darkest:
                results[i + 1] = letter
    return results
- # for key, value in results.items():
- # print("Result for question " + str(key) + " is " + value)
- # returns 0 for Informatica and 1 for Fizica
def get_results_from_selection(horizontal, vertical, thresh_image):
    """Decide which of the two stacked selection boxes is ticked.

    Hard-coded to exactly four horizontal and two vertical lines: fewer
    lines should not even reach this point, and more lines would not be a
    meaningful selection table. Returns 0 for Informatica, 1 for Fizica.
    """
    h1, h2, h3, h4 = horizontal[0], horizontal[1], horizontal[2], horizontal[3]
    v_left, v_right = vertical[0], vertical[1]

    top_extent = h2[1] - h1[1]
    bottom_extent = h4[1] - h3[1]
    span = v_right[0] - v_left[0]

    top_box = thresh_image[h1[1]:h1[1] + top_extent, v_left[0]:v_left[0] + span]
    bottom_box = thresh_image[h3[1]:h3[1] + bottom_extent, v_left[0]:v_left[0] + span]

    # The box containing strictly more black (0) pixels is the ticked one.
    top_black = np.sum(top_box == 0)
    bottom_black = np.sum(bottom_box == 0)
    return 0 if top_black > bottom_black else 1
def remove_reduntant_horizontal_lines(horizontal, threshold):
    """Collapse horizontal lines closer than `threshold` px to a neighbor.

    For each consecutive pair whose y-distance is below `threshold`, the
    earlier line is discarded (the later one survives). Returns a new list.
    (Function name keeps its original spelling for the existing callers.)
    """
    redundant = {
        i - 1
        for i in range(1, len(horizontal))
        if abs(horizontal[i][1] - horizontal[i - 1][1]) < threshold
    }
    return [seg for i, seg in enumerate(horizontal) if i not in redundant]
def remove_redundant_vertical_lines(vertical, threshold):
    """Collapse vertical lines closer than `threshold` px to a neighbor.

    For each consecutive pair whose x-distance is below `threshold`, the
    earlier line is discarded (the later one survives). Returns a new list.
    """
    redundant = {
        i - 1
        for i in range(1, len(vertical))
        if abs(vertical[i][0] - vertical[i - 1][0]) < threshold
    }
    return [seg for i, seg in enumerate(vertical) if i not in redundant]
def normalize_lines(horizontal, vertical):
    """Stretch grid lines so they form a closed rectangle.

    Every horizontal line is extended to run from the first vertical line
    to the last one; every vertical line from the first horizontal to the
    last. Both lists are rewritten in place and also returned.
    """
    # Stretch horizontals across the full x-range of the verticals.
    left_x = vertical[0][0]
    right_x = vertical[-1][0]
    for i, seg in enumerate(horizontal):
        horizontal[i] = [left_x, seg[1], right_x, seg[3]]

    # Stretch verticals across the full y-range of the horizontals
    # (the y-coordinates were untouched by the loop above).
    top_y = horizontal[0][1]
    bottom_y = horizontal[-1][1]
    for i, seg in enumerate(vertical):
        vertical[i] = [seg[0], top_y, seg[2], bottom_y]

    return horizontal, vertical
def handle_left_table(img):
    """Extract the per-question answers from the left results table.

    `img` is the BGR crop containing the left table. Returns the dict
    produced by get_results_from_table ({question_number: "A".."D"}).
    Fix: the original computed `results` and silently discarded it; the
    value is now returned (backward compatible — callers ignoring it see
    no difference).
    """
    # Gray-scale, then binarize with a fixed threshold.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # An adaptive threshold was also tried here:
    # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
    thresh = cv2.threshold(gray, 125, 255, cv2.THRESH_BINARY)[1]
    # Canny edges feed the probabilistic Hough transform.
    edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
    # Clean up and classify the detected segments.
    lines = remove_duplicates(lines)
    horizontal, vertical = sort_line_list(lines)
    horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
    # Drop the last horizontal line, then keep only the 16 lines that bound
    # the 15 question rows (counted from the end of the list).
    horizontal = horizontal[:-1]
    horizontal = horizontal[len(horizontal) - 15 - 1:]
    vertical = remove_redundant_vertical_lines(vertical, 75)
    # The first two vertical lines delimit a column that is not needed for
    # reading the answers.
    vertical = vertical[2:]
    horizontal, vertical = normalize_lines(horizontal, vertical)
    return get_results_from_table(horizontal, vertical, thresh)
def handle_right_table_and_choice(img):
    """Extract answers from the right table plus the selection boxes above it.

    `img` is the BGR crop containing the right table and, above it, the two
    subject-selection boxes. Returns (results, selection) where `results`
    is the dict from get_results_from_table and `selection` is 0 for
    Informatica / 1 for Fizica (from get_selection).
    Fix: the original computed both values and silently discarded them;
    they are now returned (backward compatible — callers ignoring the
    return value see no difference).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Two binarizations: the adaptive threshold is read for the tick marks,
    # while the fixed threshold below is only used for line detection.
    thresh_for_results = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
    thresh = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)[1]
    # Canny edges feed the probabilistic Hough transform.
    edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
    # Clean up and classify the detected segments.
    lines = remove_duplicates(lines)
    horizontal, vertical = sort_line_list(lines)
    horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
    # Drop the last horizontal line, then keep only the 16 lines that bound
    # the 15 question rows (counted from the end of the list).
    horizontal = horizontal[:-1]
    horizontal = horizontal[len(horizontal) - 15 - 1:]
    vertical = remove_redundant_vertical_lines(vertical, 75)
    # The first two vertical lines delimit a column that is not needed for
    # reading the answers.
    vertical = vertical[2:]
    horizontal, vertical = normalize_lines(horizontal, vertical)
    results = get_results_from_table(horizontal, vertical, thresh_for_results)

    # Selection region: from the top of the crop down to 250 px above the
    # first horizontal line, horizontally around the last column ("D"),
    # widened by 50 px on each side to be sure both boxes are included.
    region_y = horizontal[0][1] - 250
    region_x1 = vertical[-2][0] - 50
    region_x2 = vertical[-1][2] + 50
    selection_region_color = img[0:region_y, region_x1:region_x2]
    selection_result = get_selection(selection_region_color)
    return results, selection_result
# returns 0 for Informatica and 1 for Fizica
def get_selection(selection_region_color):
    """Decide which selection box (top or bottom) is ticked.

    Returns 0 for Informatica, 1 for Fizica. Side effects: draws debug
    rectangles on `selection_region_color`, saves contour crops as
    <i>.png, shows a blocking debug window, appends "I"/"F" to the global
    `global_warming` list and writes the cropped digit under
    only_so_called_digits/.
    """
    # gray scale image
    gray = cv2.cvtColor(selection_region_color, cv2.COLOR_BGR2GRAY)
    # apply adaptive threshold
    # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
    thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
    edges = cv2.Canny(gray, 30, 200)
    # NOTE(review): this 2-tuple unpacking assumes OpenCV 2.x/4.x;
    # OpenCV 3.x returns 3 values from findContours — confirm the installed version.
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for i, ctr in enumerate(contours):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        # Getting ROI
        roi = selection_region_color[y:y + h, x:x + w]
        # cv2.imshow('segment no:'+str(i),roi)
        # Debug visualization: mark every contour on the color crop.
        cv2.rectangle(selection_region_color, (x, y), (x + w, y + h), (0, 255, 0), 2)
        if w > 15 and h > 15:
            cv2.imwrite('{}.png'.format(i), roi)
    # NOTE(review): imshow + waitKey(0) block until a key is pressed for every
    # sheet — debugging aid; remove for unattended batch runs.
    cv2.imshow('Marked Numbers', selection_region_color)
    cv2.waitKey(0)
    # Assumes contours[0] bounds the bottom box and contours[1] the top box —
    # this depends on OpenCV's contour ordering; TODO confirm.
    rect_bottom = cv2.boundingRect(contours[0])
    x, y, w, h = rect_bottom
    bottom_box = thresh[y: y + h, x: x+w]
    rect_top = cv2.boundingRect(contours[1])
    x, y, w, h = rect_top
    top_box = thresh[y: y + h, x: x + w]
    # The box with more black (0) pixels is the ticked one.
    if np.sum(top_box == 0) > np.sum(bottom_box == 0):
        # Invert so the digit becomes dark-on-light for the saved crop.
        top_box = ~top_box
        # Trim a fixed border (BH rows top/bottom, BV cols left/right)
        # to isolate the hand-written digit.
        BH = 19
        BV = 21
        top_box = top_box[BH:-BH, BV:-BV]
        global_warming.append("I")
        cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", top_box)
        return 0
        # print("Informatica")
    else:
        bottom_box = ~bottom_box
        BH = 19
        BV = 21
        bottom_box = bottom_box[BH:-BH, BV:-BV]
        global_warming.append("F")
        cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", bottom_box)
        return 1
        # print("Fizica")
if __name__ == "__main__":
    # Process the 150 scanned sheets one by one.
    for image_number in range(1, 151):
        if image_number == 92:
            # The 92nd image has an X instead of a number in the selection
            # zone, so record a fallback marker and skip it entirely.
            global_warming.append("A")
            continue
        print(f"Handling image {image_number}")
        current = cv2.imread(f"path_to_image{image_number}.jpg")
        bot_left, bot_right, top_left = split_image(current)
        # Only the bottom-right crop (right table + choice boxes) is graded here.
        handle_right_table_and_choice(bot_right)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement