SHARE
TWEET

Untitled

a guest Dec 7th, 2019 87 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import cv2
  2. import numpy as np
# Module-level accumulator of one-letter labels: get_selection() appends "I"
# or "F" for each image it classifies, and the __main__ loop appends "A" for
# the image it skips.
global_warming = []
# Not referenced anywhere else in the visible code.
# NOTE(review): presumably meant to collect per-image answer tables - confirm.
global_warming_results = []
  5.  
  6.  
  7. def split_image_and_save(image):
  8.     height, width = image.shape[:2]
  9.  
  10.     start_row, start_col = int(0), int(0)
  11.     end_row, end_col = int(height * .35), int(width * .5)
  12.     cropped_top = image[start_row:end_row, start_col:end_col]
  13.     cv2.imwrite("TopSide.jpg", cropped_top)
  14.  
  15.     start_row, start_col = int(height * .52), int(width * 0.1)
  16.     end_row, end_col = int(height * 0.87), int(width)
  17.     end_col_left = int(width * .37)
  18.     start_col_right = int(width * .63)
  19.     end_col = int(width * 0.9)
  20.     choice_start_row = int(height * 0.45)
  21.     choice_end_row = start_row
  22.  
  23.     cropped_bot_left = image[start_row:end_row, start_col:end_col_left]
  24.     cropped_bot_right = image[start_row:end_row, start_col_right:end_col]
  25.     cropped_bot_right_choice = image[choice_start_row:choice_end_row, start_col_right:end_col]
  26.  
  27.     cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  28.     cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  29.     cv2.imwrite("BotSideRightChoice.jpg", cropped_bot_right_choice)
  30.  
  31.  
  32. def split_image(image):
  33.     height, width = image.shape[:2]
  34.  
  35.     start_row, start_col = 0, 0
  36.     middle_row = int(height * 0.5)
  37.     row_for_right_side = int(height * 0.4)
  38.     middle_collumn = int(width * 0.5)
  39.     cropped_bot_left = image[middle_row:height, start_col:middle_collumn]
  40.     cropped_bot_right = image[row_for_right_side:height, middle_collumn:width]
  41.     cropped_top_left = image[start_row:middle_row, start_col:middle_collumn]
  42.  
  43.     cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  44.     cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  45.     cv2.imwrite("TopSideLeft.jpg", cropped_top_left)
  46.  
  47.     return cropped_bot_left, cropped_bot_right, cropped_top_left
  48.  
  49.  
  50. def split_image_less_custom(image):
  51.     height, width = image.shape[:2]
  52.  
  53.     start_row, start_col = 0, 0
  54.     middle_row = int(height * 0.5)
  55.     row_for_right_side = int(height * 0.4)
  56.     middle_collumn = int(width * 0.5)
  57.     cropped_bot_left = image[middle_row:height, start_col:middle_collumn]
  58.     cropped_bot_right = image[row_for_right_side:height, middle_collumn:width]
  59.     cropped_top_left = image[start_row:middle_row, start_col:middle_collumn]
  60.  
  61.     cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  62.     cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  63.     cv2.imwrite("TopSideLeft.jpg", cropped_top_left)
  64.  
  65.  
  66. def remove_duplicates(lines):
  67.     # remove duplicate lines (lines within 10 pixels of eachother)
  68.     for line in lines:
  69.         line = line[0]
  70.         x1 = line[0]
  71.         y1 = line[1]
  72.         x2 = line[2]
  73.         y2 = line[3]
  74.         for index, line in enumerate(lines):
  75.             line = line[0]
  76.             x3 = line[0]
  77.             y3 = line[1]
  78.             x4 = line[2]
  79.             y4 = line[3]
  80.             if y1 == y2 and y3 == y4:
  81.                 diff = abs(y1 - y3)
  82.             elif x1 == x2 and x3 == x4:
  83.                 diff = abs(x1 - x3)
  84.             else:
  85.                 diff = 0
  86.             if diff < 10 and diff is not 0:
  87.                 del lines[index]
  88.     return lines
  89.  
  90.  
  91. def sort_line_list(lines):
  92.     # sort lines into horizontal and vertical
  93.     vertical = []
  94.     horizontal = []
  95.     for line in lines:
  96.         line = line[0]
  97.         if line[0] == line[2]:
  98.             vertical.append(line)
  99.         elif line[1] == line[3]:
  100.             horizontal.append(line)
  101.     vertical.sort()
  102.     horizontal.sort(key=lambda x: x[1])
  103.     return horizontal, vertical
  104.  
  105.  
  106. def get_results_from_table(horizontal, vertical, thresh_image):
  107.     rows = []
  108.     for i, h in enumerate(horizontal):
  109.         if i < len(horizontal) - 1:
  110.             row = []
  111.             for j, v in enumerate(vertical):
  112.                 if i < len(horizontal) - 1 and j < len(vertical) - 1:
  113.                     # every cell before last cell
  114.                     # get width & height
  115.                     width = horizontal[i + 1][1] - h[1]
  116.                     height = vertical[j + 1][0] - v[0]
  117.                 else:
  118.                     # last cell, width = cell start to end of image
  119.                     # get width & height
  120.                     width = tW
  121.                     height = tH
  122.                 tW = width
  123.                 tH = height
  124.                 # get roi (region of interest)
  125.                 roi = thresh_image[h[1]:h[1] + width, v[0]:v[0] + height]
  126.                 row.append(roi)
  127.             row.pop()
  128.             rows.append(row)
  129.  
  130.     results = dict()
  131.     for i, row in enumerate(rows):
  132.         cell_a = row[0]
  133.         cell_b = row[1]
  134.         cell_c = row[2]
  135.         cell_d = row[3]
  136.  
  137.         cell_a_white = np.sum(cell_a == 0)
  138.         cell_b_white = np.sum(cell_b == 0)
  139.         cell_c_white = np.sum(cell_c == 0)
  140.         cell_d_white = np.sum(cell_d == 0)
  141.  
  142.         min_white_pixels = max(cell_a_white, max(cell_b_white, max(cell_c_white, cell_d_white)))
  143.  
  144.         if min_white_pixels == cell_a_white:
  145.             results[i + 1] = "A"
  146.         if min_white_pixels == cell_b_white:
  147.             results[i + 1] = "B"
  148.         if min_white_pixels == cell_c_white:
  149.             results[i + 1] = "C"
  150.         if min_white_pixels == cell_d_white:
  151.             results[i + 1] = "D"
  152.  
  153.     return results
  154.     # for key, value in results.items():
  155.     #     print("Result for question " + str(key) + " is " + value)
  156.  
  157.  
  158. # returns 0 for Informatica and 1 for Fizica
  159. def get_results_from_selection(horizontal, vertical, thresh_image):
  160.     # Can be hardcoded as less line will give bad results (should not even work) and more lines is not a good answer.
  161.     # Expectation is only for those lines
  162.     horizontal_1 = horizontal[0]
  163.     horizontal_2 = horizontal[1]
  164.     horizontal_3 = horizontal[2]
  165.     horizontal_4 = horizontal[3]
  166.     vertical_1 = vertical[0]
  167.     vertical_2 = vertical[1]
  168.  
  169.     width_1 = horizontal_2[1] - horizontal_1[1]
  170.     height = vertical_2[0] - vertical_1[0]
  171.     width_2 = horizontal_4[1] - horizontal_3[1]
  172.  
  173.     roi1 = thresh_image[horizontal_1[1]:horizontal_1[1] + width_1, vertical_1[0]:vertical_1[0] + height]
  174.     roi2 = thresh_image[horizontal_3[1]:horizontal_3[1] + width_2, vertical_1[0]:vertical_1[0] + height]
  175.  
  176.     if np.sum(roi1 == 0) > np.sum(roi2 == 0):
  177.         return 0
  178.         # print("Informatica")
  179.     else:
  180.         return 1
  181.         # print("Fizica")
  182.  
  183.  
  184. def remove_reduntant_horizontal_lines(horizontal, threshold):
  185.     # remove redundant horizontal lines
  186.     to_remove = []
  187.  
  188.     for i in range(1, len(horizontal)):
  189.         y1 = horizontal[i - 1][1]
  190.         y2 = horizontal[i][1]
  191.         if abs(y2 - y1) < threshold:
  192.             to_remove.append(i - 1)
  193.  
  194.     new_horizontals = []
  195.     for i, el in enumerate(horizontal):
  196.         if i not in to_remove:
  197.             new_horizontals.append(el)
  198.  
  199.     return new_horizontals
  200.  
  201.  
  202. def remove_redundant_vertical_lines(vertical, threshold):
  203.     # remove redundant vertical lines
  204.     to_remove = []
  205.  
  206.     for i in range(1, len(vertical)):
  207.         x1 = vertical[i - 1][0]
  208.         x2 = vertical[i][0]
  209.  
  210.         if abs(x2 - x1) < threshold:
  211.             to_remove.append(i - 1)
  212.  
  213.     new_verticals = []
  214.     for i, el in enumerate(vertical):
  215.         if i not in to_remove:
  216.             new_verticals.append(el)
  217.  
  218.     return new_verticals
  219.  
  220.  
  221. def normalize_lines(horizontal, vertical):
  222.     # stretch horizontals
  223.     for i, line in enumerate(horizontal):
  224.         x1 = vertical[0][0]
  225.         y1 = line[1]
  226.         x2 = vertical[-1][0]
  227.         y2 = line[3]
  228.  
  229.         horizontal[i] = [x1, y1, x2, y2]
  230.  
  231.     # stretch verticals
  232.     for i, line in enumerate(vertical):
  233.         x1 = line[0]
  234.         y1 = horizontal[0][1]
  235.         x2 = line[2]
  236.         y2 = horizontal[-1][1]
  237.  
  238.         vertical[i] = [x1, y1, x2, y2]
  239.  
  240.     return horizontal, vertical
  241.  
  242.  
  243. def handle_left_table(img):
  244.     # gray scale image
  245.     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  246.     # apply adaptive threshold
  247.     # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  248.     thresh = cv2.threshold(gray, 125, 255, cv2.THRESH_BINARY)[1]
  249.     # apply Canny algorithm to get the edges
  250.     edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
  251.     # Use HoughtLines to get the lines from the Canny result
  252.     lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
  253.     # Remove duplicate lines
  254.     lines = remove_duplicates(lines)
  255.     # break lines into horizontal and vertical lines
  256.     horizontal, vertical = sort_line_list(lines)
  257.  
  258.     horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
  259.     # get rid of last horizontal line
  260.     horizontal = horizontal[:-1]
  261.     # take only the 15 needed lines (starting to count from last to the 1st line)
  262.     horizontal = horizontal[len(horizontal) - 15 - 1:]
  263.  
  264.     vertical = remove_redundant_vertical_lines(vertical, 75)
  265.     # get rid of the first 2 vertical lines. We don't need that part of the table
  266.     vertical = vertical[2:]
  267.  
  268.     horizontal, vertical = normalize_lines(horizontal, vertical)
  269.  
  270.     results = get_results_from_table(horizontal, vertical, thresh)
  271.  
  272.     # for key, value in results.items():
  273.     #     print("Result for question " + str(key) + " is " + value)
  274.  
  275.  
  276. def handle_right_table_and_choice(img):
  277.     # gray scale image
  278.     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  279.     # apply adaptive threshold
  280.     thresh_for_results = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  281.     thresh = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)[1]
  282.     # apply Canny algorithm to get the edges
  283.     edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
  284.     # Use HoughtLines to get the lines from the Canny result
  285.     lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
  286.     # Remove duplicate lines
  287.     lines = remove_duplicates(lines)
  288.     # break lines into horizontal and vertical lines
  289.     horizontal, vertical = sort_line_list(lines)
  290.  
  291.     horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
  292.     # get rid of last horizontal line
  293.     horizontal = horizontal[:-1]
  294.     # take only the 15 needed lines (starting to count from last to the 1st line)
  295.     horizontal = horizontal[len(horizontal) - 15 - 1:]
  296.  
  297.     vertical = remove_redundant_vertical_lines(vertical, 75)
  298.     # get rid of the first 2 vertical lines. We don't need that part of the table
  299.     vertical = vertical[2:]
  300.  
  301.     horizontal, vertical = normalize_lines(horizontal, vertical)
  302.  
  303.     results = get_results_from_table(horizontal, vertical, thresh_for_results)
  304.  
  305.     # for key, value in results.items():
  306.     #     print("Result for question " + str(key) + " is " + value)
  307.  
  308.     # create selection region
  309.     # The selection region will be the region starting from TOP to the first horizontal from where we move upwards
  310.     # with a threshold (this time 250).
  311.     # The vertical lines will be the last 2 (the column for D) which we strtch a bit to be sure to get
  312.     # the 2 selection boxes
  313.     region_y = horizontal[0][1] - 250
  314.     region_x1 = vertical[-2][0] - 50
  315.     region_x2 = vertical[-1][2] + 50
  316.     selection_region_color = img[0:region_y, region_x1:region_x2]
  317.     selection_result = get_selection(selection_region_color)
  318.  
  319.  
  320. # returns 0 for Informatioca and 1 for Fizica
  321. def get_selection(selection_region_color):
  322.     # gray scale image
  323.     gray = cv2.cvtColor(selection_region_color, cv2.COLOR_BGR2GRAY)
  324.     # apply adaptive threshold
  325.     # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  326.     thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
  327.  
  328.     edges = cv2.Canny(gray, 30, 200)
  329.     contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
  330.  
  331.     for i, ctr in enumerate(contours):
  332.         # Get bounding box
  333.         x, y, w, h = cv2.boundingRect(ctr)
  334.  
  335.         # Getting ROI
  336.         roi = selection_region_color[y:y + h, x:x + w]
  337.  
  338.         # cv2.imshow('segment no:'+str(i),roi)
  339.         cv2.rectangle(selection_region_color, (x, y), (x + w, y + h), (0, 255, 0), 2)
  340.  
  341.         if w > 15 and h > 15:
  342.             cv2.imwrite('{}.png'.format(i), roi)
  343.  
  344.     cv2.imshow('Marked Numbers', selection_region_color)
  345.     cv2.waitKey(0)
  346.  
  347.     rect_bottom = cv2.boundingRect(contours[0])
  348.     x, y, w, h = rect_bottom
  349.     bottom_box = thresh[y: y + h, x: x+w]
  350.  
  351.     rect_top = cv2.boundingRect(contours[1])
  352.     x, y, w, h = rect_top
  353.     top_box = thresh[y: y + h, x: x + w]
  354.  
  355.     if np.sum(top_box == 0) > np.sum(bottom_box == 0):
  356.         top_box = ~top_box
  357.         BH = 19
  358.         BV = 21
  359.         top_box = top_box[BH:-BH, BV:-BV]
  360.         global_warming.append("I")
  361.         cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", top_box)
  362.         return 0
  363.         # print("Informatica")
  364.     else:
  365.         bottom_box = ~bottom_box
  366.         BH = 19
  367.         BV = 21
  368.         bottom_box = bottom_box[BH:-BH, BV:-BV]
  369.         global_warming.append("F")
  370.         cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", bottom_box)
  371.         return 1
  372.         # print("Fizica")
  373.  
  374. if __name__ == "__main__":
  375.     # got 150 images
  376.     for i in range(1, 151):
  377.         # 92nd image has an X instead of a number in the selection zone
  378.         if i == 92:
  379.             global_warming.append("A")
  380.             continue
  381.         print("Handling image " + str(i))
  382.         img = cv2.imread("path_to_image" + str(i) + ".jpg")
  383.         cropped_bot_left, cropped_bot_right, cropped_top_left = split_image(img)
  384.         handle_right_table_and_choice(cropped_bot_right)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top