Advertisement
Guest User

Untitled

a guest
Dec 7th, 2019
322
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.42 KB | None | 0 0
  1. import cv2
  2. import numpy as np
  3. global_warming = []
  4. global_warming_results = []
  5.  
  6.  
  7. def split_image_and_save(image):
  8. height, width = image.shape[:2]
  9.  
  10. start_row, start_col = int(0), int(0)
  11. end_row, end_col = int(height * .35), int(width * .5)
  12. cropped_top = image[start_row:end_row, start_col:end_col]
  13. cv2.imwrite("TopSide.jpg", cropped_top)
  14.  
  15. start_row, start_col = int(height * .52), int(width * 0.1)
  16. end_row, end_col = int(height * 0.87), int(width)
  17. end_col_left = int(width * .37)
  18. start_col_right = int(width * .63)
  19. end_col = int(width * 0.9)
  20. choice_start_row = int(height * 0.45)
  21. choice_end_row = start_row
  22.  
  23. cropped_bot_left = image[start_row:end_row, start_col:end_col_left]
  24. cropped_bot_right = image[start_row:end_row, start_col_right:end_col]
  25. cropped_bot_right_choice = image[choice_start_row:choice_end_row, start_col_right:end_col]
  26.  
  27. cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  28. cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  29. cv2.imwrite("BotSideRightChoice.jpg", cropped_bot_right_choice)
  30.  
  31.  
  32. def split_image(image):
  33. height, width = image.shape[:2]
  34.  
  35. start_row, start_col = 0, 0
  36. middle_row = int(height * 0.5)
  37. row_for_right_side = int(height * 0.4)
  38. middle_collumn = int(width * 0.5)
  39. cropped_bot_left = image[middle_row:height, start_col:middle_collumn]
  40. cropped_bot_right = image[row_for_right_side:height, middle_collumn:width]
  41. cropped_top_left = image[start_row:middle_row, start_col:middle_collumn]
  42.  
  43. cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  44. cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  45. cv2.imwrite("TopSideLeft.jpg", cropped_top_left)
  46.  
  47. return cropped_bot_left, cropped_bot_right, cropped_top_left
  48.  
  49.  
  50. def split_image_less_custom(image):
  51. height, width = image.shape[:2]
  52.  
  53. start_row, start_col = 0, 0
  54. middle_row = int(height * 0.5)
  55. row_for_right_side = int(height * 0.4)
  56. middle_collumn = int(width * 0.5)
  57. cropped_bot_left = image[middle_row:height, start_col:middle_collumn]
  58. cropped_bot_right = image[row_for_right_side:height, middle_collumn:width]
  59. cropped_top_left = image[start_row:middle_row, start_col:middle_collumn]
  60.  
  61. cv2.imwrite("BotSideLeft.jpg", cropped_bot_left)
  62. cv2.imwrite("BotSideRight.jpg", cropped_bot_right)
  63. cv2.imwrite("TopSideLeft.jpg", cropped_top_left)
  64.  
  65.  
  66. def remove_duplicates(lines):
  67. # remove duplicate lines (lines within 10 pixels of eachother)
  68. for line in lines:
  69. line = line[0]
  70. x1 = line[0]
  71. y1 = line[1]
  72. x2 = line[2]
  73. y2 = line[3]
  74. for index, line in enumerate(lines):
  75. line = line[0]
  76. x3 = line[0]
  77. y3 = line[1]
  78. x4 = line[2]
  79. y4 = line[3]
  80. if y1 == y2 and y3 == y4:
  81. diff = abs(y1 - y3)
  82. elif x1 == x2 and x3 == x4:
  83. diff = abs(x1 - x3)
  84. else:
  85. diff = 0
  86. if diff < 10 and diff is not 0:
  87. del lines[index]
  88. return lines
  89.  
  90.  
  91. def sort_line_list(lines):
  92. # sort lines into horizontal and vertical
  93. vertical = []
  94. horizontal = []
  95. for line in lines:
  96. line = line[0]
  97. if line[0] == line[2]:
  98. vertical.append(line)
  99. elif line[1] == line[3]:
  100. horizontal.append(line)
  101. vertical.sort()
  102. horizontal.sort(key=lambda x: x[1])
  103. return horizontal, vertical
  104.  
  105.  
  106. def get_results_from_table(horizontal, vertical, thresh_image):
  107. rows = []
  108. for i, h in enumerate(horizontal):
  109. if i < len(horizontal) - 1:
  110. row = []
  111. for j, v in enumerate(vertical):
  112. if i < len(horizontal) - 1 and j < len(vertical) - 1:
  113. # every cell before last cell
  114. # get width & height
  115. width = horizontal[i + 1][1] - h[1]
  116. height = vertical[j + 1][0] - v[0]
  117. else:
  118. # last cell, width = cell start to end of image
  119. # get width & height
  120. width = tW
  121. height = tH
  122. tW = width
  123. tH = height
  124. # get roi (region of interest)
  125. roi = thresh_image[h[1]:h[1] + width, v[0]:v[0] + height]
  126. row.append(roi)
  127. row.pop()
  128. rows.append(row)
  129.  
  130. results = dict()
  131. for i, row in enumerate(rows):
  132. cell_a = row[0]
  133. cell_b = row[1]
  134. cell_c = row[2]
  135. cell_d = row[3]
  136.  
  137. cell_a_white = np.sum(cell_a == 0)
  138. cell_b_white = np.sum(cell_b == 0)
  139. cell_c_white = np.sum(cell_c == 0)
  140. cell_d_white = np.sum(cell_d == 0)
  141.  
  142. min_white_pixels = max(cell_a_white, max(cell_b_white, max(cell_c_white, cell_d_white)))
  143.  
  144. if min_white_pixels == cell_a_white:
  145. results[i + 1] = "A"
  146. if min_white_pixels == cell_b_white:
  147. results[i + 1] = "B"
  148. if min_white_pixels == cell_c_white:
  149. results[i + 1] = "C"
  150. if min_white_pixels == cell_d_white:
  151. results[i + 1] = "D"
  152.  
  153. return results
  154. # for key, value in results.items():
  155. # print("Result for question " + str(key) + " is " + value)
  156.  
  157.  
  158. # returns 0 for Informatica and 1 for Fizica
  159. def get_results_from_selection(horizontal, vertical, thresh_image):
  160. # Can be hardcoded as less line will give bad results (should not even work) and more lines is not a good answer.
  161. # Expectation is only for those lines
  162. horizontal_1 = horizontal[0]
  163. horizontal_2 = horizontal[1]
  164. horizontal_3 = horizontal[2]
  165. horizontal_4 = horizontal[3]
  166. vertical_1 = vertical[0]
  167. vertical_2 = vertical[1]
  168.  
  169. width_1 = horizontal_2[1] - horizontal_1[1]
  170. height = vertical_2[0] - vertical_1[0]
  171. width_2 = horizontal_4[1] - horizontal_3[1]
  172.  
  173. roi1 = thresh_image[horizontal_1[1]:horizontal_1[1] + width_1, vertical_1[0]:vertical_1[0] + height]
  174. roi2 = thresh_image[horizontal_3[1]:horizontal_3[1] + width_2, vertical_1[0]:vertical_1[0] + height]
  175.  
  176. if np.sum(roi1 == 0) > np.sum(roi2 == 0):
  177. return 0
  178. # print("Informatica")
  179. else:
  180. return 1
  181. # print("Fizica")
  182.  
  183.  
  184. def remove_reduntant_horizontal_lines(horizontal, threshold):
  185. # remove redundant horizontal lines
  186. to_remove = []
  187.  
  188. for i in range(1, len(horizontal)):
  189. y1 = horizontal[i - 1][1]
  190. y2 = horizontal[i][1]
  191. if abs(y2 - y1) < threshold:
  192. to_remove.append(i - 1)
  193.  
  194. new_horizontals = []
  195. for i, el in enumerate(horizontal):
  196. if i not in to_remove:
  197. new_horizontals.append(el)
  198.  
  199. return new_horizontals
  200.  
  201.  
  202. def remove_redundant_vertical_lines(vertical, threshold):
  203. # remove redundant vertical lines
  204. to_remove = []
  205.  
  206. for i in range(1, len(vertical)):
  207. x1 = vertical[i - 1][0]
  208. x2 = vertical[i][0]
  209.  
  210. if abs(x2 - x1) < threshold:
  211. to_remove.append(i - 1)
  212.  
  213. new_verticals = []
  214. for i, el in enumerate(vertical):
  215. if i not in to_remove:
  216. new_verticals.append(el)
  217.  
  218. return new_verticals
  219.  
  220.  
  221. def normalize_lines(horizontal, vertical):
  222. # stretch horizontals
  223. for i, line in enumerate(horizontal):
  224. x1 = vertical[0][0]
  225. y1 = line[1]
  226. x2 = vertical[-1][0]
  227. y2 = line[3]
  228.  
  229. horizontal[i] = [x1, y1, x2, y2]
  230.  
  231. # stretch verticals
  232. for i, line in enumerate(vertical):
  233. x1 = line[0]
  234. y1 = horizontal[0][1]
  235. x2 = line[2]
  236. y2 = horizontal[-1][1]
  237.  
  238. vertical[i] = [x1, y1, x2, y2]
  239.  
  240. return horizontal, vertical
  241.  
  242.  
  243. def handle_left_table(img):
  244. # gray scale image
  245. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  246. # apply adaptive threshold
  247. # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  248. thresh = cv2.threshold(gray, 125, 255, cv2.THRESH_BINARY)[1]
  249. # apply Canny algorithm to get the edges
  250. edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
  251. # Use HoughtLines to get the lines from the Canny result
  252. lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
  253. # Remove duplicate lines
  254. lines = remove_duplicates(lines)
  255. # break lines into horizontal and vertical lines
  256. horizontal, vertical = sort_line_list(lines)
  257.  
  258. horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
  259. # get rid of last horizontal line
  260. horizontal = horizontal[:-1]
  261. # take only the 15 needed lines (starting to count from last to the 1st line)
  262. horizontal = horizontal[len(horizontal) - 15 - 1:]
  263.  
  264. vertical = remove_redundant_vertical_lines(vertical, 75)
  265. # get rid of the first 2 vertical lines. We don't need that part of the table
  266. vertical = vertical[2:]
  267.  
  268. horizontal, vertical = normalize_lines(horizontal, vertical)
  269.  
  270. results = get_results_from_table(horizontal, vertical, thresh)
  271.  
  272. # for key, value in results.items():
  273. # print("Result for question " + str(key) + " is " + value)
  274.  
  275.  
  276. def handle_right_table_and_choice(img):
  277. # gray scale image
  278. gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  279. # apply adaptive threshold
  280. thresh_for_results = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  281. thresh = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)[1]
  282. # apply Canny algorithm to get the edges
  283. edges = cv2.Canny(thresh, 10, 50, apertureSize=7)
  284. # Use HoughtLines to get the lines from the Canny result
  285. lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200, minLineLength=20, maxLineGap=999).tolist()
  286. # Remove duplicate lines
  287. lines = remove_duplicates(lines)
  288. # break lines into horizontal and vertical lines
  289. horizontal, vertical = sort_line_list(lines)
  290.  
  291. horizontal = remove_reduntant_horizontal_lines(horizontal, 75)
  292. # get rid of last horizontal line
  293. horizontal = horizontal[:-1]
  294. # take only the 15 needed lines (starting to count from last to the 1st line)
  295. horizontal = horizontal[len(horizontal) - 15 - 1:]
  296.  
  297. vertical = remove_redundant_vertical_lines(vertical, 75)
  298. # get rid of the first 2 vertical lines. We don't need that part of the table
  299. vertical = vertical[2:]
  300.  
  301. horizontal, vertical = normalize_lines(horizontal, vertical)
  302.  
  303. results = get_results_from_table(horizontal, vertical, thresh_for_results)
  304.  
  305. # for key, value in results.items():
  306. # print("Result for question " + str(key) + " is " + value)
  307.  
  308. # create selection region
  309. # The selection region will be the region starting from TOP to the first horizontal from where we move upwards
  310. # with a threshold (this time 250).
  311. # The vertical lines will be the last 2 (the column for D) which we strtch a bit to be sure to get
  312. # the 2 selection boxes
  313. region_y = horizontal[0][1] - 250
  314. region_x1 = vertical[-2][0] - 50
  315. region_x2 = vertical[-1][2] + 50
  316. selection_region_color = img[0:region_y, region_x1:region_x2]
  317. selection_result = get_selection(selection_region_color)
  318.  
  319.  
  320. # returns 0 for Informatioca and 1 for Fizica
  321. def get_selection(selection_region_color):
  322. # gray scale image
  323. gray = cv2.cvtColor(selection_region_color, cv2.COLOR_BGR2GRAY)
  324. # apply adaptive threshold
  325. # thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 8)
  326. thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
  327.  
  328. edges = cv2.Canny(gray, 30, 200)
  329. contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
  330.  
  331. for i, ctr in enumerate(contours):
  332. # Get bounding box
  333. x, y, w, h = cv2.boundingRect(ctr)
  334.  
  335. # Getting ROI
  336. roi = selection_region_color[y:y + h, x:x + w]
  337.  
  338. # cv2.imshow('segment no:'+str(i),roi)
  339. cv2.rectangle(selection_region_color, (x, y), (x + w, y + h), (0, 255, 0), 2)
  340.  
  341. if w > 15 and h > 15:
  342. cv2.imwrite('{}.png'.format(i), roi)
  343.  
  344. cv2.imshow('Marked Numbers', selection_region_color)
  345. cv2.waitKey(0)
  346.  
  347. rect_bottom = cv2.boundingRect(contours[0])
  348. x, y, w, h = rect_bottom
  349. bottom_box = thresh[y: y + h, x: x+w]
  350.  
  351. rect_top = cv2.boundingRect(contours[1])
  352. x, y, w, h = rect_top
  353. top_box = thresh[y: y + h, x: x + w]
  354.  
  355. if np.sum(top_box == 0) > np.sum(bottom_box == 0):
  356. top_box = ~top_box
  357. BH = 19
  358. BV = 21
  359. top_box = top_box[BH:-BH, BV:-BV]
  360. global_warming.append("I")
  361. cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", top_box)
  362. return 0
  363. # print("Informatica")
  364. else:
  365. bottom_box = ~bottom_box
  366. BH = 19
  367. BV = 21
  368. bottom_box = bottom_box[BH:-BH, BV:-BV]
  369. global_warming.append("F")
  370. cv2.imwrite("only_so_called_digits/digit_" + str(len(global_warming)) + ".jpg", bottom_box)
  371. return 1
  372. # print("Fizica")
  373.  
  374. if __name__ == "__main__":
  375. # got 150 images
  376. for i in range(1, 151):
  377. # 92nd image has an X instead of a number in the selection zone
  378. if i == 92:
  379. global_warming.append("A")
  380. continue
  381. print("Handling image " + str(i))
  382. img = cv2.imread("path_to_image" + str(i) + ".jpg")
  383. cropped_bot_left, cropped_bot_right, cropped_top_left = split_image(img)
  384. handle_right_table_and_choice(cropped_bot_right)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement