Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2020
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.01 KB | None | 0 0
  1. import cv2
  2. import numpy as np
  3. import pytesseract
  4. from PIL import ImageGrab
  5.  
  6. def ExtractText(img):
  7. return pytesseract.image_to_string(img, config='-c tessedit_char_whitelist=WASD --psm 13')
  8.  
  9. def GetBoxStartPos(img):
  10. for x in range(0, 50):
  11. for y in range(0, 50):
  12. r, g, b = img[x, y]
  13. color = int(r) + int(g) + int(b)
  14. if color < pixel_color_threshold:
  15. return (x+8, y+7)
  16. return (8, 7)
  17.  
  18. def ProcessImage(ind):
  19. # USE BELOW TO GET FILE FROM DISK
  20. # img_ori = cv2.imread('images/bdo_letters{0}.jpg'.format(ind))
  21. # img_ori = img_ori[330:430, 750:1160]
  22. # USE BELOW FOR SCREENSHOT
  23. img_ori = ImageGrab.grab().crop(image_areas['letters']).convert('RGB') # crop image from screenshot
  24. img_ori = np.array(img_ori) # convert to cv2 image
  25. img_ori = img_ori[:, :, ::-1].copy()
  26.  
  27. pos = GetBoxStartPos(img_ori) # get start pos of letters
  28. box = img_ori[pos[0]:pos[0]+y_len, pos[1]:pos[1]+x_len]
  29.  
  30. # join letters to remove gaps between
  31. letters = []
  32. for i in range(0, 10):
  33. x_pos = i*l_len + i*gap_len
  34. letter = box[:, x_pos:x_pos+l_len]
  35. letters.append(letter)
  36. box_format = cv2.hconcat(letters)
  37.  
  38. # box_format = cv2.cvtColor(box_format, cv2.COLOR_BGR2GRAY) # convert to grayscale
  39. # img1 = cv2.threshold(box_format,128,255,cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)[1]
  40. # img_gauss = cv2.GaussianBlur(box_format,(3,3),0) # gaussian filter (optional)
  41. # img2 = cv2.threshold(img_gauss,128,255,cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)[1]
  42.  
  43. cv2.imwrite('img_original_{0}.jpg'.format(ind), box_format)
  44. # cv2.imwrite('img_processed2_{0}.jpg'.format(ind), img2)
  45.  
  46. text = ExtractText(box_format)
  47. print(text)
  48. # text = ExtractText(img2)
  49. # print(text)
  50.  
  51. image_areas = {
  52. 'bite': (720, 55, 890, 80),
  53. 'letters': (750, 330, 1160, 430),
  54. }
  55.  
  56. pixel_color_threshold = 200
  57. x_len = 355
  58. y_len = 22
  59. l_len = 18
  60. gap_len = 19
  61. pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract'
  62.  
  63. # ProcessImage(8)
  64. for i in range(1, 9):
  65. print('-- PROCESSING IMAGE {0} --'.format(i))
  66. ProcessImage(i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement