Advertisement
slik1977

OCR 1.0

Dec 13th, 2021
247
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.56 KB | None | 0 0
  1. from PIL import Image, ImageEnhance
  2. import pytesseract
  3. import cv2
  4. import os
  5.  
  6. pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
  7. image = 'test.png'
  8.  
  9. preprocess = "thresh"
  10.  
  11. #increase the contrast
  12. img1 = Image.open('test.png')
  13. enhancer = ImageEnhance.Contrast(img1)
  14. img1 = enhancer.enhance(2)
  15.  
  16. #cv2 read picture
  17. image = cv2.imread(image)
  18.  
  19. #convert to black+white pic
  20. bw_file = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  21.  
  22. if preprocess == "blur":
  23.     bw_file = cv2.medianBlur(bw_file, 3)
  24.  
  25. #delete noises
  26. elif preprocess == "thresh":
  27.     bw_file = cv2.threshold(bw_file, 0, 255,
  28.                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
  29. #recognition
  30. filename = "{}.png".format(os.getpid())
  31. cv2.imwrite(filename, bw_file)
  32. config = r'--oem 3 --psm 12'
  33. text = pytesseract.image_to_string(bw_file, config=config)
  34. os.remove(filename)
  35.  
  36. #write text-answer
  37. with open('WordsFromWindowsScreen.txt', 'w') as f:
  38.     f.write(text)
  39.  
  40. #selection and output of the found words on the picture
  41. data = pytesseract.image_to_data(bw_file, config=config)
  42. for i, item in enumerate(data.splitlines()):
  43.     if i == 0:
  44.         continue
  45.     item = item.split()
  46.     try:
  47.         x, y, w, h = int(item[6]), int(item[7]), int(item[8]), int(item[9])
  48.         cv2.rectangle(bw_file, (x, y), (x + w, y + h), (255, 0, 0), 1)
  49.         cv2.putText(bw_file, item[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0), 1)
  50.     except IndexError:
  51.         print("missed")
  52.  
  53. #output of the result
  54. cv2.imshow("Output", bw_file)
  55. cv2.waitKey(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement