Guest User

Untitled

a guest
Jun 29th, 2025
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.91 KB | Source Code | 0 0
  1. import cv2
  2. import pytesseract
  3. import re
  4. import numpy as np
  5.  
  6. pytesseract.pytesseract.tesseract_cmd = r'T:\Program Files\Tesse\tesseract.exe'  # Path to tesseract executable
  7.  
  8. VALID_HEX_VALUES = ["55", "1C", "BD", "E9", "7A", "FF"]
  9.  
  10. screenshot = cv2.imread("images/breach_protocol_screenshot.png")
  11. if screenshot is None:
  12.     print("❌ Screenshot not found!")
  13.     exit(1)
  14.  
  15. gray_base = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
  16.  
  17. config = r'--oem 3 --psm 12 -c tessedit_char_whitelist=1579ABCDEF'
  18.  
  19. # Set collecting all recognized unique tokens across all preprocessing methods
  20. all_valid_tokens = set()
  21.  
  22. morphological_closing_test = True
  23.  
  24. def morphological_closing(gray):
  25.     """Apply morphological closing if enabled."""
  26.     global morphological_closing_test
  27.     if not morphological_closing_test:
  28.         return gray
  29.     kernel = np.ones((2, 2), np.uint8)  # Small kernel to close small gaps in letters
  30.     g = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
  31.     return g
  32.  
  33. # Define multiple preprocessing methods
  34. def preprocess_1(gray):
  35.     """Threshold + invert."""
  36.     _, g = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)
  37.     g = cv2.bitwise_not(g)
  38.     g = morphological_closing(g)
  39.     return g
  40.  
  41. def preprocess_2(gray):
  42.     """Adaptive thresholding."""
  43.     g = cv2.adaptiveThreshold(gray, 255,
  44.                               cv2.ADAPTIVE_THRESH_MEAN_C,
  45.                               cv2.THRESH_BINARY_INV, 11, 2)
  46.     g = morphological_closing(g)
  47.     return g
  48.  
  49. def preprocess_3(gray):
  50.     """Histogram equalization + threshold."""
  51.     g = cv2.equalizeHist(gray)
  52.     _, g = cv2.threshold(g, 140, 255, cv2.THRESH_BINARY_INV)
  53.     g = morphological_closing(g)
  54.     return g
  55.  
  56. def preprocess_4(gray):
  57.     """Light sharpening before thresholding."""
  58.     blur = cv2.GaussianBlur(gray, (3, 3), 1)
  59.     sharp = cv2.addWeighted(gray, 1.5, blur, -0.5, 0)
  60.     _, g = cv2.threshold(sharp, 140, 255, cv2.THRESH_BINARY_INV)
  61.     g = morphological_closing(g)
  62.     return g
  63.  
  64. preprocess_methods = [preprocess_1, preprocess_2, preprocess_3, preprocess_4]
  65.  
  66. unique_tokens = []
  67.  
  68. DIST_THRESHOLD = 20  # Pixel tolerance for considering detections at same position
  69.  
  70. for idx, preprocess in enumerate(preprocess_methods, 1):
  71.     gray = preprocess(gray_base.copy())
  72.     data = pytesseract.image_to_data(gray, config=config, output_type=pytesseract.Output.DICT)
  73.  
  74.     for i in range(len(data['text'])):
  75.         word = data['text'][i].strip().upper()
  76.         if len(word) == 2 and re.fullmatch(r'[0-9A-F]{2}', word) and word in VALID_HEX_VALUES:
  77.             x, y = data['left'][i], data['top'][i]
  78.  
  79.             # Check if this code was already detected nearby
  80.             duplicate_found = False
  81.             for token in unique_tokens:
  82.                 if (token['code'] == word and
  83.                     abs(token['x'] - x) <= DIST_THRESHOLD and
  84.                     abs(token['y'] - y) <= DIST_THRESHOLD):
  85.                     duplicate_found = True
  86.                     break
  87.  
  88.             if not duplicate_found:
  89.                 token_info = {
  90.                     'code': word,
  91.                     'x': x,
  92.                     'y': y,
  93.                     'method': idx,
  94.                 }
  95.                 unique_tokens.append(token_info)
  96.  
  97. print(f"\n✅ Total number of unique recognized hex codes: {len(unique_tokens)}")
  98. print("📃 All unique hex codes:")
  99.  
  100. # Draw rectangles and labels on the original color screenshot
  101. img_out = screenshot.copy()
  102.  
  103. for token in unique_tokens:
  104.     x, y = token['x'], token['y']
  105.     w, h = 40, 40  # Fixed size for drawing rectangles
  106.  
  107.     # Draw rectangle around the detected token
  108.     cv2.rectangle(img_out, (x, y), (x + w, y + h), (0, 0, 255), 2)
  109.  
  110.     # Draw token label above the rectangle
  111.     cv2.putText(img_out, token['code'], (x, y - 10),
  112.                 cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
  113.  
  114. cv2.imshow("Detected unique hex codes", img_out)
  115. cv2.waitKey(0)
  116. cv2.destroyAllWindows()
  117.  
Advertisement
Add Comment
Please, Sign In to add comment