Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import cv2
- import pytesseract
- import re
- import numpy as np
# Path to tesseract executable
pytesseract.pytesseract.tesseract_cmd = r'T:\Program Files\Tesse\tesseract.exe'

# The only two-character codes that count as a valid detection.
VALID_HEX_VALUES = ["55", "1C", "BD", "E9", "7A", "FF"]

screenshot = cv2.imread("images/breach_protocol_screenshot.png")
if screenshot is None:
    # imread reports a missing/unreadable file by returning None, not by raising.
    print("❌ Screenshot not found!")
    # Raise SystemExit directly: the bare `exit` name is added by the `site`
    # module for interactive sessions and is not guaranteed to be defined.
    raise SystemExit(1)

# Single-channel copy that every preprocessing method starts from.
gray_base = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

# Tesseract options. The whitelist contains exactly the characters that occur
# in VALID_HEX_VALUES, so no other glyph can be reported.
config = r'--oem 3 --psm 12 -c tessedit_char_whitelist=1579ABCDEF'

# Set collecting all recognized unique tokens across all preprocessing methods
# NOTE(review): nothing in the visible part of the script adds to this set.
all_valid_tokens = set()

# Toggle read by morphological_closing(); False makes that step a pass-through.
morphological_closing_test = True
def morphological_closing(gray):
    """Run a morphological close on the image when the module toggle is on.

    If ``morphological_closing_test`` is falsy the input is returned
    untouched; otherwise the result of ``cv2.MORPH_CLOSE`` with a 2x2
    all-ones kernel is returned.
    """
    if morphological_closing_test:
        # 2x2 is deliberately tiny: just enough to close small gaps in letters.
        structuring_element = np.ones((2, 2), np.uint8)
        return cv2.morphologyEx(gray, cv2.MORPH_CLOSE, structuring_element)
    return gray
- # Define multiple preprocessing methods
def preprocess_1(gray):
    """Binarize at a fixed level of 140, invert, then apply the optional closing."""
    # threshold() returns (retval, image); only the image is needed.
    binary = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)[1]
    inverted = cv2.bitwise_not(binary)
    return morphological_closing(inverted)
def preprocess_2(gray):
    """Inverted mean-adaptive threshold, then the optional closing."""
    # Block size 11 and constant 2 are passed positionally, as cv2 expects.
    adaptive = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    return morphological_closing(adaptive)
def preprocess_3(gray):
    """Equalize the histogram, apply an inverted threshold at 140, then the optional closing."""
    equalized = cv2.equalizeHist(gray)
    # threshold() returns (retval, image); only the image is needed.
    binary_inv = cv2.threshold(equalized, 140, 255, cv2.THRESH_BINARY_INV)[1]
    return morphological_closing(binary_inv)
def preprocess_4(gray):
    """Lightly sharpen, apply an inverted threshold at 140, then the optional closing."""
    softened = cv2.GaussianBlur(gray, (3, 3), 1)
    # 1.5 * original - 0.5 * blurred: boosts edges relative to the blurred copy.
    sharpened = cv2.addWeighted(gray, 1.5, softened, -0.5, 0)
    binary_inv = cv2.threshold(sharpened, 140, 255, cv2.THRESH_BINARY_INV)[1]
    return morphological_closing(binary_inv)
# --- OCR every preprocessed variant and merge the detections -----------------
preprocess_methods = [preprocess_1, preprocess_2, preprocess_3, preprocess_4]
unique_tokens = []
DIST_THRESHOLD = 20  # Pixel tolerance for considering detections at same position

for idx, preprocess in enumerate(preprocess_methods, 1):
    # Each method gets its own copy so none of them can alter gray_base.
    gray = preprocess(gray_base.copy())
    data = pytesseract.image_to_data(gray, config=config, output_type=pytesseract.Output.DICT)

    # The 'text', 'left' and 'top' lists are parallel: one entry per detection.
    for text, x, y in zip(data['text'], data['left'], data['top']):
        word = text.strip().upper()
        # Membership alone is sufficient: every entry of VALID_HEX_VALUES is
        # already a two-character hex string.
        if word not in VALID_HEX_VALUES:
            continue

        # The same code at (almost) the same spot was already found, possibly
        # by an earlier preprocessing method; keep only the first sighting.
        duplicate_found = any(
            token['code'] == word
            and abs(token['x'] - x) <= DIST_THRESHOLD
            and abs(token['y'] - y) <= DIST_THRESHOLD
            for token in unique_tokens
        )
        if duplicate_found:
            continue

        unique_tokens.append({
            'code': word,
            'x': x,
            'y': y,
            'method': idx,  # 1-based index of the method that found it first
        })

# --- Report ------------------------------------------------------------------
print(f"\n✅ Total number of unique recognized hex codes: {len(unique_tokens)}")
print("📃 All unique hex codes:")
# List the detections announced by the header above.
for token in unique_tokens:
    print(f"  {token['code']} at ({token['x']}, {token['y']}) [method {token['method']}]")

# --- Draw rectangles and labels on the original color screenshot -------------
img_out = screenshot.copy()
for token in unique_tokens:
    x, y = token['x'], token['y']
    w, h = 40, 40  # Fixed size for drawing rectangles
    # Draw rectangle around the detected token
    cv2.rectangle(img_out, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # Draw token label above the rectangle; clamp the baseline so the label of
    # a detection near the top edge is not drawn outside the image.
    label_y = max(y - 10, 15)
    cv2.putText(img_out, token['code'], (x, label_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

cv2.imshow("Detected unique hex codes", img_out)
cv2.waitKey(0)  # Block until a key is pressed
cv2.destroyAllWindows()
Advertisement
Add Comment
Please, Sign In to add comment