# Untitled

import cv2
import pytesseract
import re
import numpy as np

# Tell pytesseract where the Tesseract binary lives on this machine.
pytesseract.pytesseract.tesseract_cmd = r'T:\Program Files\Tesse\tesseract.exe'  # Path to tesseract executable

# Only OCR results that exactly equal one of these two-character codes are kept.
VALID_HEX_VALUES = ["55", "1C", "BD", "E9", "7A", "FF"]

screenshot = cv2.imread("images/breach_protocol_screenshot.png")
if screenshot is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # instead of raising, so the result has to be checked explicitly.
    print("❌ Screenshot not found!")
    # `exit()` is a convenience injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S`, frozen builds); raising
    # SystemExit terminates with the same status code everywhere.
    raise SystemExit(1)

# All preprocessing variants start from this single-channel copy.
gray_base = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

# Tesseract options: default engine mode (--oem 3), sparse-text page
# segmentation with OSD (--psm 12), and a character whitelist that contains
# exactly the characters appearing in VALID_HEX_VALUES.
config = r'--oem 3 --psm 12 -c tessedit_char_whitelist=1579ABCDEF'

# Set collecting all recognized unique tokens across all preprocessing methods
all_valid_tokens = set()

# Switch read by morphological_closing(): when False the closing step is
# skipped and the preprocessed image is used as-is.
morphological_closing_test = True

def morphological_closing(gray):
    """Return *gray* after an optional 2x2 morphological closing.

    The step is controlled by the module-level flag
    ``morphological_closing_test``; when that flag is falsy the input is
    handed back untouched.
    """
    if morphological_closing_test:
        # Tiny structuring element: bridges small gaps in letters only.
        structuring_element = np.ones((2, 2), np.uint8)
        return cv2.morphologyEx(gray, cv2.MORPH_CLOSE, structuring_element)
    return gray

# Define multiple preprocessing methods
def preprocess_1(gray):
    """Binarize at a fixed level of 140, invert the result, then close gaps."""
    binary = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY)[1]
    inverted = cv2.bitwise_not(binary)
    return morphological_closing(inverted)

def preprocess_2(gray):
    """Inverted mean-adaptive threshold (block size 11, offset 2), then closing."""
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
    return morphological_closing(binary)

def preprocess_3(gray):
    """Equalize the histogram, apply an inverted threshold at 140, then close."""
    equalized = cv2.equalizeHist(gray)
    binary = cv2.threshold(equalized, 140, 255, cv2.THRESH_BINARY_INV)[1]
    return morphological_closing(binary)

def preprocess_4(gray):
    """Sharpen lightly, apply an inverted threshold at 140, then close gaps."""
    softened = cv2.GaussianBlur(gray, (3, 3), 1)
    # Weighted sum 1.5 * original - 0.5 * blurred boosts edges before binarizing.
    sharpened = cv2.addWeighted(gray, 1.5, softened, -0.5, 0)
    binary = cv2.threshold(sharpened, 140, 255, cv2.THRESH_BINARY_INV)[1]
    return morphological_closing(binary)

# Every preprocessing variant is OCR'd in turn; their detections are merged.
preprocess_methods = [preprocess_1, preprocess_2, preprocess_3, preprocess_4]

# One dict per distinct detection: code, top-left corner, box size, and the
# 1-based index of the preprocessing method that found it first.
unique_tokens = []

DIST_THRESHOLD = 20  # Pixel tolerance for considering detections at same position

# Compiled once instead of being re-evaluated for every OCR entry.
_HEX_TOKEN_RE = re.compile(r'[0-9A-F]{2}')

for idx, preprocess in enumerate(preprocess_methods, 1):
    gray = preprocess(gray_base.copy())
    data = pytesseract.image_to_data(gray, config=config, output_type=pytesseract.Output.DICT)

    # image_to_data returns parallel columns; walk them in lockstep rather
    # than indexing each column by position.
    entries = zip(data['text'], data['left'], data['top'],
                  data['width'], data['height'])
    for text, x, y, w, h in entries:
        word = text.strip().upper()
        # Keep only well-formed two-digit hex tokens that are on the whitelist.
        if not (_HEX_TOKEN_RE.fullmatch(word) and word in VALID_HEX_VALUES):
            continue

        # The same code within DIST_THRESHOLD pixels on both axes is treated
        # as a re-detection of a token another method already reported.
        already_seen = any(
            token['code'] == word
            and abs(token['x'] - x) <= DIST_THRESHOLD
            and abs(token['y'] - y) <= DIST_THRESHOLD
            for token in unique_tokens
        )
        if already_seen:
            continue

        unique_tokens.append({
            'code': word,
            'x': x,
            'y': y,
            # Real box size from Tesseract, so the overlay fits the glyphs.
            'w': w,
            'h': h,
            'method': idx,
        })

print(f"\n✅ Total number of unique recognized hex codes: {len(unique_tokens)}")
print("📃 All unique hex codes:")
# The header above promised a listing; emit one line per detection.
for token in unique_tokens:
    print(f"  {token['code']} at ({token['x']}, {token['y']}) [method {token['method']}]")

# Draw rectangles and labels on the original color screenshot
img_out = screenshot.copy()

for token in unique_tokens:
    x, y = token['x'], token['y']
    w, h = token['w'], token['h']

    # Draw rectangle around the detected token
    cv2.rectangle(img_out, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # Draw token label above the rectangle; putText anchors at the text
    # baseline, so fall back to below the box when there is no room above.
    label_y = y - 10 if y - 10 >= 15 else y + h + 20
    cv2.putText(img_out, token['code'], (x, label_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

# Blocks until a key is pressed in the preview window.
cv2.imshow("Detected unique hex codes", img_out)
cv2.waitKey(0)
cv2.destroyAllWindows()