# Untitled

import re
from collections import defaultdict

def clean_messages(messages):
    """Return the unique, well-formed messages in first-occurrence order.

    A message is kept only if it is non-empty and consists entirely of the
    characters A-Z and 0-9.

    Args:
        messages: Iterable of message strings.

    Returns:
        A list of the distinct valid messages, ordered by where each one
        first appeared in ``messages``.
    """
    # dict.fromkeys de-duplicates while preserving insertion order, so the
    # output is reproducible between runs (a set would iterate in an order
    # that depends on string hashing).
    unique_messages = dict.fromkeys(messages)

    # fullmatch must consume the whole string; with re.match and a '$' anchor
    # a message ending in a newline would still be accepted.
    return [msg for msg in unique_messages if re.fullmatch(r'[A-Z0-9]+', msg)]

def read_input_file(filename):
    """Load the lines of ``filename`` with surrounding whitespace removed.

    Returns one string per line of the file (blank lines become empty
    strings). If the file does not exist, an error is printed and an empty
    list is returned.
    """
    try:
        with open(filename, 'r') as handle:
            stripped_lines = [raw_line.strip() for raw_line in handle]
    except FileNotFoundError:
        print(f"Error: {filename} not found.")
        return []
    return stripped_lines

def _best_alignment(padded_base, msg, offsets):
    """Find the offset at which ``msg`` matches ``padded_base`` the most.

    For each candidate offset, position ``k`` of ``padded_base`` is compared
    with position ``k - offset`` of ``msg``. Matching positions keep the
    character; every other position is written as '-'.

    Returns:
        ``(alignment, offset, match_count)`` for the best offset. Ties keep
        the earliest offset in iteration order. If no offset produces a
        match, the result is ``('', 0, 0)``.
    """
    width = len(padded_base)
    len_msg = len(msg)

    best_cells = []
    best_offset = 0
    max_matches = 0

    for current_offset in offsets:
        cells = []
        match_count = 0

        for k in range(width):
            src = k - current_offset
            if 0 <= src < len_msg and padded_base[k] == msg[src]:
                cells.append(msg[src])
                match_count += 1
            else:
                # Either outside msg at this offset, or a mismatch.
                cells.append('-')

        # Strictly greater: the first offset reaching a count is retained.
        if match_count > max_matches:
            best_cells = cells
            best_offset = current_offset
            max_matches = match_count

    return ''.join(best_cells), best_offset, max_matches


def slide_compare_multiple_grouped(messages, offset=None, min_matches=6):
    """
    Perform sliding comparison for each group based on the first two characters.
    - If offset is specified as an integer, only compare with that fixed offset.
    - If offset is None, allow sliding across the entire message.
    - Only report results with at least `min_matches`.

    Messages are first passed through `clean_messages`, then grouped by their
    first two characters. Within a group every unordered pair is compared
    once. Progress is printed per group.

    Returns a list of tuples
    `(base_msg[:6], msg[:6], alignment, best_offset, match_count)`.
    """
    # Get the cleaned list of messages (no duplicates or invalid characters)
    messages = clean_messages(messages)

    # Group messages by their first two characters (prefix)
    grouped_messages = defaultdict(list)
    for msg in messages:
        grouped_messages[msg[:2]].append(msg)

    best_match_results = []

    for prefix, group in grouped_messages.items():
        print(f"Processing group: {prefix}")

        if len(group) < 2:
            print(f"Not enough messages to compare for prefix {prefix}.")
            continue

        # Every alignment in this group is rendered at the longest length.
        max_len = max(len(member) for member in group)

        for i, base_msg in enumerate(group):
            # Pad with '-' so shorter base messages span the full width.
            padded_base = base_msg.ljust(max_len, '-')

            # Only later members: each unordered pair is compared once.
            for other in group[i + 1:]:
                if offset is not None:
                    offsets = [offset]  # Fixed offset
                else:
                    # Every shift from "only the last char of other overlaps
                    # position 0" up to "other starts at the final position".
                    offsets = range(-(len(other) - 1), max_len)

                alignment, best_offset, max_matches = _best_alignment(
                    padded_base, other, offsets
                )

                if max_matches >= min_matches:
                    best_match_results.append(
                        (base_msg[:6], other[:6], alignment, best_offset, max_matches)
                    )

    return best_match_results

def write_results_to_file(results, filename="OUTPUT.TXT"):
    """Write the comparison results to a text report, best matches first.

    Args:
        results: Iterable of 5-tuples
            ``(base_serial, compared_serial, best_alignment, best_offset,
            max_matches)``.
        filename: Path of the report to create or overwrite. Defaults to
            ``"OUTPUT.TXT"``, the path used when no argument is given.
    """
    # Sort by total matches (index 4), descending; sorted() is stable, so
    # results with equal counts keep their incoming order.
    sorted_results = sorted(results, key=lambda x: x[4], reverse=True)

    with open(filename, "w") as file:
        file.write("Best Match for Each Pair of Messages (Grouped by First Two Characters, Sorted by Total Matches):\n")
        for base_serial, compared_serial, best_alignment, best_offset, max_matches in sorted_results:
            file.write(f"\nBase Message: {base_serial}, Compared with: {compared_serial}\n")
            file.write(f"Best Offset: {best_offset}, Total Matches: {max_matches}\n")
            file.write(f"Best Alignment: {best_alignment}\n")

def _int_or_default(raw_text, default, fallback_note):
    """Parse ``raw_text`` as an int; blank or invalid text yields ``default``.

    Invalid (non-blank, non-integer) text also prints a notice ending with
    ``fallback_note``.
    """
    if not raw_text:
        return default
    try:
        return int(raw_text)
    except ValueError:
        print(f"Invalid input '{raw_text}'. {fallback_note}")
        return default


# Load the messages to compare from INPUT.TXT
input_messages = read_input_file("INPUT.TXT")

# Offset: a number pins the comparison to that offset; blank means slide freely
offset_input = input("Enter a specific offset (number), or press Enter for sliding offset: ").strip()
offset_value = _int_or_default(offset_input, None, "Using sliding offset (any offset).")

# Minimum number of matches a pair needs in order to be reported (default 6)
min_matches_input = input("Enter minimum number of matches to report, or press Enter for default (6): ").strip()
min_matches_value = _int_or_default(min_matches_input, 6, "Using default of 6 matches.")

# Run the grouped comparison with the chosen settings
best_match_results = slide_compare_multiple_grouped(
    input_messages,
    offset=offset_value,
    min_matches=min_matches_value,
)

# Persist the sorted report to OUTPUT.TXT
write_results_to_file(best_match_results)

print(f"Results with at least {min_matches_value} matches have been written to OUTPUT.TXT")