Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- from collections import defaultdict
def clean_messages(messages):
    """Return the unique, well-formed messages in first-seen order.

    A message is kept only if it is non-empty and consists entirely of the
    characters A-Z and 0-9.

    Args:
        messages: Iterable of message strings.

    Returns:
        A list of the valid messages with duplicates removed, ordered by
        their first appearance in ``messages``.
    """
    # fullmatch, unlike match() with a trailing `$`, also rejects a message
    # that ends in a newline.
    is_valid = re.compile(r'[A-Z0-9]+').fullmatch
    # dict.fromkeys de-duplicates while keeping insertion order; going through
    # a set would give a different order from run to run because str hashing
    # is salted per process.
    return [msg for msg in dict.fromkeys(messages) if is_valid(msg)]
def read_input_file(filename):
    """Load a text file as a list of lines with surrounding whitespace removed.

    If the file does not exist, an error is printed and an empty list is
    returned instead of raising.
    """
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        print(f"Error: {filename} not found.")
        return []
    # The handle is closed on leaving the with-block, even if reading fails.
    with handle:
        return [raw_line.strip() for raw_line in handle]
def _best_alignment(base_msg, msg, width, offsets):
    """Find the offset at which ``msg`` agrees with ``base_msg`` most often.

    At a given offset, column ``k`` of ``base_msg`` is compared with
    ``msg[k - offset]``. The first offset reaching the highest match count
    wins (later ties do not replace it).

    Returns:
        ``(alignment, offset, match_count)`` where ``alignment`` is a string
        of length ``width`` showing matched characters in their column and
        ``'-'`` everywhere else. When nothing matches at any offset the
        result is an all-dash row, offset 0 and a count of 0.
    """
    base_len = len(base_msg)
    msg_len = len(msg)
    best_cells = ['-'] * width
    best_offset = 0
    best_count = 0
    for current_offset in offsets:
        cells = ['-'] * width
        count = 0
        # Only columns where both messages have a character can match, so
        # restrict the scan to the overlap instead of walking every column.
        first = max(0, current_offset)
        last = min(base_len, current_offset + msg_len)
        for col in range(first, last):
            char = msg[col - current_offset]
            if base_msg[col] == char:
                cells[col] = char
                count += 1
        # Strictly greater: keep the earliest offset among equal scores.
        if count > best_count:
            best_cells = cells
            best_offset = current_offset
            best_count = count
    return ''.join(best_cells), best_offset, best_count


def slide_compare_multiple_grouped(messages, offset=None, min_matches=6):
    """
    Perform sliding comparison for each group based on the first two characters.

    The messages are first passed through ``clean_messages`` and then grouped
    by their first two characters. Within each group every pair of messages
    is compared once, and the best alignment for the pair is recorded.

    - If offset is specified as an integer, only compare with that fixed offset.
    - If offset is None, allow sliding across the entire message.
    - Only report results with at least `min_matches`.

    Progress lines are printed for each group as it is processed.

    Returns:
        A list of tuples ``(base_serial, compared_serial, alignment,
        best_offset, match_count)``; the two serials are the first six
        characters of each message, and ``alignment`` is as wide as the
        longest message in the group.
    """
    # Get the cleaned list of messages (no duplicates or invalid characters)
    messages = clean_messages(messages)

    # Group messages by their first two characters (prefix)
    grouped_messages = defaultdict(list)
    for msg in messages:
        grouped_messages[msg[:2]].append(msg)

    best_match_results = []
    for prefix, group in grouped_messages.items():
        print(f"Processing group: {prefix}")
        if len(group) < 2:
            print(f"Not enough messages to compare for prefix {prefix}.")
            continue

        max_len = max(len(msg) for msg in group)

        # Compare each message with every later message in the group
        for i, base_msg in enumerate(group):
            for msg in group[i + 1:]:
                if offset is not None:
                    offsets = [offset]  # Fixed offset
                else:
                    # Every shift that keeps at least one column in range
                    offsets = range(-(len(msg) - 1), max_len)

                alignment, best_offset, max_matches = _best_alignment(
                    base_msg, msg, max_len, offsets
                )

                # Only store results that reach the minimum match count
                if max_matches >= min_matches:
                    best_match_results.append(
                        (base_msg[:6], msg[:6], alignment, best_offset, max_matches)
                    )
    return best_match_results
def write_results_to_file(results, filename="OUTPUT.TXT"):
    """Write the comparison results to a text report, best matches first.

    Args:
        results: Iterable of tuples ``(base_serial, compared_serial,
            best_alignment, best_offset, max_matches)``.
        filename: Path of the report to create or overwrite. Defaults to
            ``"OUTPUT.TXT"``.
    """
    # Sort by total matches, descending. sorted() is stable, so results with
    # equal counts keep their incoming relative order.
    sorted_results = sorted(results, key=lambda x: x[4], reverse=True)
    with open(filename, "w") as file:
        file.write("Best Match for Each Pair of Messages (Grouped by First Two Characters, Sorted by Total Matches):\n")
        for base_serial, compared_serial, best_alignment, best_offset, max_matches in sorted_results:
            file.write(f"\nBase Message: {base_serial}, Compared with: {compared_serial}\n")
            file.write(f"Best Offset: {best_offset}, Total Matches: {max_matches}\n")
            file.write(f"Best Alignment: {best_alignment}\n")
def _prompt_int(prompt, default, fallback_notice):
    """Ask the user for an integer, falling back to ``default``.

    A blank answer returns ``default`` silently. An answer that is not a
    valid integer prints a notice ending in ``fallback_notice`` and also
    returns ``default``.
    """
    answer = input(prompt).strip()
    if not answer:
        return default
    try:
        return int(answer)
    except ValueError:
        print(f"Invalid input '{answer}'. {fallback_notice}")
        return default


def main():
    """Read INPUT.TXT, prompt for the comparison settings, write OUTPUT.TXT."""
    # Read input messages from INPUT.TXT
    input_messages = read_input_file("INPUT.TXT")

    # A number fixes the offset; a blank (or invalid) answer means "slide
    # across every offset", which is represented by None.
    offset_value = _prompt_int(
        "Enter a specific offset (number), or press Enter for sliding offset: ",
        None,
        "Using sliding offset (any offset).",
    )

    # Minimum number of matches a pair needs to be reported, default is 6
    min_matches_value = _prompt_int(
        "Enter minimum number of matches to report, or press Enter for default (6): ",
        6,
        "Using default of 6 matches.",
    )

    # Perform sliding comparison for each group based on the first two characters
    best_match_results = slide_compare_multiple_grouped(
        input_messages, offset=offset_value, min_matches=min_matches_value
    )

    # Write the sorted results to OUTPUT.TXT
    write_results_to_file(best_match_results)
    print(f"Results with at least {min_matches_value} matches have been written to OUTPUT.TXT")


# Guarded so that importing this module does not prompt or touch any files.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment