Guest User

Untitled

a guest
Jun 14th, 2025
797
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.89 KB | None | 0 0
  1. import re
  2. from collections import defaultdict
  3.  
  4. def clean_messages(messages):
  5. # Remove duplicates by converting to a set and back to a list
  6. unique_messages = list(set(messages))
  7.  
  8. # Remove messages containing any characters outside of A-Z or 0-9
  9. valid_messages = [msg for msg in unique_messages if re.match(r'^[A-Z0-9]+$', msg)]
  10.  
  11. return valid_messages
  12.  
  13. def read_input_file(filename):
  14. try:
  15. with open(filename, 'r') as file:
  16. messages = [line.strip() for line in file.readlines()]
  17. return messages
  18. except FileNotFoundError:
  19. print(f"Error: {filename} not found.")
  20. return []
  21.  
  22. def slide_compare_multiple_grouped(messages, offset=None, min_matches=6):
  23. """
  24. Perform sliding comparison for each group based on the first two characters.
  25. - If offset is specified as an integer, only compare with that fixed offset.
  26. - If offset is None, allow sliding across the entire message.
  27. - Only report results with at least `min_matches`.
  28. """
  29. # Get the cleaned list of messages (no duplicates or invalid characters)
  30. messages = clean_messages(messages)
  31.  
  32. # Group messages by their first two characters (prefix)
  33. grouped_messages = defaultdict(list)
  34. for msg in messages:
  35. prefix = msg[:2]
  36. grouped_messages[prefix].append(msg)
  37.  
  38. # Store results of best matches for each group
  39. best_match_results = []
  40.  
  41. # Compare messages within each group
  42. for prefix, group in grouped_messages.items():
  43. print(f"Processing group: {prefix}")
  44.  
  45. if len(group) < 2:
  46. print(f"Not enough messages to compare for prefix {prefix}.")
  47. continue
  48.  
  49. max_len = max(len(msg) for msg in group)
  50.  
  51. # Compare each message with every other message in the group
  52. for i in range(len(group)):
  53. base_msg = group[i]
  54. aligned_result = list(base_msg.ljust(max_len, '-'))
  55.  
  56. for j in range(i + 1, len(group)):
  57. msg = group[j]
  58.  
  59. len_msg = len(msg)
  60. best_alignment = ""
  61. best_offset = 0
  62. max_matches = 0
  63. best_match_alignment = []
  64.  
  65. # Determine the offsets to use (fixed or sliding)
  66. if offset is not None:
  67. offsets = [offset] # Fixed offset
  68. else:
  69. offsets = range(-(len_msg - 1), max_len) # Sliding across all offsets
  70.  
  71. for current_offset in offsets:
  72. matches = []
  73. match_count = 0
  74.  
  75. for k in range(max_len):
  76. l = k - current_offset
  77. if 0 <= l < len_msg:
  78. if aligned_result[k] == msg[l]:
  79. # Exact match
  80. matches.append(msg[l])
  81. match_count += 1
  82. else:
  83. matches.append('-')
  84. else:
  85. matches.append('-')
  86.  
  87. aligned_message = ''.join(matches)
  88.  
  89. # Update if this alignment has more matches than the previous best
  90. if match_count > max_matches:
  91. best_alignment = aligned_message
  92. max_matches = match_count
  93. best_offset = current_offset
  94. best_match_alignment = matches # Store best alignment as list for later output
  95.  
  96. # Only store results with matches greater than or equal to the minimum
  97. if max_matches >= min_matches:
  98. best_match_results.append((base_msg[:6], msg[:6], ''.join(best_match_alignment), best_offset, max_matches))
  99.  
  100. return best_match_results
  101.  
  102. def write_results_to_file(results):
  103. # Sort results by total matches in descending order
  104. sorted_results = sorted(results, key=lambda x: x[4], reverse=True)
  105.  
  106. with open("OUTPUT.TXT", "w") as file:
  107. file.write("Best Match for Each Pair of Messages (Grouped by First Two Characters, Sorted by Total Matches):\n")
  108. for base_serial, compared_serial, best_alignment, best_offset, max_matches in sorted_results:
  109. file.write(f"\nBase Message: {base_serial}, Compared with: {compared_serial}\n")
  110. file.write(f"Best Offset: {best_offset}, Total Matches: {max_matches}\n")
  111. file.write(f"Best Alignment: {best_alignment}\n")
  112.  
  113. # Read input messages from INPUT.TXT
  114. input_messages = read_input_file("INPUT.TXT")
  115.  
  116. # Prompt for offset: if input is a number, use it as the offset; if blank, use any offset
  117. offset_input = input("Enter a specific offset (number), or press Enter for sliding offset: ").strip()
  118.  
  119. # Convert to integer if input is a number, else use None for sliding offset
  120. try:
  121. offset_value = int(offset_input) if offset_input else None
  122. except ValueError:
  123. print(f"Invalid input '{offset_input}'. Using sliding offset (any offset).")
  124. offset_value = None
  125.  
  126. # Prompt for minimum matches to report, default is 6
  127. min_matches_input = input("Enter minimum number of matches to report, or press Enter for default (6): ").strip()
  128.  
  129. # Convert to integer if input is a number, else use default (6)
  130. try:
  131. min_matches_value = int(min_matches_input) if min_matches_input else 6
  132. except ValueError:
  133. print(f"Invalid input '{min_matches_input}'. Using default of 6 matches.")
  134. min_matches_value = 6
  135.  
  136. # Perform sliding comparison for each group based on the first two characters
  137. best_match_results = slide_compare_multiple_grouped(input_messages, offset=offset_value, min_matches=min_matches_value)
  138.  
  139. # Write the sorted results to OUTPUT.TXT
  140. write_results_to_file(best_match_results)
  141.  
  142. print(f"Results with at least {min_matches_value} matches have been written to OUTPUT.TXT")
  143.  
Add Comment
Please, Sign In to add comment