Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import print_function
- import base64
- import datetime
- import sys
- # Reverse Caesar shift the alphabetic characters in 'line' by 'amt' places.
def outer_Caesar_shift(line, amt):
    """Return 'line' with every ASCII letter rotated back by 'amt' places.

    Upper-case letters stay upper-case and lower-case letters stay
    lower-case; the rotation wraps around within the 26-letter alphabet.
    Every other character is copied through unchanged.
    """
    def unshift(ch):
        # Pick the start of the alphabet this character belongs to, if any.
        if "A" <= ch <= "Z":
            base = ord("A")
        elif "a" <= ch <= "z":
            base = ord("a")
        else:
            return ch
        # Subtract the shift and wrap modulo 26 so the result stays a letter.
        return chr(base + (ord(ch) - base - amt) % 26)

    return "".join(unshift(ch) for ch in line)
- # Reverse the bits in a byte.
def reverse_byte(b):
    """Return the integer whose binary digits are those of 'b' mirrored.

    'b' is rendered as a zero-padded binary string of at least eight
    digits, the digit order is reversed, and the result is parsed back
    as base 2.
    """
    binary_digits = format(b, "08b")
    mirrored = "".join(reversed(binary_digits))
    return int(mirrored, 2)
- # Convert the external coded message text to a list of correctly flipped bytes.
- # This means that for New messages, the even-numbered bytes are reversed.
- # Arguments:
- # 'clue' is "O" for Old messages, "N" for New messages,
- # "+" for New messages that need to be extended by a zero byte,
- # and "?" for unknown message type, i.e., the sidebar.
- # 'stamplast' is last character of the Unix timestamp, to use for
- # the reverse Caesar shift.
- # 'stuff' is the external coded message text.
def extract_ordered_inside_bytes(clue, stamplast, stuff):
    """Decode the external message text into a list of byte values.

    Arguments:
        clue: "O" for Old messages, "N" for New messages, "+" for New
            messages that need to be extended by a zero byte, and "?"
            for an unknown message type.
        stamplast: last character of the Unix timestamp; its offset from
            the character '0' is the reverse Caesar shift amount.
        stuff: the external coded message text.

    Returns:
        A list of integers, one per 8-bit group of the decoded number,
        most significant group first. For clues "N" and "+" every second
        group (the 2nd, 4th, ... counted from the left) has its bit order
        reversed.
    """
    shift_amount = ord(stamplast) - ord('0')
    b64_text = outer_Caesar_shift(stuff, shift_amount)
    # Restore any '=' padding needed to reach a multiple of four characters.
    b64_text += "=" * ((-len(b64_text)) % 4)

    # The base64 payload is itself the ASCII decimal spelling of one integer.
    decimal_text = base64.b64decode(b64_text).decode("ascii")
    bits = "{:b}".format(int(decimal_text))

    # Left-pad with zeros to a whole number of bytes.
    bits = "0" * ((-len(bits)) % 8) + bits
    if clue == "+":
        bits = "0" * 8 + bits

    flip_alternate = clue in ("N", "+")
    ordered = []
    for group_index, start in enumerate(range(0, len(bits), 8)):
        group = bits[start:start + 8]
        if flip_alternate and group_index % 2 == 1:
            group = group[::-1]
        ordered.append(int(group, 2))
    return ordered
- # Print out a byte list of the message in the standard eight columns.
- # Note: I number my columns like I number my bytes: Right to left, and starting from 0.
def print_byte_array(byte_list):
    """Print 'byte_list' as rows of eight binary columns.

    Columns are numbered 7 down to 0 from left to right. When the number
    of bytes is not a multiple of eight, the first row is padded on the
    left with blank cells so that the final byte always lands in column 0.
    Each byte is shown as eight binary digits with every '0' replaced by
    '-'. A newline is emitted after each column-0 cell.

    Arguments:
        byte_list: sequence of integers to print.
    """
    # Number of spaces printed before a cell, indexed by column number.
    gap_before_column = [1, 2, 1, 3, 1, 2, 1, 4]
    # A blank cell must be as wide as a rendered byte (eight characters),
    # otherwise the bytes of a short first row drift out of their columns.
    blank_cell = " " * 8

    missing_cells = (-len(byte_list)) % 8
    cells = [None] * missing_cells + list(byte_list)

    column = 8
    for cell in cells:
        column = (column - 1) % 8
        print(" " * gap_before_column[column], end="")
        if cell is None:
            print(blank_cell, end="")
        else:
            print("{:08b}".format(cell).replace("0", "-"), end="")
        if column == 0:
            print()
- # Print out the column headers corresponding to printing out a byte
- # list where the bytes have already been flipped into 70615243 order.
def print_flipped_array_header(clue, length_byte_list):
    """Print the two column-header lines for a flipped byte array.

    The first line labels each column FORWARD or REVERSED: for clues "N"
    and "+" the labels alternate, and which label comes first depends on
    whether 'length_byte_list' is even or odd; for any other clue every
    column is labelled FORWARD. The second line repeats "70615243" once
    per column.
    """
    # NOTE(review): the spacing inside these literals may have been
    # collapsed when the file was pasted; confirm the column alignment
    # against print_byte_array.
    if clue not in ("N", "+"):
        orientation_line = " FORWARD FORWARD FORWARD FORWARD FORWARD FORWARD FORWARD FORWARD "
    elif length_byte_list % 2 == 0:
        orientation_line = " FORWARD REVERSED FORWARD REVERSED FORWARD REVERSED FORWARD REVERSED"
    else:
        orientation_line = " REVERSED FORWARD REVERSED FORWARD REVERSED FORWARD REVERSED FORWARD "
    print(orientation_line)
    print(" 70615243 70615243 70615243 70615243 70615243 70615243 70615243 70615243")
- # Print out the XOR mask template corresponding to the message.
def print_flipped_array_footer(clue, length_byte_list):
    """Print the XOR mask template line that matches the message type.

    Clues "N" and "+" get the mirrored template, clue "O" gets the
    repeating template, and any other clue prints nothing.
    'length_byte_list' is accepted but not used.
    """
    # NOTE(review): the spacing inside these literals may have been
    # collapsed when the file was pasted; confirm the column alignment.
    if clue == "O":
        template = " ABCDEFGH IJKLMNOP abcdefgh ijklmnop ABCDEFGH IJKLMNOP abcdefgh ijklmnop"
    elif clue == "N" or clue == "+":
        template = " ijklmnop abcdefgh IJKLMNOP ABCDEFGH ABCDEFGH IJKLMNOP abcdefgh ijklmnop"
    else:
        return
    print(template)
- # Print out the XOR mask bit values corresponding to the message.
def print_unmasked_array_header(clue, mask):
    """Print the UNMASKED column header and the mask bits per column.

    For clues "N" and "+" the four mask bytes are listed as 0,1,2,3
    followed by 3,2,1,0; for clue "O" they are listed as 3,2,1,0 twice.
    For any other clue only the UNMASKED line is printed.

    Arguments:
        clue: message type code.
        mask: sequence of at least four integers (indexes 0-3 are used).
    """
    # NOTE(review): the spacing inside these literals may have been
    # collapsed when the file was pasted; confirm the column alignment.
    print(" UNMASKED UNMASKED UNMASKED UNMASKED UNMASKED UNMASKED UNMASKED UNMASKED")
    if clue in ("N", "+"):
        layout = " {0:08b} {1:08b} {2:08b} {3:08b} {3:08b} {2:08b} {1:08b} {0:08b}"
    elif clue == "O":
        layout = " {3:08b} {2:08b} {1:08b} {0:08b} {3:08b} {2:08b} {1:08b} {0:08b}"
    else:
        return
    print(layout.format(*mask))
- # Apply a given mask against the byte list.
def apply_mask(clue, mask, byte_list):
    """Return a new list with each byte XORed against its column's mask byte.

    Bytes are assigned columns so that the last byte is in column 0 and
    the column number grows towards the front, wrapping after 7. For
    clues "N" and "+" columns c and 7-c share the mask byte
    min(c, 7-c); for every other clue the mask byte is c modulo 4.

    Arguments:
        clue: message type code.
        mask: sequence of four integers.
        byte_list: sequence of integers to unmask.
    """
    mirrored = clue in ("N", "+")
    total = len(byte_list)

    def mask_index(position):
        # Column of the byte at 'position', counting down to 0 at the end.
        column = (total - 1 - position) % 8
        return min(column, 7 - column) if mirrored else column % 4

    return [
        value ^ mask[mask_index(position)]
        for position, value in enumerate(byte_list)
    ]
- # Print out a byte by byte detailed listing of the decoding.
- # If 'only' is specified, then just list the bytes for that mask value.
def print_byte_by_byte_detail(clue, mask, byte_list, only=None):
    """Print one detail row per byte showing each step of the decoding.

    Each row lists the byte as originally transmitted, its column and a
    '+'/'-' marker ('-' when the byte was bit-reversed), the byte as
    stored in 'byte_list', the mask column and mask byte, the unmasked
    byte, the bit-permuted byte, its hex value and a character rendering.

    Arguments:
        clue: message type code; "N" and "+" use the mirrored mask layout
            and treat every second byte as reversed.
        mask: sequence of four integers.
        byte_list: sequence of integers (already in flipped order).
        only: when given, print only the rows whose mask column equals
            this value; when None, print every row with a blank line
            before each column-7 row.
    """
    print(" ORIGINAL COL 70615243 MC MASKBYTE UNMASKED PERMUTED CHARACTER")

    # NOTE(review): the spacing inside this literal may have been collapsed
    # when the file was pasted; confirm it lines up with the header above.
    row_layout = " {0} {1}{2} {3} {4} {5} {6} {7} 0x{8:02X} {9}"
    # Positions of the unmasked bit string, in the order they are emitted.
    permutation = (0, 2, 4, 6, 7, 5, 3, 1)

    mirrored = clue in ("N", "+")
    show_everything = only is None
    total = len(byte_list)

    def dashed(value):
        # Binary rendering with zeros drawn as dashes.
        return "{:08b}".format(value).replace("0", "-")

    for position, current_byte in enumerate(byte_list):
        column = (total - 1 - position) % 8
        mask_column = min(column, 7 - column) if mirrored else column % 4

        # Every second byte (2nd, 4th, ...) of a New message was reversed.
        if position % 2 == 1 and mirrored:
            original_byte = reverse_byte(current_byte)
            reverse_symbol = "-"
        else:
            original_byte = current_byte
            reverse_symbol = "+"

        mask_value = mask[mask_column]
        unmasked_byte = mask_value ^ current_byte
        unmasked_bits = "{:08b}".format(unmasked_byte)
        permuted_byte = int("".join(unmasked_bits[k] for k in permutation), 2)

        if 0x20 <= permuted_byte < 0x7F:
            ch = chr(permuted_byte)
        elif permuted_byte == 0x00:
            ch = "NUL"
        elif permuted_byte < 0x20:
            ch = "..."
        elif permuted_byte == 0x7F:
            ch = "DEL"
        else:
            ch = "***BAD***"

        if show_everything and column == 7:
            print()
        if show_everything or only == mask_column:
            print(row_layout.format(
                dashed(original_byte),
                column,
                reverse_symbol,
                dashed(current_byte),
                mask_column,
                "{:08b}".format(mask_value),
                dashed(unmasked_byte),
                dashed(permuted_byte),
                permuted_byte,
                ch,
            ))
- # Perform a special bit by bit analysis of the 0 timestamp message.
def zero_statistical_analysis(clue, byte_list):
    """Print, for each bit position 0-7, the fraction of bytes with that bit set.

    The report is framed by BEGIN/END banner lines and preceded by a
    blank line.

    Arguments:
        clue: message type code; accepted but not used.
        byte_list: sequence of integers to analyse. An empty sequence
            reports a bias of 0.0 for every bit.
    """
    total = len(byte_list)
    set_counts = [
        sum(1 for value in byte_list if value & (1 << bit))
        for bit in range(8)
    ]
    print()
    print("-------------------- BEGIN SPECIAL 0 TIMESTAMP MESSAGE ANALYSIS --------------------")
    for bit in range(8):
        # Divide by a float so the result is a fraction under Python 2 as
        # well (plain int / int truncates there), and avoid dividing by
        # zero when there are no bytes at all.
        bias = set_counts[bit] / float(total) if total else 0.0
        print(" Bit", bit, "bias: ", bias)
    print("-------------------- END SPECIAL 0 TIMESTAMP MESSAGE ANALYSIS ----------------------")
- # Guess the XOR mask using a quick and dirty bit frequency analysis.
def quick_statistical_analysis(clue, byte_list):
    """Guess the four XOR mask bytes from per-column bit majorities.

    Bytes are grouped by mask column (the last byte is column 0; for
    clues "N" and "+" columns c and 7-c share group min(c, 7-c),
    otherwise the group is c modulo 4). Within each group, a bit counts
    as "mostly set" when it is set in strictly more than half of the
    bytes. A mask bit is turned on wherever that majority value differs
    from the fixed target pattern.

    Returns:
        A list of four integers, one mask byte per group.
    """
    mirrored = clue in ("N", "+")
    total = len(byte_list)
    samples = [0] * 4
    ones = [[0] * 8 for _group in range(4)]

    for position, value in enumerate(byte_list):
        column = (total - 1 - position) % 8
        group = min(column, 7 - column) if mirrored else column % 4
        samples[group] += 1
        for bit in range(8):
            if value & (1 << bit):
                ones[group][bit] += 1

    # Expected majority value of each bit, indexed from bit 0 (least
    # significant) to bit 7.
    expected = (0, 0, 0, 1, 0, 1, 1, 0)
    guess = []
    for group in range(4):
        mask_byte = 0
        for bit in range(8):
            mostly_set = 2 * ones[group][bit] > samples[group]
            if int(mostly_set) != expected[bit]:
                mask_byte |= 1 << bit
        guess.append(mask_byte)
    return guess
- # Process a single coded message.
def process_one(index, clue, stamp, posttime, where, byte_list):
    """Print the full report for a single coded message.

    The report contains a title line, the decoded timestamp (or a note
    that it is missing), the posting time, the message type, size parity
    and length, and the byte array with its header and footer. For clue
    "?" the report stops there. Otherwise it continues with the special
    analysis for the all-zero timestamp (when applicable), the guessed
    XOR mask, the unmasked array and the byte-by-byte detail; when the
    timestamp starts with a digit, two GREPME summary lines follow.

    Arguments:
        index: message index string.
        clue: message type code ("O", "N", "+" or "?").
        stamp: timestamp string.
        posttime: posting time string with '_' in place of ':'.
        where: location string shown in the title line.
        byte_list: the message bytes in flipped order.
    """
    print()
    message_id = "[{:s}] {:s}".format(index, stamp)
    print("[{:s}] {:s} {:s}:".format(index, where, stamp))
    print()

    has_numeric_stamp = stamp[0] in "0123456789"
    if not has_numeric_stamp:
        print(" Unix timestamp is missing.")
    else:
        if stamp == "0000000000":
            stamp_human = "1970-01-01 00:00:00"
        else:
            # NOTE(review): fromtimestamp renders in the local timezone
            # while the zero stamp above is hard-coded; confirm intended.
            moment = datetime.datetime.fromtimestamp(int(stamp))
            stamp_human = moment.strftime('%Y-%m-%d %H:%M:%S')
        print(" Unix timestamp decode: {:s}".format(stamp_human))
    print(" Reddit posting time: {:s}".format(posttime.replace("_", ":")))

    # 'category' accumulates a fixed-width tag used in the GREPME lines.
    if clue in ("N", "+"):
        type_sentence, category = " Message type is NEW.", "NEW"
    elif clue == "O":
        type_sentence, category = " Message type is OLD.", "OLD"
    else:
        type_sentence, category = " Message type is UNKNOWN.", "UNK"
    print(type_sentence, end="")

    size = len(byte_list)
    if size % 2 != 0:
        print(" Message size is ODD.", end="")
        category += " ODD "
    else:
        print(" Message size is EVEN.", end="")
        category += " EVEN"
    print(" Message length is {:d} bytes.".format(size))
    category += " {:3d}".format(size)
    print()

    print_flipped_array_header(clue, size)
    print_byte_array(byte_list)
    print_flipped_array_footer(clue, size)
    if clue == "?":
        print()
        return

    if stamp == "0000000000":
        zero_statistical_analysis(clue, byte_list)

    quick_mask = quick_statistical_analysis(clue, byte_list)
    unmasked_byte_list = apply_mask(clue, quick_mask, byte_list)
    print()
    print()
    print(" Using quick-and-dirty statistical mask guess.")
    print()
    print_unmasked_array_header(clue, quick_mask)
    print_byte_array(unmasked_byte_list)
    print()
    print()
    print_byte_by_byte_detail(clue, quick_mask, byte_list)

    if not has_numeric_stamp:
        print()
        return

    stamp_binary = "{:032b}".format(int(stamp))
    # Mask bytes are concatenated from index 3 down to index 0.
    mask_binary = "".join("{:08b}".format(quick_mask[k]) for k in (3, 2, 1, 0))
    # First character of each 8-character group of the mask string.
    mask_7_bits = " ".join(mask_binary[k] for k in (0, 8, 16, 24))
    print()
    print(" <<GREPME001>> ", message_id, category, " ", stamp_binary, "", mask_7_bits)
    print(" <<GREPME002>> ", message_id, category, " ", stamp_binary, "", mask_binary)
    print()
- # Process all the coded messages in the f04cb dataset.
def process_all():
    """Decode and report every message in "dataset_for_solving_f04cb.txt".

    Each non-blank line of the file holds six colon-separated fields:
    index, clue, stamp, posttime, where and the coded text. Lines whose
    clue is "X" are skipped; all others are decoded with
    extract_ordered_inside_bytes and reported with process_one.

    Raises:
        IOError/OSError: if the data file cannot be opened.
        ValueError: if a non-blank line does not have exactly six fields.
    """
    # The with-block guarantees the file is closed even if a line fails.
    with open("dataset_for_solving_f04cb.txt", "r") as datafile:
        for oneline in datafile:
            if oneline.endswith("\n"):
                oneline = oneline[:-1]
            if not oneline:
                # A blank line carries no fields; skip it instead of
                # failing on the six-way unpack below.
                continue
            index, clue, stamp, posttime, where, stuff = oneline.split(":")
            if clue != "X":
                byte_list = extract_ordered_inside_bytes(clue, stamp[-1], stuff)
                # Edit the following if-test to only process some of the messages.
                if True:
                    process_one(index, clue, stamp, posttime, where, byte_list)


if __name__ == "__main__":
    process_all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement