Advertisement
Guest User

program2.py

a guest
Aug 13th, 2017
395
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.46 KB | None | 0 0
  1.  
  2. from __future__ import print_function
  3.  
  4. import base64
  5. import datetime
  6. import sys
  7.  
  8. # Reverse Caesar shift the alphabetic characters in 'line' by 'amt' places.
  9. def outer_Caesar_shift(line, amt):
  10.     answer = ""
  11.     for c in line:
  12.         if (ord('A') <= ord(c)) and (ord(c) <= ord('Z')):
  13.             answer += chr(((ord(c) - ord('A') - amt) % 26) + ord('A'))
  14.         elif (ord('a') <= ord(c)) and (ord(c) <= ord('z')):
  15.             answer += chr(((ord(c) - ord('a') - amt) % 26) + ord('a'))
  16.         else:
  17.             answer += c
  18.     return answer
  19.  
  20. # Reverse the bits in a byte.
  21. def reverse_byte(b):
  22.     return int("{:08b}".format(b)[::-1], 2)
  23.  
  24. # Convert the external coded message text to a list of correctly flipped bytes.
  25. # This means that for New messages, the even-numbered bytes are reversed.
  26. # Arguments:
  27. #    'clue' is "O" for Old messages, "N" for New messages,
  28. #           "+" for New messages that need to be extended by a zero byte,
  29. #           and "?" for unknown message type, i.e., the sidebar.
  30. #    'stamplast' is last character of the Unix timestamp, to use for
  31. #                the reverse Caesar shift.
  32. #    'stuff' is the external coded message text.
  33. def extract_ordered_inside_bytes(clue, stamplast, stuff):
  34.     shifted_stuff = outer_Caesar_shift(stuff, ord(stamplast) - ord('0'))
  35.     if (len(shifted_stuff) % 4) != 0:
  36.         shifted_stuff += "".join(["=" for _1 in range((-len(shifted_stuff)) % 4)])
  37.     decimal_stuff = base64.b64decode(shifted_stuff)
  38.     message_value = int(decimal_stuff.decode("ascii"))
  39.     byte_string = "{:b}".format(message_value)
  40.     pad_to_whole_byte = "".join(["0" for _1 in range((-len(byte_string)) % 8)])
  41.     byte_string = pad_to_whole_byte + byte_string
  42.     if clue == "+":
  43.         byte_string = "00000000" + byte_string
  44.     byte_list = []
  45.     for i in range(0,len(byte_string),8):
  46.         current_byte = byte_string[i:i+8]
  47.         if (clue in ("N", "+")) and ((i % 16) == 8):
  48.             current_byte = current_byte[::-1]
  49.         byte_list.append(int(current_byte, 2))
  50.     return byte_list
  51.  
  52. # Print out a byte list of the message in the standard eight columns.
  53. # Note: I number my columns like I number my bytes: Right to left, and starting from 0.
  54. def print_byte_array(byte_list):
  55.     pad_to_64_bits = [None for _1 in range((-len(byte_list)) % 8)]
  56.     byte_list = pad_to_64_bits + byte_list
  57.     column = 8
  58.     for current_byte in byte_list:
  59.         column = (column - 1) % 8
  60.         num_spaces = [1,2,1,3,1,2,1,4][column]
  61.         print("".join([" " for _1 in range(num_spaces)]), end="")
  62.         if current_byte == None:
  63.             print("        ", end="")
  64.         else:
  65.             print("{:08b}".format(current_byte).replace("0","-"), end="")
  66.         if column == 0:
  67.             print()
  68.  
  69. # Print out the column headers corresponding to printing out a byte
  70. # list where the bytes have already been flipped into 70615243 order.
  71. def print_flipped_array_header(clue, length_byte_list):
  72.     if clue in ("N", "+"):
  73.         if (length_byte_list % 2) == 0:
  74.             print("    FORWARD  REVERSED  FORWARD  REVERSED   FORWARD  REVERSED  FORWARD  REVERSED")
  75.         else:
  76.             print("    REVERSED FORWARD   REVERSED FORWARD    REVERSED FORWARD   REVERSED FORWARD ")
  77.     else:
  78.         print("    FORWARD  FORWARD   FORWARD  FORWARD    FORWARD  FORWARD   FORWARD  FORWARD ")
  79.     print("    70615243 70615243  70615243 70615243   70615243 70615243  70615243 70615243")
  80.  
  81. # Print out the XOR mask template corresponding to the message.
  82. def print_flipped_array_footer(clue, length_byte_list):
  83.     if clue in ("N", "+"):
  84.         print("    ijklmnop abcdefgh  IJKLMNOP ABCDEFGH   ABCDEFGH IJKLMNOP  abcdefgh ijklmnop")
  85.     elif clue in ("O",):
  86.         print("    ABCDEFGH IJKLMNOP  abcdefgh ijklmnop   ABCDEFGH IJKLMNOP  abcdefgh ijklmnop")
  87.  
  88. # Print out the XOR mask bit values corresponding to the message.
  89. def print_unmasked_array_header(clue, mask):
  90.     print("    UNMASKED UNMASKED  UNMASKED UNMASKED   UNMASKED UNMASKED  UNMASKED UNMASKED")
  91.     if clue in ("N", "+"):
  92.         print("    {0:08b} {1:08b}  {2:08b} {3:08b}   {3:08b} {2:08b}  {1:08b} {0:08b}".format(*mask))
  93.     elif clue in ("O",):
  94.         print("    {3:08b} {2:08b}  {1:08b} {0:08b}   {3:08b} {2:08b}  {1:08b} {0:08b}".format(*mask))
  95.  
  96. # Apply a given mask against the byte list.
  97. def apply_mask(clue, mask, byte_list):
  98.     unmasked_byte_list = []
  99.     column = len(byte_list) % 8
  100.     for current_byte in byte_list:
  101.         column = (column - 1) % 8
  102.         if clue in ("N", "+"):
  103.             mask_column = min(column, 7-column)
  104.         else:
  105.             mask_column = column % 4
  106.         unmasked_byte_list.append(current_byte ^ mask[mask_column])
  107.     return unmasked_byte_list
  108.  
  109. # Print out a byte by byte detailed listing of the decoding.
  110. # If 'only' is specified, then just list the bytes for that mask value.
  111. def print_byte_by_byte_detail(clue, mask, byte_list, only=None):
  112.     print("    ORIGINAL  COL 70615243  MC MASKBYTE UNMASKED  PERMUTED CHARACTER")
  113.  
  114.     column = len(byte_list) % 8
  115.     reverse_flag = True
  116.     for current_byte in byte_list:
  117.         reverse_flag = not reverse_flag
  118.         column = (column - 1) % 8
  119.         if clue in ("N", "+"):
  120.             mask_column = min(column, 7-column)
  121.         else:
  122.             mask_column = column % 4
  123.  
  124.         if reverse_flag and (clue in ("N", "+")):
  125.             original_byte = reverse_byte(current_byte)
  126.             reverse_symbol = "-"
  127.         else:
  128.             original_byte = current_byte
  129.             reverse_symbol = "+"
  130.  
  131.         mask_value = mask[mask_column]
  132.         unmasked_byte = mask_value ^ current_byte
  133.  
  134.         ustr = "{:08b}".format(unmasked_byte)
  135.         pstr = ustr[0] + ustr[2] + ustr[4] + ustr[6] + ustr[7] + ustr[5] + ustr[3] + ustr[1]
  136.         permuted_byte = int(pstr, 2)
  137.  
  138.         if permuted_byte == 0x00:
  139.             ch = "NUL"
  140.         elif permuted_byte < 0x20:
  141.             ch = "..."
  142.         elif permuted_byte < 0x7F:
  143.             ch = chr(permuted_byte)
  144.         elif permuted_byte == 0x7F:
  145.             ch = "DEL"
  146.         else:
  147.             ch = "***BAD***"
  148.  
  149.         if (only == None) and (column == 7):
  150.             print()
  151.  
  152.         if (only == None) or (only == mask_column):
  153.             print("    {0}  {1}{2}  {3}  {4}  {5} {6}  {7} 0x{8:02X}  {9}".format(
  154.                   "{:08b}".format(original_byte).replace("0","-"),
  155.                   column,
  156.                   reverse_symbol,
  157.                   "{:08b}".format(current_byte).replace("0","-"),
  158.                   mask_column,
  159.                   "{:08b}".format(mask_value),
  160.                   "{:08b}".format(unmasked_byte).replace("0","-"),
  161.                   "{:08b}".format(permuted_byte).replace("0","-"),
  162.                   permuted_byte,
  163.                   ch
  164.             ))
  165.  
  166. # Perform a special bit by bit analysis of the 0 timestamp message.
  167. def zero_statistical_analysis(clue, byte_list):
  168.     bit_bias = [0 for _1 in range(8)]
  169.     for current_byte in byte_list:
  170.         for i in range(8):
  171.             if (current_byte & (1 << i)) != 0:
  172.                 bit_bias[i] += 1
  173.     print()
  174.     print("-------------------- BEGIN SPECIAL 0 TIMESTAMP MESSAGE ANALYSIS --------------------")
  175.     for i in range(8):
  176.         print("    Bit", i, "bias: ", bit_bias[i] / len(byte_list))
  177.     print("-------------------- END SPECIAL 0 TIMESTAMP MESSAGE ANALYSIS ----------------------")
  178.     return
  179.  
  180. # Guess the XOR mask using a quick and dirty bit frequency analysis.
  181. def quick_statistical_analysis(clue, byte_list):
  182.     counts = [0 for _1 in range(4)]
  183.     bit_frequency = [[0 for _2 in range(8)] for _1 in range(4)]
  184.     column = len(byte_list) % 8
  185.     for current_byte in byte_list:
  186.         column = (column - 1) % 8
  187.         if clue in ("N", "+"):
  188.             mask_column = min(column, 7-column)
  189.         else:
  190.             mask_column = column % 4
  191.         counts[mask_column] += 1
  192.         for i in range(8):
  193.             if (current_byte & (1 << i)) != 0:
  194.                 bit_frequency[mask_column][i] += 1
  195.     target = [0, 0, 0, 1, 0, 1, 1, 0]
  196.     mask = [0 for _1 in range(4)]
  197.     for mask_column in range(4):
  198.         for i in range(8):
  199.             bias = (2 * bit_frequency[mask_column][i]) > counts[mask_column]
  200.             if (1 if bias else 0) != target[i]:
  201.                 mask[mask_column] += (1 << i)
  202.     return mask
  203.  
  204. # Process a single coded message.
  205. def process_one(index, clue, stamp, posttime, where, byte_list):
  206.     print()
  207.     message_id = "[{:s}] {:s}".format(index, stamp)
  208.     print("[{:s}] {:s} {:s}:".format(index, where, stamp))
  209.  
  210.     print()
  211.     if stamp[0] in "0123456789":
  212.         if stamp != "0000000000":
  213.             stamp_human = datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d %H:%M:%S')
  214.         else:
  215.             stamp_human = "1970-01-01 00:00:00"
  216.         print("    Unix timestamp decode:  {:s}".format(stamp_human))
  217.     else:
  218.         print("    Unix timestamp is missing.")
  219.     print("    Reddit posting time:    {:s}".format(posttime.replace("_",":")))
  220.  
  221.     category = ""
  222.     if clue in ("N", "+"):
  223.         print("    Message type is NEW.", end="")
  224.         category += "NEW"
  225.     elif clue in ("O",):
  226.         print("    Message type is OLD.", end="")
  227.         category += "OLD"
  228.     else:
  229.         print("    Message type is UNKNOWN.", end="")
  230.         category += "UNK"
  231.     if (len(byte_list) % 2) == 0:
  232.         print("  Message size is EVEN.", end="")
  233.         category += " EVEN"
  234.     else:
  235.         print("  Message size is ODD.", end="")
  236.         category += " ODD "
  237.     print("  Message length is {:d} bytes.".format(len(byte_list)))
  238.     category += " {:3d}".format(len(byte_list))
  239.  
  240.     print()
  241.     print_flipped_array_header(clue, len(byte_list))
  242.     print_byte_array(byte_list)
  243.     print_flipped_array_footer(clue, len(byte_list))
  244.  
  245.     if clue == "?":
  246.         print()
  247.         return
  248.  
  249.     if stamp == "0000000000":
  250.         zero_statistical_analysis(clue, byte_list)
  251.  
  252.     quick_mask = quick_statistical_analysis(clue, byte_list)
  253.     unmasked_byte_list = apply_mask(clue, quick_mask, byte_list)
  254.  
  255.     print()
  256.     print()
  257.     print("    Using quick-and-dirty statistical mask guess.")
  258.     print()
  259.     print_unmasked_array_header(clue, quick_mask)
  260.     print_byte_array(unmasked_byte_list)
  261.     print()
  262.     print()
  263.     print_byte_by_byte_detail(clue, quick_mask, byte_list)
  264.  
  265.     if stamp[0] not in "0123456789":
  266.         print()
  267.         return
  268.  
  269.     stamp_binary = "{:032b}".format(int(stamp))
  270.  
  271.     mask = quick_mask
  272.     mask_binary = "{:08b}{:08b}{:08b}{:08b}".format(mask[3], mask[2], mask[1], mask[0])
  273.     mask_7_bits = "{:s} {:s} {:s} {:s}".format(mask_binary[0], mask_binary[8], mask_binary[16], mask_binary[24])
  274.     print()
  275.     print("    <<GREPME001>>  ", message_id, category, " ", stamp_binary, "", mask_7_bits)
  276.     print("    <<GREPME002>>  ", message_id, category, " ", stamp_binary, "", mask_binary)
  277.  
  278.     print()
  279.     return
  280.  
  281. # Process all the coded messages in the f04cb dataset.
  282. def process_all():
  283.     datafile = open("dataset_for_solving_f04cb.txt", "r")
  284.     for oneline in datafile:
  285.         if oneline[-1] == '\n':
  286.             oneline = oneline[:-1]
  287.         index, clue, stamp, posttime, where, stuff = oneline.split(":")
  288.         if clue != "X":
  289.             byte_list = extract_ordered_inside_bytes(clue, stamp[-1], stuff)
  290.             # Edit the following if-test to only process some of the messages.
  291.             if True:
  292.                 process_one(index, clue, stamp, posttime, where, byte_list)
  293.  
  294.  
# Run the whole dataset through the decoder only when this file is executed
# as a script, not when it is imported.
if __name__ == "__main__":
    process_all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement