Advertisement
Guest User

f04cb_decode.py

a guest
Aug 16th, 2017
1,125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.54 KB | None | 0 0
  1.  
  2. from __future__ import print_function
  3.  
  4. import base64
  5. import datetime
  6. import itertools
  7. import sys
  8.  
  9. # Reverse the bits in a byte.
  10. def reverse_byte(b):
  11.     return int("{:08b}".format(b)[::-1], 2)
  12.  
  13. # Permute the bits in a byte according to a 70615243 pattern.
  14. def permute_byte(b):
  15.     bstr = "{:08b}".format(b)
  16.     pstr = bstr[0] + bstr[2] + bstr[4] + bstr[6] + bstr[7] + bstr[5] + bstr[3] + bstr[1]
  17.     return int(pstr, 2)
  18.  
  19. # Reverse Caesar shift the alphabetic characters in 'line' by 'amt' places.
  20. def outer_Caesar_shift(line, amt):
  21.     answer = ""
  22.     for c in line:
  23.         if (ord('A') <= ord(c)) and (ord(c) <= ord('Z')):
  24.             answer += chr(((ord(c) - ord('A') - amt) % 26) + ord('A'))
  25.         elif (ord('a') <= ord(c)) and (ord(c) <= ord('z')):
  26.             answer += chr(((ord(c) - ord('a') - amt) % 26) + ord('a'))
  27.         else:
  28.             answer += c
  29.     return answer
  30.  
  31. # Convert the external coded message text to a list of bytes.
  32. # Arguments:
  33. #    'clue' is "O" for Old messages, "N" for New messages,
  34. #           "+" for New messages that need to be extended by a zero byte,
  35. #           and "?" for unknown message type, i.e., the sidebar.
  36. #    'stamplast' is last character of the Unix timestamp, to use for
  37. #                the reverse Caesar shift.
  38. #    'stuff' is the external coded message text.
  39. def extract_inside_bytes(clue, stamplast, stuff):
  40.     shifted_stuff = outer_Caesar_shift(stuff, ord(stamplast) - ord('0'))
  41.     if (len(shifted_stuff) % 4) != 0:
  42.         shifted_stuff += "".join(["=" for _1 in range((-len(shifted_stuff)) % 4)])
  43.     decimal_stuff = base64.b64decode(shifted_stuff)
  44.     message_value = int(decimal_stuff.decode("ascii"))
  45.  
  46.     byte_string = "{:b}".format(message_value)
  47.     pad_to_whole_byte = "".join(["0" for _1 in range((-len(byte_string)) % 8)])
  48.     byte_string = pad_to_whole_byte + byte_string
  49.  
  50.     byte_list = [int(byte_string[i:i+8], 2) for i in range(0, len(byte_string), 8)]
  51.     if clue == "+":
  52.         byte_list = [0x00] + byte_list
  53.     return byte_list
  54.  
  55. def permute_stamp_into_mask(stamp_binary, perm):
  56.     # (3, 2, 1, 0) is identity permutation.
  57.     mask_binary = (
  58.           stamp_binary[24-8*perm[0]:28-8*perm[0]] + stamp_binary[28-8*perm[0]:32-8*perm[0]]
  59.         + stamp_binary[24-8*perm[1]:28-8*perm[1]] + stamp_binary[28-8*perm[1]:32-8*perm[1]]
  60.         + stamp_binary[24-8*perm[2]:28-8*perm[2]] + stamp_binary[28-8*perm[2]:32-8*perm[2]]
  61.         + stamp_binary[24-8*perm[3]:28-8*perm[3]] + stamp_binary[28-8*perm[3]:32-8*perm[3]]
  62.     )
  63.     mask = [int(mask_binary[24-8*i:32-8*i], 2) for i in range(4)]
  64.     return mask
  65.  
# Octet permutations handed to permute_stamp_into_mask().
# process_one() picks the entry at index len(byte_list) % 8, so there is
# one permutation per message length modulo 8.
perm_lookup = [
                (1, 2, 0, 3),
                (1, 0, 2, 3),
                (0, 1, 3, 2),
                (0, 3, 1, 2),
                (3, 0, 2, 1),
                (3, 2, 0, 1),
                (2, 3, 1, 0),
                (2, 1, 3, 0),
              ]
  76.  
  77. # Process a single coded message.
  78. def process_one(index, clue, stamp, posttime, where, byte_list):
  79.     print()
  80.     message_id = "[{:s}] {:s}".format(index, stamp)
  81.     print("[{:s}] {:s} {:s}:".format(index, where, stamp))
  82.  
  83.     print()
  84.     if stamp[0] in "0123456789":
  85.         if stamp != "0000000000":
  86.             stamp_human = datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d %H:%M:%S')
  87.         else:
  88.             stamp_human = "1970-01-01 00:00:00"
  89.         print("    Unix timestamp decode:  {:s}".format(stamp_human))
  90.     else:
  91.         print("    Unix timestamp is missing.")
  92.     print("    Reddit posting time:    {:s}".format(posttime.replace("_",":")))
  93.  
  94.     category = ""
  95.     if clue in ("N", "+"):
  96.         print("    Message type is NEW.", end="")
  97.         category += "NEW"
  98.     elif clue in ("O",):
  99.         print("    Message type is OLD.", end="")
  100.         category += "OLD"
  101.     else:
  102.         print("    Message type is UNKNOWN.", end="")
  103.         category += "UNK"
  104.     if (len(byte_list) % 2) == 0:
  105.         print("  Message size is EVEN.", end="")
  106.         category += " EVEN"
  107.     else:
  108.         print("  Message size is ODD.", end="")
  109.         category += " ODD "
  110.     print("  Message length is {:d} bytes.".format(len(byte_list)))
  111.     category += " {:3d}".format(len(byte_list))
  112.  
  113.     if clue == "?":
  114.         return
  115.  
  116.     if stamp[0] not in "0123456789":
  117.         return
  118.  
  119.     stamp_binary = "{:032b}".format(int(stamp))
  120.     perm = perm_lookup[len(byte_list) % 8]
  121.     mask = permute_stamp_into_mask(stamp_binary, perm)
  122.  
  123.     permuted_byte_list = []
  124.     column = len(byte_list) % 8
  125.     reverse_flag = True
  126.     for original_byte in byte_list:
  127.         column = (column - 1) % 8
  128.         reverse_flag = not reverse_flag
  129.         if reverse_flag and (clue in ("N", "+")):
  130.             possflip_byte = reverse_byte(original_byte)
  131.         else:
  132.             possflip_byte = original_byte
  133.         permuted_byte = permute_byte(possflip_byte)
  134.         permuted_byte_list.append(permuted_byte)
  135.  
  136.     exchanged_byte_list = []
  137.     for i in range(0, len(byte_list)-1, 2):
  138.         exchanged_byte_list.append((permuted_byte_list[i] & 0xF0) + (permuted_byte_list[i+1] & 0x0F))
  139.         exchanged_byte_list.append((permuted_byte_list[i+1] & 0xF0) + (permuted_byte_list[i] & 0x0F))
  140.     if (len(byte_list) % 2) == 1:
  141.         exchanged_byte_list.append(permuted_byte_list[-1])
  142.  
  143.     answer = ""
  144.     column = len(byte_list) % 8
  145.     for exchanged_byte in exchanged_byte_list:
  146.         column = (column - 1) % 8
  147.         if clue in ("N", "+"):
  148.             mask_column = min(column, 7-column)
  149.         else:
  150.             mask_column = column % 4
  151.         masking_value = mask[mask_column]
  152.         unmasked_byte = masking_value ^ exchanged_byte
  153.         answer = answer + chr(unmasked_byte)
  154.  
  155.     print("    ")
  156.     print("    ")
  157.     print("    ", end="")
  158.     for i in range(0, len(answer), 2):
  159.         print(answer[i], end="")
  160.     for i in range(len(answer)-1, -1, -1):
  161.         if (i % 2) == 1:
  162.             print(answer[i], end="")
  163.     print()
  164.     return
  165.  
  166. # Process all the coded messages in the f04cb dataset.
  167. def process_all():
  168.     datafile = open("dataset_for_solving_f04cb.txt", "r")
  169.     for oneline in datafile:
  170.         if oneline[-1] == '\n':
  171.             oneline = oneline[:-1]
  172.         index, clue, stamp, posttime, where, stuff = oneline.split(":")
  173.         if clue != "X":
  174.             byte_list = extract_inside_bytes(clue, stamp[-1], stuff)
  175.             # Edit the following if-test to only process some of the messages.
  176.             if clue in ("N", "+"):
  177.                 process_one(index, clue, stamp, posttime, where, byte_list)
  178.  
  179.  
# Decode the whole dataset when this file is run as a script, but not
# when it is imported as a module.
if __name__ == "__main__":
    process_all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement