# f04cb_decode.py


from __future__ import print_function

import base64
import datetime
import itertools
import sys

# Reverse the order of the bits in a byte (bit 7 <-> bit 0, bit 6 <-> bit 1, ...).
def reverse_byte(b):
    # Work on the zero-padded, MSB-first binary spelling of the value.
    bits = format(b, "08b")
    return int("".join(reversed(bits)), 2)

# Permute the bits in a byte according to a 70615243 pattern: source bits
# 7,6,5,4,3,2,1,0 land on destination bits 7,0,6,1,5,2,4,3 respectively.
def permute_byte(b):
    source = format(b, "08b")
    # Indices into the MSB-first string, listed in output order.
    pick_order = (0, 2, 4, 6, 7, 5, 3, 1)
    return int("".join(source[k] for k in pick_order), 2)

# Undo a Caesar shift: move each ASCII letter in 'line' back by 'amt' places,
# wrapping around within its own case.  All other characters pass through.
def outer_Caesar_shift(line, amt):
    def unshift(ch):
        if 'A' <= ch <= 'Z':
            base = ord('A')
        elif 'a' <= ch <= 'z':
            base = ord('a')
        else:
            return ch
        return chr((ord(ch) - base - amt) % 26 + base)

    return "".join(unshift(ch) for ch in line)

# Turn the external coded message text into a list of byte values.
# Arguments:
#    'clue' is "O" for Old messages, "N" for New messages,
#           "+" for New messages that need to be extended by a zero byte,
#           and "?" for unknown message type, i.e., the sidebar.
#    'stamplast' is the last character of the Unix timestamp; its digit
#                value is the amount of the reverse Caesar shift.
#    'stuff' is the external coded message text.
# Returns a list of ints, one per 8 bits of the decoded message value,
# most significant byte first.
def extract_inside_bytes(clue, stamplast, stuff):
    # Peel off the outer Caesar layer to recover the base64 text.
    base64_text = outer_Caesar_shift(stuff, ord(stamplast) - ord('0'))
    # Append "=" until the length is a multiple of 4 (no-op when it already is).
    base64_text += "=" * (-len(base64_text) % 4)
    # The base64 payload is the ASCII decimal spelling of one integer.
    message_value = int(base64.b64decode(base64_text).decode("ascii"))

    # Binary digits of that integer, left-padded with zeros to whole bytes.
    bits = "{:b}".format(message_value)
    bits = "0" * (-len(bits) % 8) + bits

    byte_list = [int(bits[start:start + 8], 2) for start in range(0, len(bits), 8)]
    # "+" messages get one extra leading zero byte.
    leading = [0x00] if clue == "+" else []
    return leading + byte_list

# Build the 4-entry mask from the binary timestamp string.
# Arguments:
#    'stamp_binary' is the timestamp as a 32-character string of binary
#                   digits, most significant bit first.
#    'perm' holds four byte numbers in 0..3 (0 selects the last 8 digits of
#           'stamp_binary', 3 the first 8); (3, 2, 1, 0) is the identity
#           permutation.
# Returns a list of 4 ints: element 0 is read from the last 8 digits of the
# permuted string and element 3 from its first 8 digits.
def permute_stamp_into_mask(stamp_binary, perm):
    def octet(byte_number):
        start = 24 - 8 * byte_number
        return stamp_binary[start:start + 8]

    mask_binary = octet(perm[0]) + octet(perm[1]) + octet(perm[2]) + octet(perm[3])

    mask = []
    for start in (24, 16, 8, 0):
        mask.append(int(mask_binary[start:start + 8], 2))
    return mask

# Table of byte permutations for the timestamp mask.  process_one() indexes
# it with (message length in bytes) % 8 and hands the selected tuple to
# permute_stamp_into_mask() as its 'perm' argument.
perm_lookup = [
                (1, 2, 0, 3),
                (1, 0, 2, 3),
                (0, 1, 3, 2),
                (0, 3, 1, 2),
                (3, 0, 2, 1),
                (3, 2, 0, 1),
                (2, 3, 1, 0),
                (2, 1, 3, 0),
              ]

# Process a single coded message.
def process_one(index, clue, stamp, posttime, where, byte_list):
    """Print a report on one coded message and, when possible, its decoded text.

    Arguments:
       'index' is the message's index, as a string.
       'clue' is "O" for Old messages, "N" or "+" for New messages; anything
              else is reported as UNKNOWN.  A clue of "?" stops after the
              report header.
       'stamp' is the Unix timestamp as a string of decimal digits.  If its
               first character is not a digit the timestamp is reported as
               missing and nothing is decoded.
       'posttime' is the posting time, with "_" where ":" should be printed.
       'where' is a label printed in the heading next to the index.
       'byte_list' is the list of message byte values (ints that fit in 8 bits).

    Returns None; all results are written to standard output.
    """
    is_new = clue in ("N", "+")

    # ---- Report header -------------------------------------------------
    print()
    print("[{:s}] {:s} {:s}:".format(index, where, stamp))

    print()
    has_stamp = stamp[0] in "0123456789"
    if has_stamp:
        if stamp != "0000000000":
            # fromtimestamp() renders the moment in the local timezone.
            moment = datetime.datetime.fromtimestamp(int(stamp))
            stamp_human = moment.strftime('%Y-%m-%d %H:%M:%S')
        else:
            stamp_human = "1970-01-01 00:00:00"
        print("    Unix timestamp decode:  {:s}".format(stamp_human))
    else:
        print("    Unix timestamp is missing.")
    print("    Reddit posting time:    {:s}".format(posttime.replace("_", ":")))

    if is_new:
        type_name = "NEW"
    elif clue == "O":
        type_name = "OLD"
    else:
        type_name = "UNKNOWN"
    print("    Message type is {:s}.".format(type_name), end="")

    size = len(byte_list)
    parity_name = "EVEN" if size % 2 == 0 else "ODD"
    print("  Message size is {:s}.".format(parity_name), end="")
    print("  Message length is {:d} bytes.".format(size))

    # The decode below is skipped for "?" messages and for missing timestamps.
    if clue == "?" or not has_stamp:
        return

    # ---- Decode --------------------------------------------------------
    # The mask is the timestamp's four bytes, permuted by message length.
    stamp_binary = "{:032b}".format(int(stamp))
    mask = permute_stamp_into_mask(stamp_binary, perm_lookup[size % 8])

    # Step 1: for NEW messages bit-reverse every second byte (positions
    # 1, 3, 5, ...); then apply the fixed bit permutation to every byte.
    permuted = []
    for position, original_byte in enumerate(byte_list):
        if is_new and position % 2 == 1:
            original_byte = reverse_byte(original_byte)
        permuted.append(permute_byte(original_byte))

    # Step 2: within each consecutive pair of bytes, exchange the low
    # nibbles.  An unpaired final byte is carried over unchanged.
    exchanged = []
    for i in range(0, size - 1, 2):
        first, second = permuted[i], permuted[i + 1]
        exchanged.append((first & 0xF0) + (second & 0x0F))
        exchanged.append((second & 0xF0) + (first & 0x0F))
    if size % 2 == 1:
        exchanged.append(permuted[-1])

    # Step 3: XOR each byte with a mask entry.  The column counts down
    # (mod 8) starting just below size % 8.  NEW messages mirror the column
    # (7..4 map onto 0..3); other messages take the column modulo 4.
    chars = []
    column = size % 8
    for exchanged_byte in exchanged:
        column = (column - 1) % 8
        if is_new:
            mask_column = min(column, 7 - column)
        else:
            mask_column = column % 4
        chars.append(chr(mask[mask_column] ^ exchanged_byte))
    answer = "".join(chars)

    # Step 4: print the even-position characters front to back, followed
    # by the odd-position characters back to front.
    print("    ")
    print("    ")
    print("    " + answer[0::2] + answer[1::2][::-1])
    return

# Process all the coded messages in the f04cb dataset.
def process_all():
    """Decode the selected messages listed in dataset_for_solving_f04cb.txt.

    Each non-blank line of the dataset holds six colon-separated fields:
    index, clue, stamp, posttime, where and the coded message text.
    Lines whose clue is "X" are skipped entirely.
    """
    # 'with' closes the dataset file even if a line fails to parse.
    with open("dataset_for_solving_f04cb.txt", "r") as datafile:
        for oneline in datafile:
            oneline = oneline.rstrip("\n")
            if not oneline:
                # Tolerate blank lines (e.g. a stray empty line at the end).
                continue
            index, clue, stamp, posttime, where, stuff = oneline.split(":")
            if clue == "X":
                continue
            byte_list = extract_inside_bytes(clue, stamp[-1], stuff)
            # Edit the following if-test to only process some of the messages.
            if clue in ("N", "+"):
                process_one(index, clue, stamp, posttime, where, byte_list)


# When run as a script (not imported), decode the whole dataset.
if __name__ == "__main__":
    process_all()