Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import struct
- import itertools
- from collections import defaultdict
- import pprint
- import pickle
- import math
- import numpy as np
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Computes ``(a . b) / (||a|| * ||b||)``.

    Args:
        a: First vector (NumPy array or any array-like).
        b: Second vector, same length as *a*.

    Returns:
        The similarity as a float, or ``0.0`` when either input has zero
        norm. The plain formula would divide by zero there and yield NaN,
        which does not order sensibly when scores are compared.
    """
    # Accept plain sequences as well as arrays; lists have no ``.T``.
    a = np.asarray(a)
    b = np.asarray(b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    # Alternative metric tried earlier: np.linalg.norm(a - b) (Euclidean).
    return np.dot(a, b.T) / norm_a / norm_b
# Parse the two headers at the start of the archive named by sys.argv[1]
# (an archive-level header followed by the first per-file header), print
# every decoded field, and load the file's compressed payload.
#
# Module-level names set here and read further down the script:
# f, flags, method, xor_val, compressed_size, compressed_file.
pp = pprint.PrettyPrinter(indent=4)


def _read_u8(stream):
    """Return the next byte of *stream* as an int."""
    return ord(stream.read(1))


def _read_u16(stream):
    """Return the next two bytes of *stream* as a little-endian unsigned int."""
    return struct.unpack('<H', stream.read(2))[0]


def _read_cstring(stream):
    """Read bytes from *stream* up to the next NUL byte and return them.

    The terminating NUL is consumed but not returned. Reading also stops at
    end-of-file, so a truncated archive yields the bytes read so far instead
    of failing on ``ord('')``.
    """
    chars = bytearray()
    c = stream.read(1)
    while c and ord(c) != 0:
        # ``+=`` works with the 1-byte str (Python 2) or bytes (Python 3)
        # returned by read(); bytearray.append() rejects bytes on Python 3.
        chars += c
        c = stream.read(1)
    return chars


# The handle stays open for the rest of the script, which closes it at the end.
f = open(sys.argv[1], 'rb')

###### Archive-level header
header_id = _read_u16(f)
print('header id: {}'.format(header_id))
header_size = _read_u16(f)
print('header size: {}'.format(header_size))
first_header_size = _read_u8(f)
print('first header size: {}'.format(first_header_size))
version_num = _read_u8(f)
print('version: {}'.format(version_num))
min_req_version = _read_u8(f)
print('required version: {}'.format(min_req_version))
host_os = _read_u8(f)
print('host os: {}'.format(host_os))
#(0x01 = NOT USED)
#(0x02 = RESERVED)
#(0x04 = VOLUME_FLAG) indicates presence of succeeding volume
#(0x08 = NOT USED)
#(0x10 = PATHSYM_FLAG) indicates archive name translated ("\" changed to "/")
#(0x20 = BACKUP_FLAG) indicates backup type archive
flags = _read_u8(f)
print('flags: {0:b}'.format(flags))
res1 = _read_u8(f)  # reserved
print('res1: {}'.format(res1))
file_type = _read_u8(f)
print('filetype: {}'.format(file_type))  # 2 = comment header
res2 = _read_u8(f)  # reserved
print('res2: {}'.format(res2))
# NOTE: kept as the 1-tuple struct.unpack returns, so it prints as "(n,)".
created_date = struct.unpack('<L', f.read(4))
print('created_date: {}'.format(created_date))
res3 = bytearray(f.read(4 * 3))  # reserved
print('res3: {}'.format(repr(res3)))
filespec_pos_in_filename = f.read(2)
f.read(8)  # unused
orig_filename = _read_cstring(f)
print('orig filename: {}'.format(orig_filename))
comment = _read_cstring(f)
print('comment: {}'.format(comment))
crc = f.read(4)
ext_header_size = struct.unpack('<H', f.read(2))  # extended header, should be 0
print(repr(ext_header_size))

print('')
print('#####FILE INFO#####')

###### Per-file header
header_id = _read_u16(f)
print('header id: {}'.format(header_id))
header_size = _read_u16(f)
print('header size: {}'.format(header_size))
first_header_size = _read_u8(f)
print('first header size: {}'.format(first_header_size))
version_num = _read_u8(f)
print('version: {}'.format(version_num))
min_req_version = _read_u8(f)
print('required version: {}'.format(min_req_version))
host_os = _read_u8(f)
print('host os: {}'.format(host_os))
#(0x01 = GARBLED) (password protected)
#(0x02 = RESERVED)
#(0x04 = VOLUME_FLAG) indicates presence of succeeding volume
#(0x08 = EXTFILE_FLAG)
#(0x10 = PATHSYM_FLAG) indicates archive name translated ("\" changed to "/")
#(0x20 = BACKUP_FLAG) indicates file marked as backup
flags = _read_u8(f)
print('flags: {0:b}'.format(flags))
method = _read_u8(f)
print('method: {}'.format(method))
ftype = _read_u8(f)
print('filetype: {}'.format(ftype))
xor_val = _read_u8(f)
print('xor val: {}'.format(xor_val))
modified_date = struct.unpack('<L', f.read(4))
print('modified date: {}'.format(modified_date))
compressed_size = struct.unpack('<L', f.read(4))[0]
print('compressed size: {}'.format(compressed_size))
orig_crc = struct.unpack('<L', f.read(4))
print('file crc: {}'.format(orig_crc))
filespec_pos_in_filename = f.read(2)
file_access_mode = f.read(2)
host_data = f.read(2)
extra = bytearray(f.read(16))  # extra data?
print("\"extra\" data: {}".format(repr(extra)))
ext_file_start_pos = f.read(4)
filename = _read_cstring(f)
print('orig filename: {}'.format(filename))
comment = _read_cstring(f)
print('comment: {}'.format(comment))
file_header_crc = f.read(4)
ext_header_size = struct.unpack('<H', f.read(2))  # usually not used.
# .format() must be applied to the string, not to print()'s return value:
# the old spelling only worked under the Python 2 print statement.
print('ext header size: {}'.format(ext_header_size))

# Payload bytes as an array of byte values, ready for element-wise XOR.
compressed_file = np.array(bytearray(f.read(compressed_size)))
print(len(compressed_file))
# Load the three pickled reference histograms (norm_freqs1.pkl to
# norm_freqs3.pkl in the working directory) and average them element-wise
# into the profile that decryption candidates are scored against.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only run this against reference files you produced yourself.
known_good = []
for i in range(1, 4):
    # Pickles must be read in binary mode (text mode fails on Python 3), and
    # the context manager closes each handle instead of leaking it.
    with open('norm_freqs{}.pkl'.format(i), 'rb') as ref_file:
        known_good.append(pickle.load(ref_file))

# zip(*...) groups the i-th value of every reference histogram together.
avg_known_good = np.array([sum(t) / len(t) for t in zip(*known_good)])
# Recover the password one character at a time.
#
# For each candidate length, the known prefix is extended by every character
# code 32..127 in turn, with the still-unknown positions filled with 0xFF.
# The payload is decoded with that repeating key, and the candidate whose
# byte histogram is most similar (cosine similarity) to the averaged
# reference histogram is kept.
#
# NOTE(review): the paste lost its indentation; the loop nesting below was
# reconstructed from the statements themselves. Confirm against the original.
if not flags & 0x01:
    # Bit 0x01 is the GARBLED (password protected) flag. Without it there is
    # no key to search for, and the decoded buffer would never be assigned.
    print("NOT ENCRYPTED.")
elif method == 0:
    # No histogram scoring is done for method 0: dump the raw payload once
    # rather than once per candidate, and do not index an empty score list.
    print(list(compressed_file))
    print(compressed_file)
elif not 0 < method < 4:
    # Only methods 1-3 are scored; other values would leave no candidates.
    print("Unsupported method {}; skipping password search.".format(method))
else:
    for pw_len in range(18, 36, 6):
        print("Trying length {}".format(pw_len))
        # Every length starts again from the seed character "z".
        pw_guess = "z"
        while len(pw_guess) != pw_len:
            attempts = []
            # 0xFF placeholders for the positions not guessed yet.
            unknown_tail = chr(255) * (pw_len - len(pw_guess) - 1)
            for pw_try in range(32, 128):
                pw = [ord(c) for c in pw_guess + chr(pw_try) + unknown_tail]
                # Repeat the key cyclically to the payload length.
                pwa = np.resize(pw, compressed_size)
                unencrypted_compressed_file = ((pwa + xor_val) % 256) ^ compressed_file
                # density=True replaces the deprecated normed=True; with
                # equal-width bins the values are the same.
                norm_freqs, buckets = np.histogram(
                    unencrypted_compressed_file, 256, density=True)
                goodness = cosine_similarity(norm_freqs, avg_known_good)
                attempts.append((goodness, pw_try))
            # Highest score wins; ties go to the larger character code, as
            # with the previous sort-then-reverse.
            best = max(attempts)
            pw_guess += chr(best[1])
            print(pw_guess)
        print(pw_guess)

f.close()
Advertisement
Add Comment
Please, Sign In to add comment