# Untitled

from unicorn import *
from unicorn.x86_const import *
from capstone import *
from capstone.x86 import *
import pefile
import struct
import string
import sys

# size in bytes of the emulated stack region that decrypt_string maps
# directly behind the PE image; code_analyzer rejects larger stack offsets
stacksize = 0x10000
# when True, code_analyzer prints every instruction it disassembles
verbose = True

# -------------------------------------------------------------------------------------------
def char_size(s):
    """Guess the character width of the string that starts at ``s``.

    Only the first four bytes of the buffer are inspected.

    Returns 2 when they look like two printable characters each followed
    by a zero byte, 1 when the first two bytes are both printable, and 0
    otherwise (including when fewer than four bytes are available).
    """
    # not enough data to decide
    if len(s) < 4:
        return 0

    first, second, third, fourth = struct.unpack_from('<BBBB', s, 0)

    # a NUL or otherwise non-printable first byte rules out both layouts
    if first == 0 or chr(first) not in string.printable:
        return 0

    # X0X0: printable bytes interleaved with zero bytes
    if second == 0 and fourth == 0 and chr(third) in string.printable:
        return 2

    # XX..: two consecutive printable bytes
    if chr(second) in string.printable:
        return 1

    return 0

# -------------------------------------------------------------------------------------------
def get_string(data, offset):
    """Extract the NUL-terminated string stored ``offset`` bytes before the
    end of ``data``.

    data   -- byte buffer (bytes / bytearray) holding the stack memory
    offset -- distance in bytes from the END of ``data`` to the first byte
              of the candidate string

    Returns a ``(text, char_size)`` tuple where char_size is the 1 or 2
    reported by char_size().  ``('', 0)`` is returned when the offset lies
    outside the buffer, when char_size() does not recognise a string, or
    when a non-printable character is met before the terminator.  A string
    that runs up to the end of the buffer without a terminator is returned
    as far as it goes.
    """
    # an offset outside the buffer cannot point at a string; without this
    # guard a too-large offset turns into a negative slice index and reads
    # from the wrong place
    if offset <= 0 or offset > len(data):
        return '', 0

    # bytes() (an alias of str on Python 2) gives struct a real byte buffer
    # on both Python 2 and 3; str() on Python 3 would yield the repr text
    s = bytes(data[len(data) - offset:])

    cs = char_size(s)
    if cs == 0:
        return '', 0

    fmt = '<B' if cs == 1 else '<H'

    chars = []
    # only whole characters are read, so reaching the end of the buffer acts
    # like a terminator instead of raising struct.error
    for pos in range(0, len(s) - cs + 1, cs):
        code = struct.unpack_from(fmt, s, pos)[0]
        # 0 value, end of string
        if code == 0:
            break
        # invalid char, discard all; 16-bit units above 0xFF can never be
        # printable and chr() would raise ValueError for them on Python 2
        if code > 0xFF or chr(code) not in string.printable:
            return '', 0
        chars.append(chr(code))

    return ''.join(chars), cs

# -------------------------------------------------------------------------------------------
def str_to_int(s):
    """Convert a decimal or ``0x``/``0X``-prefixed hexadecimal string to int.

    Prints an error message and terminates the process with exit status -1
    when ``s`` is not a valid integer string.
    """
    try:
        # accept the hex prefix in either case
        if s.lower().startswith('0x'):
            return int(s, 16)
        # int() validates the text itself, so no separate isdigit() check
        # is needed (the old `s.isdigit` without parentheses was always true)
        return int(s)
    except ValueError:
        print("Error, invalid integer string")
        # sys.exit does not depend on the site-provided exit() helper
        sys.exit(-1)

# -------------------------------------------------------------------------------------------
def str_type(t):
    """Return the display name for a character-size code.

    1 maps to 'ASCII' and 2 maps to 'UTF16'; any other value raises
    Exception("Invalid string type").
    """
    for code, label in ((1, 'ASCII'), (2, 'UTF16')):
        if t == code:
            return label
    raise Exception("Invalid string type")

# -------------------------------------------------------------------------------------------
def format_disasembly(ins):
    """Render one disassembled instruction as a single text line.

    The line consists of the address as 8 upper-case hex digits, the
    instruction bytes as upper-case hex padded to 20 columns, and the
    mnemonic followed by the operand string when there is one.
    """
    hex_bytes = ''.join(['%02X' % octet for octet in ins.bytes])

    # mnemonic, plus the operands only when the instruction has any
    parts = ['%s' % ins.mnemonic]
    if len(ins.op_str) > 0:
        parts.append('%s' % ins.op_str)
    text = ' '.join(parts)

    return '%08X  %s  %s' % (ins.address, hex_bytes.ljust(20), text)

# -------------------------------------------------------------------------------------------
def code_analyzer(pe, virtualaddress, max_instructions=128):
    """Disassemble the code at ``virtualaddress`` up to the backward JMP that
    closes the decryption loop and collect the EBP-relative offsets it uses.

    pe               -- parsed PE object; its ``OPTIONAL_HEADER.ImageBase``,
                        ``get_offset_from_rva()`` and ``__data__`` are used
    virtualaddress   -- virtual address (image base included) of the first
                        instruction to disassemble
    max_instructions -- upper bound on the number of instructions examined

    Returns ``(code_len, stack_offsets)``: the number of code bytes from
    ``virtualaddress`` through the backward JMP, and the absolute EBP
    displacements seen, sorted descending.  ``(0, [])`` is returned when no
    backward JMP is found before a RET or the instruction limit, when no
    EBP-relative operand was seen, or when an offset exceeds ``stacksize``.
    """
    # get the raw offset from the virtualaddress
    a_off = pe.get_offset_from_rva(virtualaddress - pe.OPTIONAL_HEADER.ImageBase)
    # init disassembler lib; detail mode is required for operands and groups
    caps = Cs(CS_ARCH_X86, CS_MODE_32)
    caps.detail = True
    # init vars
    code_len = 0
    stack_offsets = []
    jmpfound = False
    # disassemble code and analyze the instructions
    for ins in caps.disasm(pe.__data__[a_off:], virtualaddress, max_instructions):

        # increase code_len with current instruction size
        code_len += ins.size
        if verbose:
            print(format_disasembly(ins))

        # process operands
        for ops in ins.operands:
            # only memory access operands are of interest
            if ops.type != X86_OP_MEM:
                continue
            mem = ops.value.mem
            # ebp base register and disp value not 0
            if mem.base == X86_REG_EBP and mem.disp != 0:
                # abs() folds [ebp-N] and [ebp+N] into the same offset N
                disp = abs(mem.disp)
                # add new disp value
                if disp not in stack_offsets:
                    stack_offsets.append(disp)

        # process groups
        if ins.groups:
            # jump types
            if ins.group(CS_GRP_JUMP):
                if ins.id == X86_INS_JMP:
                    # only an immediate target can be compared with the
                    # current address; parsing op_str as hex would raise
                    # ValueError for register or memory operands ("jmp eax")
                    targets = ins.operands
                    if (targets and targets[0].type == X86_OP_IMM
                            and targets[0].value.imm < ins.address):
                        # JMP backwards
                        jmpfound = True
                        break

            # return types
            elif ins.group(CS_GRP_RET):
                break

    # false if a RET or the max instructions were reached first
    if not jmpfound:
        print("End decryption loop not found")
        return 0, []

    # paranoid mode
    if not stack_offsets:
        print("No stack offsets found")
        return 0, []

    # every offset must fit inside the emulated stack region
    for offset in stack_offsets:
        if offset > stacksize:
            print("Stack offset 0x%08x is larger then the stacksize 0x%08x" % (offset, stacksize))
            return 0, []

    # return code length and stackoffsets sorted descending
    return code_len, sorted(stack_offsets, reverse=True)

# -------------------------------------------------------------------------------------------
def decrypt_string(filename, address):
    """Emulate the decryption code at ``address`` inside the PE file
    ``filename`` and print the strings found on the emulated stack.

    filename -- path of the PE file to load
    address  -- virtual address of the decryption code as a string
                (converted with str_to_int)

    The process exits with status -1 when code_analyzer reports no
    decryption loop.  Emulator errors (UcError) are printed and end the
    function.
    """
    # get virtualaddress
    start_va = str_to_int(address)

    # get some needed PE info from target file
    pe = pefile.PE(filename)
    opt_header = pe.OPTIONAL_HEADER
    imagebase = opt_header.ImageBase
    imagesize = opt_header.SizeOfImage
    # the stack region sits directly behind the image; both stack
    # registers start at its upper end
    stack_top = imagebase + imagesize + stacksize

    # run the code analyzer to locate and analyse the decryption loop
    code_len, stack_offsets = code_analyzer(pe, start_va)
    if not code_len:
        print("Failed to locate the decryption loop")
        exit(-1)

    try:
        # Initialize emulator
        emu = Uc(UC_ARCH_X86, UC_MODE_32)

        # map image plus stack in one region and copy each section's
        # data to its virtual address
        emu.mem_map(imagebase, imagesize + stacksize)
        for section in pe.sections:
            emu.mem_write(imagebase + section.VirtualAddress, section.get_data())

        # initialize stack registers ebp and esp
        emu.reg_write(UC_X86_REG_ESP, stack_top)
        emu.reg_write(UC_X86_REG_EBP, stack_top)

        # run from the start address until the end of the analysed code
        emu.emu_start(start_va, start_va + code_len)

        # stack_offsets is sorted descending, so the first entry is the
        # largest and defines how much stack memory below ebp to read
        deepest = stack_offsets[0]
        data = emu.mem_read(stack_top - deepest, deepest)

        # locate and print strings
        print("offset   type   length   content")
        print(32 * "=")
        for offset in stack_offsets:
            s, t = get_string(data, offset)
            if s:
                print("%06x   %s  %s   %s" % (offset, str_type(t), str(len(s)).rjust(6,' '), s))

    except UcError as e:
        print("Error, %s" % (e))
        return

# -------------------------------------------------------------------------------------------
if __name__ == '__main__':
    # expects: <pe file> <virtual address of the decryption code>;
    # without this check missing arguments end in a bare IndexError
    if len(sys.argv) < 3:
        print("Usage: %s <pe_file> <virtualaddress>" % (sys.argv[0]))
        sys.exit(1)
    decrypt_string(sys.argv[1], sys.argv[2])