import re


class Signature_Element(object):
    """Container for one signature element plus helpers for its sequence.

    The helpers convert the "grep style" mask notation (runs of ``.``) into
    the brace notation (``{n}``) and split a sequence into literal and
    syntax fragments.
    """

    # Opening bracket -> the closing bracket that terminates the token.
    _BRACKET_PAIRS = {"(": ")", "[": "]", "{": "}"}
    # Characters that form a complete token on their own.
    _WILDCARDS = ("*", "?")

    def __init__(self):
        self.byte_seq_reference = None
        self.subseq_min_frag_len = None
        self.subseq_position = None
        self.subseq_max_offset = None
        self.subseq_min_offset = None
        self.sequence = None
        self.default_shift = None
        # main() assigns this attribute; declare it so every instance has it.
        self.offset = None

    def do_grep_grammer_convert(self, sequence):
        """Convert sigfile mask notation (e.g. ``...``) to DROID (e.g. ``{3}``).

        Every run of consecutive dots is replaced by `` {n} `` where ``n`` is
        the length of *that* run, so several masks of different lengths in
        one sequence are each converted correctly.

        Args:
            sequence: signature string, possibly containing runs of ``.``.

        Returns:
            The sequence with each dot run replaced by a space-padded
            ``{n}`` marker. A sequence without dots is returned unchanged.
        """
        return re.sub(
            r"\.+",
            lambda match: " {%d} " % len(match.group(0)),
            sequence,
        )

    def do_fragmenter(self, sequence):
        """Split a signature sequence into literal and token fragments.

        Spaces are removed first. Runs of ordinary characters become
        ``"string::<chars>"`` fragments. A bracketed group -- ``(...)``,
        ``[...]`` or ``{...}``, nesting allowed -- becomes one
        ``"token::<group>"`` fragment, and each ``*`` or ``?`` outside a
        group becomes its own single-character token. A closing bracket
        that does not close an open group is kept as a literal character.

        Args:
            sequence: signature string in brace notation.

        Returns:
            dict mapping a 1-based fragment index to the labelled fragment,
            in order of appearance. Empty input gives an empty dict.
        """
        sequence = sequence.replace(" ", "")
        fragments = []
        literal = []          # characters of the literal run being built
        token = []            # characters of the bracket token being built
        pending_closers = []  # stack of closers still expected for `token`

        for char in sequence:
            if pending_closers:
                # Inside a bracketed token: consume everything until the
                # outermost bracket is closed.
                token.append(char)
                if char in self._BRACKET_PAIRS:
                    pending_closers.append(self._BRACKET_PAIRS[char])
                elif char == pending_closers[-1]:
                    pending_closers.pop()
                    if not pending_closers:
                        fragments.append("token::" + "".join(token))
                        token = []
                continue

            if char in self._BRACKET_PAIRS or char in self._WILDCARDS:
                # A token starts here, so the literal run (if any) ends.
                if literal:
                    fragments.append("string::" + "".join(literal))
                    literal = []
                if char in self._WILDCARDS:
                    fragments.append("token::" + char)
                else:
                    token = [char]
                    pending_closers.append(self._BRACKET_PAIRS[char])
            else:
                literal.append(char)

        # Flush whatever is left; an unterminated group is still reported
        # as a token rather than being silently dropped.
        if token:
            fragments.append("token::" + "".join(token))
        if literal:
            fragments.append("string::" + "".join(literal))

        return dict(enumerate(fragments, 1))


def main():
    """Run the converter and fragmenter on a sample signature and print it."""
    sig = Signature_Element()
    sig.sequence = "524946(46|58){4}434452367672736E"
    sig.offset = "0"
    sig.sequence = sig.do_grep_grammer_convert(sig.sequence)
    sig.sequence = sig.do_fragmenter(sig.sequence)
    # Single-argument print call works on both Python 2 and Python 3.
    print(sig.sequence)


if __name__ == '__main__':
    main()