Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: <encoding name> -*-
- from __future__ import print_function
- from collections import defaultdict
- from math import log
- import codecs
- from ctypes import c_uint64 as c_trie_node_type
- import sys
- """ Pseudocode from Maly, 1976 Appendix B
- Maly, K. Compressed tries. Commun. ACM 19, 7, 409-415. [1]
- """
- def init_c_trie(num_keys, num_letters):
- c_field_len = 1 + int(log(num_keys)/log(2))
- min_trie_node_size = 2 + c_field_len + num_letters
- if min_trie_node_size > 64:
- raise RuntimeError("Sorry cannot currently handle min_trie_node_size > 64. Your set of strings needs size={}".format(min_trie_node_size))
- return tuple(c_trie_node_type() for i in range(num_keys))
- def _prep_input(keys):
- kl = list(keys)
- kl.sort()
- ls = set()
- for k in kl:
- ls.update(k)
- lsl = list(ls)
- lsl.sort()
- return kl, ''.join(lsl)
- def build_c_tree(keys):
- sorted_keys, all_letters = _prep_input(keys)
- num_letters = len(all_letters)
- curr_trie = init_c_trie(len(sorted_keys), num_letters)
- for key_index, key in enumerate(sorted_keys):
- pass
- print(curr_trie)
- def main(inp_file_name=None):
- if inp_file_name:
- with codecs.open(inp_file_name, 'rU', encoding='utf-8') as inp:
- keys = [line.strip() for line in inp if line.strip()]
- else:
- keys = ["A", "FOR", "IN", "THE",
- "AND", "FROM", "IS", "THIS",
- "ARE", "HAD", "IT", "TO",
- "AS", "HAVE", "NOT", "WAS",
- "AT", "HE", "OF", "WHICH",
- "BE", "HER", "ON", "WITH",
- "BUT", "HIS", "OR", "YOU",
- "BY", "I", "THAT",]
- build_c_tree(keys)
- if __name__ == '__main__':
- main(sys.argv[-1] if len(sys.argv) > 1 else None)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement