Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
import collections
import re

# Letters this script treats as consonants; 'y' is listed in `vow` instead.
cons = 'bcdfghjklmnpqrstvwxz'
# Letters this script treats as vowels (includes 'y').
vow = 'aeiouy'
def rm(clist, excluded):
    """Return the items of *clist* joined into one string, minus exclusions.

    An item is dropped when ``item in excluded`` is true; the surviving
    items keep their original order.
    """
    kept = filter(lambda ch: ch not in excluded, clist)
    return ''.join(kept)
def get_names(years=range(1880, 2020), data_dir=''):
    """Sum per-name counts across the yearly ``yob<year>.txt`` files.

    Each non-blank line is lower-cased and split on commas; the first field
    is used as the name and the third field as an integer count. Counts for
    the same (lower-cased) name are added together across all lines and
    all files.

    Args:
        years: Iterable of years to read; one file ``yob<year>.txt`` is
            opened per year. Defaults to 1880 through 2019 inclusive.
        data_dir: Directory holding the files. The default ``''`` resolves
            file names relative to the current working directory.

    Returns:
        A ``collections.Counter`` mapping lower-cased name to total count.

    Raises:
        OSError: If a year's file cannot be opened.
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field is not an integer.
    """
    # Function-scope import: the file header only imports re and collections.
    import os

    names = collections.Counter()
    for year in years:
        path = os.path.join(data_dir, 'yob' + str(year) + '.txt')
        # The context manager closes each file promptly instead of leaking
        # one open handle per year.
        with open(path) as f:
            for line in f:
                record = line.lower().strip()
                if not record:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                fields = record.split(',')
                names[fields[0]] += int(fields[2])
    return names
def get_syls(names, regexp):
    """Tally the *regexp* matches found in every name, weighted by count.

    Args:
        names: Mapping of name -> count (iterated for keys, indexed for the
            count).
        regexp: Pattern handed to ``re.findall`` for each name.

    Returns:
        A two-element list of ``collections.Counter`` objects. Index 1
        holds matches that are the first match in a name AND that the name
        starts with; index 0 holds every other match. Each match adds the
        name's count to its counter.
    """
    elsewhere = collections.Counter()
    at_start = collections.Counter()
    for name in names:
        for idx, piece in enumerate(re.findall(regexp, name)):
            if idx == 0 and name.startswith(piece):
                at_start[piece] += names[name]
            else:
                elsewhere[piece] += names[name]
    return [elsewhere, at_start]
def results(syls):
    """Build the final syllable list from the two positional counters.

    Args:
        syls: Two-element sequence of ``collections.Counter`` objects, as
            returned by ``get_syls``.

    Returns:
        A list that starts with every consonant+vowel pair (consonants taken
        from ``cons`` without 'x', 'q', 'z', 'w'; vowels from ``vow``),
        followed by three-character syllables present in BOTH counters.
        Those are ordered by the worse (larger) of their two popularity
        ranks, ties broken alphabetically, and cut off so that the whole
        list has at most 256 entries.
    """
    # Precompute syllable -> rank (0 = most common) for each counter. A dict
    # lookup gives the same index as list.index() on the most_common() order,
    # without a linear scan per syllable.
    rank_of = [
        {syl: rank for rank, (syl, _count) in enumerate(syls[p].most_common())}
        for p in range(2)
    ]
    scores = sorted(
        (max(rank_of[0][syl], rank_of[1][syl]), syl)
        for syl in syls[0]
        if syl in syls[1]
    )
    cv = [c + v for c in rm(cons, 'xqzw') for v in vow]
    three_letter = [syl for _rank, syl in scores if len(syl) == 3]
    return cv + three_letter[:256 - len(cv)]
# Consonants allowed to close a syllable, and the remaining consonants, which
# are the only ones allowed to lead a multi-consonant onset.
allowed_end = 'nrlc'
allowed_prec = rm(cons, allowed_end)

names = get_names()

# Syllable pattern, with the format fields filled in as:
#   {0} all consonants          {1} vowels
#   {2} allowed_prec            {3} allowed_end
#   {4} consonants except 'h'
# Onset: one consonant, or an allowed_prec consonant plus any consonants.
# Nucleus: one or more vowels.
# Optional coda: an allowed_end consonant, taken only when it is followed by
# a non-'h' consonant or by the end of the name.
final = results(
    get_syls(
        names,
        '(?:[{0}]|[{2}][{0}]*)[{1}]+(?:[{3}](?=[{4}]|$))?'.format(
            cons, vow, allowed_prec, allowed_end, rm(cons, 'h')
        ),
    )
)
print(final)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement