Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
"""Build a list of common name syllables from yearly ``yobYYYY.txt`` files.

Each input line is comma-separated; the first field is a name and the third
field is an integer count (presumably the U.S. SSA baby-name data set --
verify against the actual files).  Names are lower-cased and their counts
are summed over all years.  Every name is then cut into
consonant-cluster + vowel-cluster "syllables", which are tallied separately
for the name-initial position and for every other position.  The printed
list consists of all two-letter consonant+vowel pairs followed by the
best-ranked three-letter syllables that occur in both positions.
"""
import re

# One or more consonants followed by one or more vowels ('y' is a vowel here).
SYLLABLE_RE = re.compile(r'[bcdfghjklmnpqrstvwxz]+[aeiouy]+')

# Letters used to generate the always-included two-letter syllables.
# NOTE(review): 'q' is matched by SYLLABLE_RE but is absent from this onset
# list, exactly as in the original script -- confirm that this is intentional.
CV_CONSONANTS = 'bcdfghjklmnprstvwxz'
CV_VOWELS = 'aeiouy'


def add_name_counts(lines, totals):
    """Add the per-name counts found in *lines* to the dict *totals*.

    Each line is lower-cased, stripped and split on commas; field 0 is the
    name and field 2 the integer count.  Blank lines are skipped.  *totals*
    is updated in place and also returned for convenience.

    Raises ``IndexError``/``ValueError`` on a non-blank malformed line.
    """
    for line in lines:
        fields = line.lower().strip().split(',')
        if fields == ['']:
            # Blank line (e.g. a trailing newline at end of file).
            continue
        name = fields[0]
        totals[name] = totals.get(name, 0) + int(fields[2])
    return totals


def load_name_counts(years=range(1880, 2020)):
    """Return ``{name: total count}`` summed over ``yob<year>.txt`` files.

    The files are looked up in the current working directory, one per entry
    of *years* (default 1880 through 2019 inclusive).
    """
    totals = {}
    for year in years:
        # The context manager closes each file; the original leaked them all.
        with open('yob{}.txt'.format(year), encoding='utf-8') as handle:
            add_name_counts(handle, totals)
    return totals


def count_syllables(names):
    """Tally syllable weights by position.

    *names* maps a name to its weight.  Returns a two-element list of dicts:
    index 1 holds syllables that are the very start of a name, index 0 holds
    every other matched syllable.  Each syllable's value is the sum of the
    weights of the names it occurs in (once per occurrence).
    """
    syls = [{}, {}]
    for name, weight in names.items():
        for i, syl in enumerate(SYLLABLE_RE.findall(name)):
            # The first match is only "initial" if the name really starts
            # with it; names beginning with a vowel have no initial syllable.
            pos = int(i == 0 and name.startswith(syl))
            syls[pos][syl] = syls[pos].get(syl, 0) + weight
    return syls


def rank_shared_syllables(syls):
    """Return sorted ``(rank, syllable)`` pairs for syllables in both tables.

    Within each table syllables are ranked by descending weight (0 = most
    common, ties keep insertion order).  A syllable's score is the worse
    (larger) of its two ranks, so a low score means it is common both at the
    start of names and elsewhere.
    """
    positions = []
    for counts in syls:
        ordered = sorted(counts, key=lambda s, counts=counts: -counts[s])
        # Dict lookup replaces the original's repeated list.index() scans.
        positions.append({syl: idx for idx, syl in enumerate(ordered)})
    scores = [
        (max(rank[syl] for rank in positions), syl)
        for syl in syls[0]
        if syl in syls[1]
    ]
    return sorted(scores)


def build_syllable_list(syls, size=256):
    """Return the final syllable list, aiming for *size* entries.

    All two-letter consonant+vowel combinations are always included; the
    remaining slots are filled with the best-scoring three-letter syllables
    from :func:`rank_shared_syllables`.  The result may be shorter than
    *size* if too few three-letter syllables qualify.
    """
    cv = [c + v for c in CV_CONSONANTS for v in CV_VOWELS]
    extras = [syl for _, syl in rank_shared_syllables(syls) if len(syl) == 3]
    return cv + extras[:max(0, size - len(cv))]


def main():
    """Read the data files and print the resulting syllable list."""
    names = load_name_counts()
    print(build_syllable_list(count_syllables(names)))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement