Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import re
def get_syllables(words):
    """Tally weighted consonant+vowel chunks from a word-frequency mapping.

    A "syllable" here is a run of one or more lowercase consonants followed
    by a run of one or more lowercase vowels (``y`` counts as a vowel).
    Uppercase letters never match, so callers should lowercase their keys.

    Args:
        words: Mapping of word -> numeric weight (e.g. occurrence count).

    Returns:
        A two-element list of dicts mapping syllable -> summed weight:
        index 1 holds syllables that start their word, index 0 holds every
        other syllable (including a first match that is preceded by leading
        vowels, as in ``'apple'`` -> ``'pple'``).
    """
    syls = [{}, {}]
    for word, weight in words.items():
        chunks = re.findall('[bcdfghjklmnpqrstvwxz]+[aeiouy]+', word)
        for i, syl in enumerate(chunks):
            # Only the first match can be word-initial, and only when the
            # word really begins with it (no leading vowels before it).
            pos = int(i == 0 and word.startswith(syl))
            # dict.get keeps first-seen insertion order, which later ranking
            # relies on for tie-breaking.
            syls[pos][syl] = syls[pos].get(syl, 0) + weight
    return syls
def get_ranked(syls):
    """Return the keys of *syls* ordered from highest to lowest count.

    Args:
        syls: Mapping of syllable -> numeric count.

    Returns:
        A list of the mapping's keys sorted by descending count. Keys with
        equal counts keep the mapping's iteration order (the sort is stable,
        including with ``reverse=True``).
    """
    return sorted(syls, key=syls.get, reverse=True)
# Build `names`: lowercase first field -> sum of the third field over all
# yearly files yob1880.txt .. yob2019.txt.
# NOTE(review): rows are presumably "name,sex,count" (SSA baby-name format);
# confirm against the data files.
names = {}
for year in range(1880, 2020):
    # `with` closes each yearly file promptly; 140 files are opened here.
    with open('ssa-baby-names/yob' + str(year) + '.txt') as f:
        for line in f:
            fields = line.lower().strip().split(',')
            # The same key can appear several times (per file and per year),
            # so the third-field values are accumulated.
            names[fields[0]] = names.get(fields[0], 0) + int(fields[2])
# Build `words`: lowercase word -> number of occurrences in the body of the
# Moby Dick text file.
words = {}
enabled = False
# `with` guarantees the file handle is closed once counting is done.
with open('moby-dick/15-0.txt') as f:
    for line in f:
        # Counting is switched on at a line starting with 'CHAPTER I.' and
        # off again at the Project Gutenberg end marker, so text outside
        # that window is not counted.
        if line.startswith('CHAPTER I.'):
            enabled = True
        if line.startswith('End of the Project Gutenberg EBook'):
            enabled = False
        if enabled:
            # A word is one ASCII letter followed by lowercase letters, so a
            # run of capitals is split into one-letter words.
            for word in re.findall('[A-Za-z][a-z]*', line):
                word = word.lower()
                words[word] = words.get(word, 0) + 1
# Four syllable tables, in order: names non-initial, names initial,
# words non-initial, words initial.
syls = get_syllables(names) + get_syllables(words)
ranked = [get_ranked(x) for x in syls]

# Position of each syllable in each ranking, precomputed once so the loop
# below does O(1) lookups instead of a linear list.index() per syllable.
rank_of = [{s: i for i, s in enumerate(r)} for r in ranked]

# Score each syllable present in all four tables by its worst (largest)
# rank, so a low score means it is common in every table.
scores = []
for syl in syls[0]:
    if all(syl in x for x in syls):
        rank = max(lookup[syl] for lookup in rank_of)
        scores.append([rank, syl])

# Every consonant+vowel pair from an 18-consonant set (108 pairs) is always
# included; the remaining slots up to 256 go to the best-scoring
# three-letter syllables.
cv = [c + v for c in 'bcdfghjklmnprstvwz' for v in 'aeiouy']
final = cv + [x[1] for x in sorted(scores) if len(x[1]) == 3][:(256 - len(cv))]
print(final)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement