Advertisement
Guest User

Untitled

a guest
Sep 17th, 2020
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.52 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. import re
  3.  
  4. def get_syllables(words):
  5.   syls = [{}, {}]
  6.   for word in words:
  7.     word_syls = re.findall('[bcdfghjklmnpqrstvwxz]+[aeiouy]+', word)
  8.     for i, syl in enumerate(word_syls):
  9.       pos = int(i == 0 and word.startswith(syl))
  10.       if syl not in syls[pos]:
  11.         syls[pos][syl] = 0
  12.       syls[pos][syl] += words[word]
  13.   return syls
  14.  
  15. def get_ranked(syls):
  16.   return sorted(syls.keys(), key=lambda s: -syls[s])
  17.  
  18. names = {}
  19. for i in range(1880, 2020):
  20.   f = open('ssa-baby-names/yob' + str(i) + '.txt')
  21.   for line in f:
  22.     fields = line.lower().strip().split(',')
  23.     if fields[0] not in names:
  24.       names[fields[0]] = 0
  25.     names[fields[0]] = names[fields[0]] + int(fields[2])
  26.  
  27. words = {}
  28. f = open('moby-dick/15-0.txt')
  29. enabled = False
  30. for line in f:
  31.   if line.startswith('CHAPTER I.'):
  32.     enabled = True
  33.   if line.startswith('End of the Project Gutenberg EBook'):
  34.     enabled = False
  35.   if enabled:
  36.     line_words = re.findall('[A-Za-z][a-z]*', line)
  37.     for word in line_words:
  38.       word = word.lower()
  39.       if word not in words:
  40.         words[word] = 0
  41.       words[word] += 1
  42.  
  43. syls = get_syllables(names) + get_syllables(words)
  44. ranked = [get_ranked(x) for x in syls]
  45.  
  46. scores = []
  47. for syl in syls[0]:
  48.   if all([syl in x for x in syls]):
  49.     rank = max([x.index(syl) for x in ranked])
  50.     scores.append([rank, syl])
  51.  
  52. cv = [c+v for c in 'bcdfghjklmnprstvwz' for v in 'aeiouy']
  53.  
  54. final = cv + [x[1] for x in sorted(scores) if len(x[1])==3][:(256-len(cv))]
  55.  
  56. print(final)
  57.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement