Advertisement
Guest User

Untitled

a guest
Sep 18th, 2020
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.33 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. import re, collections
  3.  
  4. cons = 'bcdfghjklmnpqrstvwxz'
  5. vow = 'aeiouy'
  6.  
  7. def rm(clist, excluded):
  8.   return ''.join([c for c in clist if c not in excluded])
  9.  
  10. def get_names():
  11.   names = collections.Counter()
  12.   for i in range(1880, 2020):
  13.     f = open('yob' + str(i) + '.txt')
  14.     for line in f:
  15.       fields = line.lower().strip().split(',')
  16.       names[fields[0]] += int(fields[2])
  17.   return names
  18.  
  19. def get_syls(names, regexp):
  20.   syls = [collections.Counter(), collections.Counter()]
  21.   for name in names:
  22.     name_syls = re.findall(regexp, name)
  23.     for i, syl in enumerate(name_syls):
  24.       pos = int(i == 0 and name.startswith(syl))
  25.       syls[pos][syl] += names[name]
  26.   return syls
  27.  
  28. def results(syls):
  29.   ranked = [[x[0] for x in syls[p].most_common()] for p in range(2)]
  30.  
  31.   scores = []
  32.   for syl in syls[0]:
  33.     if syl in syls[1]:
  34.       rank = max([ranked[p].index(syl) for p in range(2)])
  35.       scores.append([rank, syl])
  36.  
  37.   cv = [c+v for c in rm(cons, 'xqzw') for v in vow]
  38.   return cv + [x[1] for x in sorted(scores) if len(x[1])==3][:(256-len(cv))]
  39.  
  40. names = get_names()
  41.  
  42. allowed_end = 'nrlc'
  43. allowed_prec = rm(cons, allowed_end)
  44.  
  45. final = results(get_syls(names,
  46.   '(?:[{0}]|[{2}][{0}]*)[{1}]+(?:[{3}](?=[{4}]|$))?'.format(cons, vow, allowed_prec, allowed_end, rm(cons, 'h'))
  47. ))
  48. print(final)
  49.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement