Hellerick_Ferlibay

NameImitator.py

Aug 10th, 2014
1,155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.37 KB | None | 0 0
  1. from random import choice
  2. from statistics import mean, stdev
  3. filein = open('NorwayNames.txt', mode='rU', encoding="utf-8")
  4. data = filein.read().split('\n')
  5. # for Python 2:
  6. # data = filein.read().decode('utf8').split('\n')
  7. # encoding="utf-8"
  8. data = [i.lower() for i in data]
  9. filein.close()
  10. if data[-1] == '':
  11.     data = data[:-1]
  12. lengths = [len(i) for i in data]
  13. mastermean = mean (lengths)
  14. masterdev = stdev (lengths)
  15. print (mastermean, masterdev)
  16. stat = dict([])
  17. for w in data:
  18.     for i in range(-1,len(w)+1):
  19.         if i == -1:
  20.             prv = '\n'
  21.             cur = '\n'
  22.             nxt = w[0]
  23.         elif i == len(w):
  24.             prv = w[-1]
  25.             cur = '\n'
  26.             nxt = '\n'
  27.         else:
  28.             cur = w[i]
  29.             if i == 0:
  30.                 prv = '\n'
  31.             else:
  32.                 prv = w[i-1]
  33.             if i == len(w)-1:
  34.                 nxt = '\n'
  35.             else:
  36.                 nxt = w[i+1]
  37.         if prv+cur in stat:
  38.             if nxt in stat[prv+cur]:
  39.                 stat[prv+cur][nxt] += 1
  40.             else:
  41.                 stat[prv+cur][nxt] = 1
  42.         else:
  43.             stat[prv+cur] = {nxt:1}
  44. #for i in stat:
  45. #    print (i, stat[i])
  46. result = []
  47. while len(result)<100:
  48.     s = '\n\n'
  49.     genname = s
  50.     while True:
  51.         nextchars = []
  52.         for i in stat[s]:
  53.             nextchars = nextchars + [i]*stat[s][i]
  54.         chosenchar = choice(nextchars)
  55.         genname = genname + chosenchar
  56.         s = s[1] + chosenchar
  57.         #print ()
  58.         if s == '\n\n':
  59.             break
  60.     #print (',',genname[2:-2],',')
  61.     genname = genname[2:-2]
  62.     if len(result) < 10:
  63.         result= result + [genname]
  64.     else:
  65.         curlen = [len(i) for i in result]
  66.         curdev = stdev(curlen)
  67.         curmean = mean(curlen)
  68.         attres = result + [genname]
  69.         attlen = [len(i) for i in attres]
  70.         attdev = stdev(attlen)
  71.         attmean = mean(attlen)
  72.         if ((abs(curmean-mastermean)-abs(attmean-mastermean)) + (abs(curdev-masterdev)-abs(attdev-masterdev)) > 0 and (abs(attmean-mastermean)<0.5 and abs(attdev-masterdev)<0.1)==False ) or (abs(attmean-mastermean)<0.5 and abs(attdev-masterdev)<0.1):
  73.             result = attres
  74.             print (genname)
  75.  
  76. result = result[10:]
  77. curlen = [len(i) for i in result]
  78. curdev = stdev(curlen)
  79. curmean = mean(curlen)
  80. print (curmean, curdev)
Advertisement
Add Comment
Please, Sign In to add comment