Hellerick_Ferlibay

List imitator.py

Aug 23rd, 2020 (edited)
1,618
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.74 KB | None | 0 0
  1. from random import choice
  2. from statistics import mean, stdev
  3. filein = open(r'/home/hellerick/Documents/Data/Geonames/Italian populated places.txt', mode='rU', encoding="utf-8")
  4. # The list should be a text file with a name in each line, and with no blank line in the end
  5. necessary_length = 10000
  6. data = filein.read().split('\n')
  7. # for Python 2:
  8. # data = filein.read().decode('utf8').split('\n')
  9. # encoding="utf-8"
  10.  
  11. #data = [i.lower() for i in data]
  12. data = [i for i in data]
  13. filein.close()
  14. if data[-1] == '':
  15.     data = data[:-1]
  16. lengths = [len(i) for i in data]
  17. mastermean = mean (lengths)
  18. masterdev = stdev (lengths)
  19. print (mastermean, masterdev)
  20. stat = dict([])
  21. for w in data:
  22.     for i in range(-1,len(w)+1):
  23.         if i == -1:
  24.             prv = '\n'
  25.             cur = '\n'
  26.             nxt = w[0]
  27.         elif i == len(w):
  28.             prv = w[-1]
  29.             cur = '\n'
  30.             nxt = '\n'
  31.         else:
  32.             cur = w[i]
  33.             if i == 0:
  34.                 prv = '\n'
  35.             else:
  36.                 prv = w[i-1]
  37.             if i == len(w)-1:
  38.                 nxt = '\n'
  39.             else:
  40.                 nxt = w[i+1]
  41.         if prv+cur in stat:
  42.             if nxt in stat[prv+cur]:
  43.                 stat[prv+cur][nxt] += 1
  44.             else:
  45.                 stat[prv+cur][nxt] = 1
  46.         else:
  47.             stat[prv+cur] = {nxt:1}
  48. #for i in stat:
  49. #    print (i, stat[i])
  50. result = set()
  51. while len(result)<necessary_length:
  52.     s = '\n\n'
  53.     genname = s
  54.     while True:
  55.         nextchars = []
  56.         for i in stat[s]:
  57.             nextchars = nextchars + [i]*stat[s][i]
  58.         chosenchar = choice(nextchars)
  59.         genname = genname + chosenchar
  60.         s = s[1] + chosenchar
  61.         #print ()
  62.         if s == '\n\n':
  63.             break
  64.     #print (',',genname[2:-2],',')
  65.     genname = genname[2:-2]
  66.     if len(result) < 10:
  67.         result= result | {genname}
  68.     else:
  69.         curlen = [len(i) for i in result]
  70.         curdev = stdev(curlen)
  71.         curmean = mean(curlen)
  72.         attres = result | {genname}
  73.         attlen = [len(i) for i in attres]
  74.         attdev = stdev(attlen)
  75.         attmean = mean(attlen)
  76.         if ((abs(curmean-mastermean)-abs(attmean-mastermean)) + (abs(curdev-masterdev)-abs(attdev-masterdev)) > 0 and (abs(attmean-mastermean)<0.5 and abs(attdev-masterdev)<0.1)==False ) or (abs(attmean-mastermean)<0.5 and abs(attdev-masterdev)<0.1):
  77.             result = attres
  78.             print (genname)
  79.  
  80. curlen = [len(i) for i in result]
  81. curdev = stdev(curlen)
  82. curmean = mean(curlen)
  83. print (curmean, curdev)
  84.  
  85. with open('/home/hellerick/Documents/Data/Geonames/Italian populated places - Imitation.txt', mode='wt', encoding="utf-8") as f:
  86.     for r in result:
  87.         f.write(r+'\n')
  88.  
  89.  
Advertisement
Add Comment
Please, Sign In to add comment