Advertisement
here2share

# corpus_gen_for_my_unique_ML_algorithm.py

May 29th, 2022
878
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.45 KB | None | 0 0
  1. # corpus_gen_for_my_unique_ML_algorithm.py
  2.  
  3. from itertools import combinations, permutations
  4. from random import randint as rndi, sample, choice, shuffle
  5. import webbrowser
  6. import tempfile
  7.  
  8. try:
  9.     # Python2
  10.     from Tkinter import *
  11.     from urllib2 import urlopen
  12. except ImportError:
  13.     # Python3
  14.     from tkinter import *
  15.     from urllib.request import urlopen
  16.    
  17. srcfilename=tempfile.mktemp(".txt", "demo_") # .html
  18.  
  19. web="""<html>
  20. <head></head>
  21. <body>@</body>
  22. </html>"""
  23.  
  24. def index_permu(alist, index):
  25.     alist = alist[:]
  26.     for i in range(len(alist)-1):
  27.         index, j = divmod(index, len(alist)-i)
  28.         alist[i], alist[i+j] = alist[i+j], alist[i]
  29.     return alist
  30.  
  31. def back():
  32.     anew()
  33.     r = rndi(0,12**12)
  34.     t = index_permu(list('cdghjkmnopqrsv'), r)
  35.     while aaa:
  36.         aaa.pop()
  37.     aaa.extend(t[:12])
  38.  
  39. def seq(zzz, eee=[]):
  40.     back()
  41.     if eee:
  42.         for e in eee:
  43.             k,v = e
  44.             if k == vvv:
  45.                 t = choice(arr)
  46.                 arr.remove(t)
  47.                 aaa[t] = choice(list(v))
  48.             else:
  49.                 arr.remove(k)
  50.                 if v:
  51.                     aaa[k] = choice(v)
  52.    
  53.     for z in zzz:
  54.         z = ttt[z]
  55.         a = z[rndi(0,len(z)-1)]
  56.         sss = list(a)
  57.         nnn = sample(arr, len(a))
  58.         nnn.sort()
  59.         for t in nnn:
  60.             arr.remove(t)
  61.             aaa[t] = sss.pop(0)
  62.    
  63.     t = str([yn, ''.join(aaa), ''.join(at)])
  64.     if t in ccc:
  65.         seq(zzz,eee)
  66.     else:
  67.         ccc.append(t) ### at
  68.         print ccc[-1]
  69.  
  70. def anew():
  71.     while arr:
  72.         arr.pop()
  73.     arr.extend(list(range(12)))
  74.    
  75. aaa = []
  76. arr = []
  77. ccc = []
  78. ttt = {}
  79.  
  80. at = ['']
  81.  
  82. ttt[0] = [''.join(z) for z in list(permutations('beau', 4))]
  83. ttt[1] = [''.join(z) for z in list(permutations('beautiful', 9))]
  84. ttt[2] = sum([[''.join(z) for z in list(permutations('beautiful', i))] for i in [1,2,3,5,6,7,8]], [])
  85. ttt[3] = 'XX XY XZ YX YY YZ ZX ZY'.split()
  86. ttt[4] = 'XYX XZX YXY YZY ZXZ ZYZ ZZZ XYZ'.split()
  87. ttt[5] = 'Y Z ZZ XXX XXY XXZ XYY XZY XZZ YXX YXZ YYX YYY YYZ YZX YZZ ZXX ZXY ZYX ZYY ZZX ZZY'.split() + ['']
  88.  
  89. vvv = 'beautiful'
  90.  
  91. iii = 200000
  92. while len(ccc) < iii:
  93.     yn = 1
  94.    
  95.     seq([0,3])
  96.    
  97.     seq([0,4],[[9,'X']])
  98.    
  99.     seq([0,5],[[9,'']])
  100.    
  101.     seq([1,3])
  102.    
  103.     seq([2,3])
  104.    
  105.     seq([2,4])
  106.    
  107.     ### ------------
  108.  
  109.     yn = 0
  110.    
  111.     seq([0,3],[[vvv,'be']])
  112.    
  113.     seq([0,4],[[9,'']])
  114.    
  115.     seq([0,5],[[9,'X']])
  116.    
  117.     seq([1],[[vvv,'be']])
  118.    
  119.     seq([1,5])
  120.  
  121.     seq([2,5])
  122.    
  123. shuffle(ccc)
  124. ccc = ['# yn -- combos -- at'] + ccc
  125.    
  126. urls='\n'.join(ccc) # <br>
  127. temp=open(srcfilename, 'w')
  128. temp.write(urls) # web.replace('@',urls)
  129. temp.close()
  130. webbrowser.open_new_tab(srcfilename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement