Advertisement
Guest User

Untitled

a guest
Aug 20th, 2019
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.35 KB | None | 0 0
import random
import re
import sys
from collections import defaultdict

  3. def no_empty(lst):
  4. return filter(bool, lst)
  5.  
  6. def map_filter(f, lst):
  7. return list(no_empty(map(f, lst)))
  8.  
  9. def clean_word(s):
  10. return s.lower().strip(',;«»-"()')
  11.  
  12. def split_sentence(s):
  13. return map_filter(clean_word, re.split('\s+', s.strip()))
  14.  
  15. fin = open('tsar.txt', 'r', encoding="utf8")
  16. data = map_filter(split_sentence, re.split('[\.\?]\s+|\n+', fin.read()))
  17. fin.close()
  18.  
  19. key = {}
  20.  
  21. for sentence in data:
  22. length = len(sentence)
  23. for idx, word in enumerate(sentence):
  24. if not word in key: key[word] = []
  25. key[word].append('<END>' if (idx + 1 == length) else sentence[idx + 1])
  26.  
  27. def generate_aux(key, word, res):
  28. if word in key:
  29. next_word = random.choice(key[word])
  30. if next_word == '<END>':
  31. return res
  32. else:
  33. return generate_aux(key, next_word, res + [next_word])
  34. else:
  35. return res
  36.  
  37. def get_random_word():
  38. return random.choice(list(key.keys()))
  39.  
  40. def get_start():
  41. return get_random_word() if len(sys.argv) < 2 else sys.argv[1]
  42.  
  43. def generate(key, first_word):
  44. return " ".join(generate_aux(key, first_word, [first_word]))
  45.  
  46. def generate_at_least(key, N):
  47. res = ""
  48. while len(res) < N: res = generate(key, get_start())
  49. return res
  50.  
  51. for _ in range(15):
  52. print(generate_at_least(key, 30))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement