Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Case study #7: sentence generation.

Developers:
    Darbaa Ch.Yu.
"""
- import random
- import re
def text_generator(filename, n_senstences=20, encoding=None):
    """Generate random sentences from a text file using a word-level Markov chain.

    The file is read, reduced to Latin/Cyrillic letters, digits, double
    quotes, spaces and the punctuation ``! . , ?``, and split into words.
    Every word is mapped to the list of words that directly follow it in the
    text.  Each generated sentence starts with a randomly chosen capitalised
    word and is extended by 4 to 19 further words, each picked at random from
    the followers of the previous word (or from the whole text when the
    previous word has no recorded follower).

    Args:
        filename: Path of the text file to learn from.
        n_senstences: Number of sentences to generate (the parameter name is
            kept as-is for backward compatibility).
        encoding: Encoding passed to ``open``.  ``None`` keeps the platform
            default, exactly as before this parameter existed.

    Returns:
        The generated sentences as one string, each sentence followed by a
        single space.  An empty string is returned when the file contains no
        usable words or when ``n_senstences`` is not positive.
    """
    # `with` guarantees the file handle is closed even if reading fails.
    with open(filename, 'r', encoding=encoding) as input_file:
        text = input_file.read()

    # Turn every whitespace run (newlines, tabs, ...) into a single space
    # *before* filtering, otherwise tab-separated words get glued together
    # when the tab is stripped by the filter below.
    text = re.sub(r'\s+', ' ', text)
    # "ё"/"Ё" lie outside the а-я / А-Я code-point ranges, so they have to be
    # listed explicitly or they are silently cut out of the words.
    text = re.sub(r'[^a-zA-Zа-яА-ЯёЁ"0-9 !.,?]+', '', text)
    text = re.sub(' +', ' ', text)
    # Re-attach punctuation that ended up separated from its word.
    for mark in '?!.,':
        text = text.replace(' ' + mark, mark)

    words = text.split()
    if not words:
        # Nothing to build a chain from.
        return ''

    # word -> every word that directly follows it (duplicates kept so that
    # frequent successors are proportionally more likely to be picked).
    chain = {}
    for current, following in zip(words, words[1:]):
        chain.setdefault(current, []).append(following)

    capital = 'ЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮQWERTYUIOPASDFGHJKLZXCVBNM'
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # runs reproducible under random.seed() (set order depends on hashing).
    start_words = list(dict.fromkeys(w for w in words if w[0] in capital))
    if not start_words:
        # No capitalised word in the text: any word may open a sentence.
        start_words = list(dict.fromkeys(words))

    sentence_end = '.!?'
    sentences = []
    for _ in range(n_senstences):
        chain_word = random.choice(start_words)
        sentence = [chain_word]
        for _ in range(random.randint(4, 19)):
            followers = chain.get(chain_word)
            # A word seen only at the very end of the text has no followers;
            # continue from a random word instead.
            chain_word = random.choice(followers if followers else words)
            sentence.append(chain_word)

        last_word = sentence[-1]
        if last_word[-1] == ',':
            # A sentence must not end with a comma: swap it for an end mark.
            sentence[-1] = last_word[:-1] + random.choice(sentence_end)
        elif last_word[-1] not in sentence_end:
            sentence[-1] = last_word + random.choice(sentence_end)
        sentences.append(' '.join(sentence))

    # Every sentence is followed by one space, matching the original output.
    return ''.join(sentence + ' ' for sentence in sentences)
# Demo run: build five sentences from text.txt and show them on stdout.
generated = text_generator('text.txt', 5)
print(generated)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement