Advertisement
Guest User

Untitled

a guest
Aug 4th, 2017
251
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.17 KB | None | 0 0
  1. #!/usr/bin/python3.5
  2.  
  3. import markovify
  4. import os
  5. import nltk
  6. from nltk.corpus import gutenberg, webtext, nps_chat, brown, reuters, twitter_samples, movie_reviews, genesis
  7.  
  class WordChain(markovify.Text):
      """Character-level variant of ``markovify.Text``.

      The three tokenisation hooks are overridden so that input is left
      unsplit at the sentence level, each "sentence" is broken into its
      individual elements (characters, for a string), and generated
      output is glued back together with no separator.

      NOTE(review): presumably markovify calls these hooks while building
      the corpus and while rendering output -- confirm against the
      markovify documentation.
      """

      def sentence_split(self, text):
          """Return ``text`` unchanged; no sentence splitting is done.

          NOTE(review): the script in this file passes a list of words, so
          each word presumably acts as one "sentence" -- confirm.
          """
          return text

      def word_split(self, sentence):
          """Return the elements of ``sentence`` as a new list."""
          return [*sentence]

      def word_join(self, sentence):
          """Concatenate the elements of ``sentence`` with no separator."""
          pieces = sentence
          return ''.join(pieces)
  17.  
  # Build the character-level model once and cache it as JSON; later runs
  # load the cached model instead of re-reading every corpus.
  MODEL_PATH = 'model.json'
  SAMPLE_COUNT = 50

  if not os.path.exists(MODEL_PATH):
      # No cached model yet: collect words from the NLTK corpora.
      all_words = []
      for corpus in [gutenberg, webtext, nps_chat, brown, reuters, genesis, movie_reviews]:
          all_words.extend(corpus.words())

      # Tweets are read as raw strings, so split each on whitespace and
      # drop tokens that start with '@' (user mentions).
      for tweet in twitter_samples.strings():
          all_words.extend(
              word for word in tweet.split() if not word.startswith('@')
          )

      print('Done adding words')

      model = WordChain(all_words, state_size=3)
      model_json = model.to_json()
      with open(MODEL_PATH, 'w') as f:
          f.write(model_json)
  else:
      with open(MODEL_PATH, 'r') as f:
          model = WordChain.from_json(f.read())

  print('Done making model')

  for _ in range(SAMPLE_COUNT):
      sentence = model.make_sentence(tries=100)
      # make_sentence returns None when no acceptable output was produced
      # within ``tries`` attempts; skip those rather than printing "None".
      if sentence is not None:
          print(sentence)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement