Advertisement
grist

Markov Chain Text Generator

Sep 25th, 2015
400
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.14 KB | None | 0 0
  1. #!/usr/bin/env python
  2. from __future__ import division
  3. __doc__ = '''
  4.  
  5. A Markov Text generator. Based on shaney.py by Greg McFarlane
  6.  
  7. 22 Sep 2015 - Initial writing. Basic generator, takes a corpus of text and
  8.    generates a given number of Markov sentences from it.
  9. 29 Sep 2015 - Refactored slightly and added analytics.
  10. '''
  11. __ver__ = 0.02
  12.  
  13.  
  14. import random
  15. from numpy import mean
  16.  
  17. #-------------------------------------------------------------------------------
  18. # Classes
  19. #-------------------------------------------------------------------------------
  20.  
  21.  
  22. #-------------------------------------------------------------------------------
  23. # Functions
  24. #-------------------------------------------------------------------------------
  25. def choice(words):
  26.    ''' Randomly chose a word. Assumes words is non-empty
  27.   '''
  28.    return random.choice(words)
  29.  
  30.  
  31. def get_words(file_name):
  32.     ''' Get all the words in a file and return them as a list
  33.    '''
  34.     file = open(file_name, 'r')
  35.     text = file.read()
  36.     file.close()
  37.     return text.split()
  38.  
  39.  
  40. def make_dictionaries(words):
  41.     ''' Make the dictionary of Markov chains and sentence endings.
  42.    '''
  43.     sentence_ends = []
  44.     markov_dict = {}
  45.     prev1 = prev2 = ''
  46.  
  47.     # generate a dictionary of all the word pairs and their possible next words.
  48.     for word in words:
  49.         if prev1 != '' and prev2 != '':
  50.             key = (prev2, prev1)
  51.             if markov_dict.has_key(key):
  52.                 markov_dict[key].append(word)
  53.             else:
  54.                 markov_dict[key] = [word]
  55.                 if prev1[-1:] in ['.', '?', '!']:
  56.                     sentence_ends.append(key)
  57.         prev2 = prev1
  58.         prev1 = word
  59.  
  60.     if sentence_ends == []:
  61.         print 'Sorry, there are no sentences in the text.'
  62.         return {}
  63.     return markov_dict, sentence_ends
  64.  
  65.  
  66. def generate_text(markov_dict, sentence_ends, count = 10):
  67.     ''' Generate the Markov texts.
  68.    '''
  69.     sentences = [] # to hold the generated sentences
  70.  
  71.     # Make the sentences
  72.     key = ()
  73.     sentence = ''
  74.     while 1:
  75.         if markov_dict.has_key(key):
  76.             word = choice(markov_dict[key])
  77.             sentence += "%s " % word
  78.             key = (key[1], word)
  79.             if key in sentence_ends:
  80.                 sentences.append(sentence)
  81.                 sentence = ''
  82.                 count -= 1
  83.                 key = choice(sentence_ends)
  84.                 if count <= 0:
  85.                     break
  86.         else:
  87.             key = choice(sentence_ends)
  88.  
  89.     return sentences
  90.  
  91.  
  92. def get_variability(markov_dict, sentence_ends):
  93.     ''' Check how well the chosen text will work for making different
  94.    sentences by counting how many variations each word pair has to
  95.    choose from.
  96.    '''
  97.     num_choices = len(markov_dict) # total number of triplet choices available
  98.     # average number of choices each word pair has
  99.     avg_choices = mean([len(l) for l in markov_dict.values()])
  100.     # word pair keys that only have 1 alternate third word
  101.     immutable_fragments = len([l for l in markov_dict.values() if len(l) == 1])
  102.     sen_end_count = len(sentence_ends)
  103.     print '''
  104.    %d total word pair choices found.
  105.    %d of these have only 1 possible third word.
  106.    %d have more than once choice. (%0.2f%%)
  107.    Average number of choices available is %0.4f.
  108.    %d possible sentence endings.\n\n''' % (
  109.         num_choices, immutable_fragments, num_choices - immutable_fragments,
  110.         (num_choices - immutable_fragments)/num_choices * 100, avg_choices, sen_end_count
  111.         )
  112.  
  113.  
  114. #-------------------------------------------------------------------------------
  115. # Code starts here
  116. #-------------------------------------------------------------------------------
  117.  
  118. #sentence_corpus = 'NumerologyTexts.txt'
  119. #sentence_corpus = 'Quote.txt'
  120. sentence_corpus = 'horoscopes.txt'
  121. count = 6 # how many sentences to generate
  122.  
  123. random.seed()
  124.  
  125. words = get_words(sentence_corpus)
  126. markov_dict, sentence_ends = make_dictionaries(words)
  127. get_variability(markov_dict, sentence_ends)
  128. sentences = generate_text(markov_dict, sentence_ends, count)
  129.  
  130. # display what we got
  131. print "\n".join(sentences)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement