Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from __future__ import division
- __doc__ = '''
- A Markov Text generator. Based on shaney.py by Greg McFarlane
- 22 Sep 2015 - Initial writing. Basic generator, takes a corpus of text and
- generates a given number of Markov sentences from it.
- 29 Sep 2015 - Refactored slightly and added analytics.
- '''
- __ver__ = 0.02
- import random
- from numpy import mean
- #-------------------------------------------------------------------------------
- # Classes
- #-------------------------------------------------------------------------------
- #-------------------------------------------------------------------------------
- # Functions
- #-------------------------------------------------------------------------------
def choice(words):
    ''' Return one randomly selected element of words.

    words must be a non-empty sequence; random.choice raises IndexError
    when it is given an empty one.
    '''
    picked = random.choice(words)
    return picked
def get_words(file_name):
    ''' Read a text file and return its words as a list.

    file_name -- path of the text file to read.

    Returns the file contents split on runs of whitespace, in file order.
    An empty or whitespace-only file yields an empty list. Errors raised
    by open() or read() propagate to the caller.
    '''
    # "with" closes the handle even when read() raises; the handle is not
    # called "file" so the builtin of that name is not shadowed.
    with open(file_name, 'r') as corpus:
        text = corpus.read()
    return text.split()
def make_dictionaries(words):
    ''' Make the dictionary of Markov chains and the list of sentence endings.

    words -- list of words (strings) in corpus order.

    Returns a (markov_dict, sentence_ends) tuple:
      markov_dict   -- maps each (word, next_word) pair to the list of words
                       that followed that pair in the corpus; repeats are
                       kept, so frequent followers are picked more often.
      sentence_ends -- the pairs whose second word ends in '.', '?' or '!',
                       each listed once, in order of first appearance.

    When no sentence ending is found a message is printed and ({}, []) is
    returned, so callers can always unpack two values.
    '''
    sentence_ends = []
    markov_dict = {}
    prev1 = prev2 = ''
    # generate a dictionary of all the word pairs and their possible next words.
    for word in words:
        # The first two words only prime the (prev2, prev1) window.
        if prev1 != '' and prev2 != '':
            key = (prev2, prev1)
            if key in markov_dict:
                markov_dict[key].append(word)
            else:
                markov_dict[key] = [word]
                # Record an ending the first time its pair is seen, so each
                # ending appears once no matter how often it occurs.
                if prev1.endswith(('.', '?', '!')):
                    sentence_ends.append(key)
        prev2 = prev1
        prev1 = word
    if not sentence_ends:
        print('Sorry, there are no sentences in the text.')
        # Same shape as the success path; a bare {} cannot be unpacked.
        return {}, []
    return markov_dict, sentence_ends
def generate_text(markov_dict, sentence_ends, count = 10):
    ''' Generate the Markov texts.

    markov_dict   -- maps (word, next_word) pairs to lists of follower words.
    sentence_ends -- sequence of pairs after which a new sentence may start.
    count         -- how many sentences to generate (default 10).

    Returns a list of generated sentences; every word in a sentence,
    including the last, is followed by a single space. Returns an empty
    list when count is not positive or when sentence_ends is empty, since
    there is then nothing to generate or nowhere to start.

    The walk starts from a random sentence ending and restarts from another
    one whenever the current pair has no recorded follower. Words collected
    before such a restart stay in the sentence being built.
    '''
    sentences = [] # to hold the generated sentences
    if count <= 0 or not sentence_ends:
        return sentences
    # Set copy for the per-word membership test; the original sequence is
    # still what choice() draws from, so any duplicates keep their weight.
    end_keys = set(sentence_ends)
    # Make the sentences
    key = () # is not a dictionary key, so the first pass picks a start
    pieces = []
    while count > 0:
        if key in markov_dict:
            word = choice(markov_dict[key])
            pieces.append("%s " % word)
            key = (key[1], word)
            if key in end_keys:
                sentences.append(''.join(pieces))
                pieces = []
                count -= 1
                key = choice(sentence_ends)
        else:
            key = choice(sentence_ends)
    return sentences
def get_variability(markov_dict, sentence_ends):
    ''' Check how well the chosen text will work for making different
    sentences by counting how many variations each word pair has to
    choose from.

    markov_dict   -- maps word pairs to lists of possible third words.
    sentence_ends -- collection of sentence-ending word pairs; only its
                     length is used.

    Prints a summary to standard output and returns None. An empty
    markov_dict is reported with zero for the average and the percentage
    instead of dividing by zero.
    '''
    num_choices = len(markov_dict) # total number of triplet choices available
    # how many third words each word pair can choose from
    fanouts = [len(followers) for followers in markov_dict.values()]
    # word pair keys that only have 1 alternate third word
    immutable_fragments = sum(1 for fanout in fanouts if fanout == 1)
    mutable_fragments = num_choices - immutable_fragments
    if num_choices:
        # average number of choices each word pair has
        avg_choices = mean(fanouts)
        # float() keeps this a true division even without the __future__ import
        pct_mutable = float(mutable_fragments) / num_choices * 100
    else:
        avg_choices = 0.0
        pct_mutable = 0.0
    sen_end_count = len(sentence_ends)
    print('''
%d total word pair choices found.
%d of these have only 1 possible third word.
%d have more than once choice. (%0.2f%%)
Average number of choices available is %0.4f.
%d possible sentence endings.\n\n''' % (
        num_choices, immutable_fragments, mutable_fragments,
        pct_mutable, avg_choices, sen_end_count
    ))
- #-------------------------------------------------------------------------------
- # Code starts here
- #-------------------------------------------------------------------------------
# Alternative corpora; uncomment one of these to switch the input text.
#sentence_corpus = 'NumerologyTexts.txt'
#sentence_corpus = 'Quote.txt'
sentence_corpus = 'horoscopes.txt'
count = 6 # how many sentences to generate
random.seed()
words = get_words(sentence_corpus)
# make_dictionaries prints its own message when the corpus has no sentence
# endings and may then hand back an empty result, so normalise it before
# unpacking instead of crashing.
dictionaries = make_dictionaries(words)
markov_dict, sentence_ends = dictionaries if dictionaries else ({}, [])
sentences = []
if sentence_ends:
    get_variability(markov_dict, sentence_ends)
    sentences = generate_text(markov_dict, sentence_ends, count)
    # display what we got
    print("\n".join(sentences))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement