Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- import shelve
- import string
- import sys
# Persistent mapping from a word (str) to its Word record. shelve pickles the
# values into the 'word.db' file, resolved relative to the working directory.
_WORD_DB_PATH = 'word.db'
words = shelve.open(_WORD_DB_PATH)
class Word(object):
    """A word together with counts of the words recorded as following it.

    Attributes:
        word: The word this record describes.
        relations: Dict mapping a successor word to the number of times it
            has been recorded through :meth:`set`.
    """

    def __init__(self, word):
        self.word = word
        self.relations = {}

    def __eq__(self, other):
        # Comparing with a non-Word used to raise AttributeError; returning
        # NotImplemented lets Python fall back to its default comparison.
        if not isinstance(other, Word):
            return NotImplemented
        return self.word == other.word

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Keep hashing consistent with __eq__, which compares only ``word``.
        return hash(self.word)

    def set(self, word):
        """Record one more occurrence of *word* directly after this word.

        Prints the updated count as a progress trace.
        """
        self.relations[word] = self.relations.get(word, 0) + 1
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s : %s + 1 = %d' % (self.word, word, self.relations[word]))

    def get(self, n):
        """Return up to *n* ``[count, word]`` lists, highest count first.

        Entries with equal counts are ordered by word, descending.
        """
        ranked = sorted(
            ([count, succ] for succ, count in self.relations.items()),
            reverse=True
        )
        return ranked[:n]
def insert(word, succ):
    """Record that *succ* was seen directly after *word* in the word store.

    Creates a new ``Word`` record for *word* if the store has none yet.

    Args:
        word: The preceding word; used as the key in ``words``.
        succ: The word that followed it.
    """
    entry = words[word] if word in words else Word(word)
    entry.set(succ)
    # A shelf opened without writeback=True hands out unpickled copies, so
    # the updated record must be assigned back or the new count is lost.
    words[word] = entry
def noise_reduct(sent):
    """Strip everything except ASCII letters and word separators from *sent*.

    Letters keep their original case. Every whitespace character (space, tab,
    newline, ...) becomes a single space, so words on adjacent lines stay
    separate instead of being glued together. All other characters are
    dropped.

    Args:
        sent: The text to clean.

    Returns:
        The cleaned text as a string.
    """
    letters = set(string.ascii_lowercase)
    kept = []
    for char in sent:
        if char.isspace():
            # Normalise any whitespace to a space so a later split() still
            # sees a word boundary here.
            kept.append(' ')
        elif char.lower() in letters:
            kept.append(char)
    return ''.join(kept)
def analyse(sent):
    """Record every adjacent word pair of *sent* in the word store.

    The text is cleaned with ``noise_reduct``, split on whitespace and
    lower-cased; each word is then passed to ``insert`` together with the
    word that follows it.
    """
    tokens = [token.lower() for token in noise_reduct(sent).split()]
    # Pair each token with its right-hand neighbour; the last token has none.
    for current, following in zip(tokens, tokens[1:]):
        insert(current, following)
def read(path):
    """Read the text file at *path* and feed its full contents to ``analyse``.

    Args:
        path: Path of the text file to learn from.
    """
    # The context manager closes the file even if reading fails; the original
    # never closed the handle.
    with open(path, 'r') as text:
        sentences = text.read()
    analyse(sentences)
def generate(word, size=10):
    """Build a sentence of up to *size* distinct words starting from *word*.

    Each next word is picked at random from the current word's ten most
    frequent successors, skipping words already in the sentence.

    Args:
        word: The starting word.
        size: Maximum number of words in the result.

    Returns:
        The words joined by single spaces, or ``None`` when *word* is not in
        the word store. The sentence is shorter than *size* when the chain
        reaches a word with no stored successors or no unused ones.
    """
    if word not in words:
        return
    sent = []
    for _ in range(size):
        sent.append(word)
        if word not in words:
            # A word seen only as a successor (e.g. the corpus's last word)
            # has no record; the original raised KeyError here.
            break
        candidates = [
            succ for _count, succ in words[word].get(10) if succ not in sent
        ]
        if not candidates:
            # Every successor is already used; the original retried forever.
            break
        word = random.choice(candidates)
    return ' '.join(sent)
def run():
    """Interactive loop: print the top successors of each word typed.

    Prompts with ``': '``. When the entered text is a key in the word store,
    its (up to) ten most frequent successors are printed on one line,
    separated by spaces. Unknown input prints nothing. The loop ends cleanly
    when standard input reaches end-of-file.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    while True:
        try:
            n = read_line(': ')
        except EOFError:
            # End of input (Ctrl-D or exhausted pipe): stop, don't crash.
            return
        if n in words:
            print(' '.join(str(pair[1]) for pair in words[n].get(10)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement