Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re, random, sys
- from pathlib import Path
- import pickle
# Runs of characters outside the accepted alphabet (ASCII letters, a few
# accented vowels, basic punctuation and the space) are replaced by a space.
# Raw strings avoid the invalid "\." escape a plain string literal would need.
_DISALLOWED_RUN = re.compile(r"[^a-zA-Z?!àùòèé'., ]+")
_SPACE_RUN = re.compile(r" +")


def format_line(strin):
    """Normalise one raw corpus line for chain building.

    Every run of characters that is not an ASCII letter, one of ``àùòèé``,
    one of ``? ! ' . ,`` or a space is replaced by a single space, then
    consecutive spaces are collapsed into one.

    Args:
        strin: The raw line (str), typically still ending in a newline.

    Returns:
        The cleaned line, or ``None`` when nothing but an empty string or a
        single space remains. Leading/trailing spaces are NOT stripped: a
        trailing newline therefore leaves one trailing space in the result.
    """
    cleaned = _DISALLOWED_RUN.sub(' ', strin)
    cleaned = _SPACE_RUN.sub(' ', cleaned)
    if cleaned in ('', ' '):
        return None
    return cleaned
def buildchain(messages, dim):
    """Build an order-``dim`` Markov chain table from space-separated messages.

    Each message is split on single spaces. For every window of ``dim``
    consecutive tokens, the token that follows the window is recorded as a
    possible successor.

    Args:
        messages: Iterable of strings whose words are separated by single
            spaces.
        dim: Number of tokens forming each key (the chain order).

    Returns:
        A dict mapping ``dim``-tuples of tokens to the list of tokens seen
        right after them (duplicates kept, so frequent successors are
        proportionally more likely under ``random.choice``).
    """
    out = {}
    for message in messages:
        tokens = message.split(' ')
        # A message ending in a space yields one empty final token; discard
        # only that one. Unconditionally slicing off the last token would
        # lose the final real word of a message with no trailing space.
        if tokens[-1] == '':
            tokens.pop()
        for start in range(len(tokens) - dim):
            key = tuple(tokens[start:start + dim])
            out.setdefault(key, []).append(tokens[start + dim])
    return out
def findnext(chains, text):
    """Pick the next word to append to ``text``.

    The longest available chain is tried first: for order ``n`` the last
    ``n`` words of ``text`` are looked up in ``chains[n]`` and, on the first
    hit, one of the recorded successors is chosen at random. When ``text`` is
    empty, nothing matches, or the chosen successor is an empty string, a
    random word from ``chains[0]`` is returned instead.

    Args:
        chains: List whose element 0 is a flat list of words and whose
            element ``n`` (n >= 1) is a dict mapping ``n``-tuples of words to
            lists of successor words.
        text: The space-separated text generated so far (may be empty).

    Returns:
        The chosen next word.

    Raises:
        IndexError: If the fallback is needed and ``chains[0]`` is empty.
    """
    out = ''
    if text != '':
        # The n-gram keys are built from lower-cased lines in main(), while
        # seed text and chains[0] words keep their original case; compare in
        # lower case so capitalised words can still match a chain.
        words = text.lower().split(' ')
        # An order larger than the number of words can never match.
        longest = min(len(chains) - 1, len(words))
        for order in range(longest, 0, -1):
            key = tuple(words[-order:])
            if key in chains[order]:
                out = random.choice(chains[order][key])
                break
    # Fallback: any word from the flat word list.
    return out or random.choice(chains[0])
def _build_chains(corpus_path='intext', max_order=5):
    """Read the corpus file and build the chain list used by findnext()."""
    # NOTE(review): the corpus is opened with the platform default encoding;
    # confirm whether it should be pinned (the accepted alphabet contains
    # accented letters).
    with open(corpus_path) as corpus:
        lines = [format_line(raw) for raw in corpus]
    lines = [line for line in lines if line]  # drop lines that cleaned to nothing

    # chains[0]: every word of the corpus in original case (fallback pool).
    chains = [[word for line in lines for word in line.split(' ') if word != '']]

    # chains[n]: order-n table built from the lower-cased lines.
    lowered = [line.lower() for line in lines]
    for order in range(1, max_order + 1):
        chains.append(buildchain(lowered, order))
    return chains


def _parse_length(argv, default=20):
    """Return the requested word count from argv[1], or ``default`` if absent/invalid."""
    if len(argv) <= 1:
        return default
    try:
        dim = int(argv[1])
    except ValueError:
        return default
    # Only 1..100 words are accepted; anything else falls back to the default.
    return dim if 1 <= dim <= 100 else default


def main():
    """Generate and print Markov-chain text.

    Command line: ``script [word_count] [seed words ...]``

    * ``word_count`` - number of words to generate (1-100; default 20, also
      used when the value is not an integer or out of range).
    * ``seed words`` - optional starting text the generated words are
      appended to.

    The chain tables are cached in ``markov.db`` (pickle). When that file is
    missing they are rebuilt from the ``intext`` corpus file and saved.
    """
    db_path = Path("markov.db")
    if db_path.is_file():
        # NOTE(review): pickle.load can execute arbitrary code; only safe as
        # long as markov.db is a file this script wrote itself.
        with db_path.open('rb') as i_f:
            chains = pickle.load(i_f)
    else:
        print("regenerating database...")
        chains = _build_chains()
        with db_path.open('wb') as o_f:
            pickle.dump(chains, o_f, pickle.HIGHEST_PROTOCOL)
        print("done")

    dim = _parse_length(sys.argv)
    text = ' '.join(sys.argv[2:])  # empty string when no seed words were given

    for _ in range(dim):
        text += ' ' + findnext(chains, text)
    print(text)
# Run the generator only when executed as a script, so importing this module
# (e.g. to reuse the chain helpers) does not touch files or print anything.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement