Advertisement
tpaper

Untitled

Feb 3rd, 2019
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.63 KB | None | 0 0
  1. import re, random, sys
  2. from pathlib import Path
  3. import pickle
  4.  
  5. def format_line(strin):
  6.     out = re.sub('[^a-zA-Z?!àùòèé\'\., ]+', ' ', strin)
  7.     out = re.sub('[ ]+', ' ', out)
  8.     if( out != "" and out != " " ):
  9.         return out
  10.     return None
  11.  
  12. def buildchain(messages, dim):
  13.     out = dict()
  14.     for message in messages:
  15.         x = message.split(' ')[:-1]
  16.         for i in range(0, len(x) - dim):
  17.             if tuple(x[i:i+dim]) in out:
  18.                 out[tuple(x[i:i+dim])].append(x[i+dim])
  19.             else:
  20.                 out[tuple(x[i:i+dim])] = [x[i+dim]]
  21.                
  22.     return out
  23.  
  24. def findnext(chains, text):
  25.     out = ''
  26.     words = text.split(' ')
  27.     if(text == ''):
  28.         out = random.choice(chains[0])
  29.     else:
  30.         for i in range(len(chains)-1, 0, -1):
  31.             if i <= len(words):
  32.                 #print(words[-i:])
  33.                 if tuple(words[-i:]) in chains[i]:
  34.                     out = random.choice(chains[i][tuple(words[-i:])])
  35.                     break;
  36.                
  37.     if out == '':
  38.         out = random.choice(chains[0])
  39.         #print("fallback")
  40.    
  41.     return out
  42.                
  43.    
  44.  
  45. def main():
  46.     myDb = Path("markov.db")
  47.    
  48.     if(myDb.is_file()):
  49.         with open('markov.db', 'rb') as i_f:
  50.             chains = pickle.load(i_f)
  51.     else:
  52.         print("regenerating database...")
  53.         chains = [[]]
  54.         with open('intext') as f:
  55.             content = f.readlines()
  56.            
  57.         for a in range(0,len(content)):
  58.             content[a] = format_line(content[a])
  59.             if(content[a]):
  60.                 for i in content[a].split(' '):
  61.                     if(i != ''):
  62.                         chains[0].append(i)
  63.                
  64.         content = [x.lower() for x in content if x] #remove empty lines
  65.            
  66.         chains.append(buildchain(content, 1))
  67.         chains.append(buildchain(content, 2))
  68.         chains.append(buildchain(content, 3))
  69.         chains.append(buildchain(content, 4))
  70.         chains.append(buildchain(content, 5))
  71.        
  72.         with open('markov.db', 'wb') as o_f:
  73.             pickle.dump(chains, o_f, pickle.HIGHEST_PROTOCOL)
  74.         print("done")
  75.    
  76.     if(len(sys.argv) > 1):
  77.         try:
  78.             dim = int(sys.argv[1])
  79.         except:
  80.             dim = 20
  81.         if( dim > 100 or dim < 1 ):
  82.             dim = 20
  83.     else:
  84.         dim = 20
  85.            
  86.     if(len(sys.argv) > 2):
  87.         text = ' '.join(sys.argv[2:])
  88.     else:
  89.         text = ''
  90.    
  91.     for i in range(0,dim):
  92.         text += ' ' + findnext(chains,text)
  93.        
  94.     print(text)
  95.        
  96. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement