Advertisement
makyo

Crappy modified twitter archive markov

Feb 17th, 2014
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.67 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import random, re, sys, csv, time
  3. from collections import deque
  4.  
  5. order = 2
  6. source = "tweets.csv" # input text
  7. prefix = deque([" "] * order) # current prefix
  8. suffixes = {} # a dictionary maps prefix to a list of following strings
  9.  
  10. def add(s):
  11.     key = '@!@'.join(prefix)
  12.     if key in suffixes:
  13.         suffixes[key].append(s)
  14.     else:
  15.         suffixes[key] = [ s ]
  16.     prefix.popleft()
  17.     prefix.append(s) # update
  18.  
  19. def gen(n):
  20.     prefix = deque([" "] * order) # clear prefix
  21.     result = ''
  22.     random.seed(time.time())
  23.     # prime with some randomness
  24.     for i in range(n):
  25.         prefix.append(random.choice( suffixes['@!@'.join(prefix)] ))
  26.         prefix.popleft()
  27.     while len(result) < n:
  28.         choice = random.choice( suffixes['@!@'.join(prefix)] )
  29.         if choice == " ":
  30.             break
  31.         if len(result) + len(choice) > n - 1:
  32.             break
  33.         result = result + choice + ' '
  34.         prefix.popleft()
  35.         prefix.append(choice)
  36.     return result
  37.  
  38. if __name__ == "__main__":
  39.     # read and treat input file
  40.     if len(sys.argv) > 1:
  41.         source = sys.argv[1]
  42.     inp = ""
  43.     with open(source, 'rb') as csvfile:
  44.         rows = csv.DictReader(csvfile)
  45.         for row in rows:
  46.             if row['in_reply_to_status_id'] == '':
  47.                 inp = inp + row['text'] + ' '
  48.     inp = re.sub('@\S+', '', inp) # remove usernames.
  49.     inp = re.sub('http:\S+', '', inp) # remove links.
  50.  
  51.     for s in inp.split():
  52.         add(s) # build Markov chain
  53.     add(" ")
  54.     for i in range(1, 10):
  55.         result = '#makyo_ebooks'
  56.         result = gen(140 - len(result)) + result
  57.         print result
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement