Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import random, re, sys, csv, time
- from collections import deque
# Number of preceding words that together form one Markov state.
order = 2
source = "tweets.csv"  # input text
# Sliding window over the most recent `order` words; starts out all blanks.
prefix = deque([" "] * order)  # current prefix
suffixes = {}  # a dictionary maps prefix to a list of following strings
def add(s):
    """Record *s* as a follower of the current prefix, then slide the prefix.

    The module-level ``prefix`` deque is joined with ``'@!@'`` to form the
    lookup key, and *s* is appended to that key's list in the module-level
    ``suffixes`` dict (the list is created the first time a key is seen).
    Afterwards the oldest prefix entry is dropped and *s* becomes the newest.

    Args:
        s: the string that followed the current prefix in the input.
    """
    # setdefault creates the list on first use, so no membership test is needed.
    suffixes.setdefault('@!@'.join(prefix), []).append(s)
    prefix.popleft()
    prefix.append(s)  # update
def gen(n):
    """Generate at most *n* characters of text from the Markov chain.

    Starting from the all-blank prefix, the function first takes *n* random
    steps through the module-level ``suffixes`` table to pick a starting
    point.  It then emits words, each followed by a single space, until one
    of the following happens: the end marker ``" "`` is drawn, the table has
    no entry for the current prefix, or the next word would push the text
    past *n* characters.

    Args:
        n: maximum length of the returned string, trailing space included.

    Returns:
        The generated words, each followed by one space; ``''`` if none fit.
    """
    blank = [" "] * order
    prefix = deque(blank)  # local walk state; the module-level prefix is untouched

    # No random.seed() call here: the random module seeds itself on import,
    # and reseeding from the clock on every call would override any seed the
    # caller chose for reproducibility.

    # prime with some randomness
    for _ in range(n):
        followers = suffixes.get('@!@'.join(prefix))
        if not followers:
            # The walk reached a prefix with no recorded followers (for
            # example after drawing the end marker).  Restart from the blank
            # prefix instead of raising KeyError.
            prefix = deque(blank)
            continue
        prefix.append(random.choice(followers))
        prefix.popleft()

    words = []
    length = 0  # characters emitted so far, counting the space after each word
    while length < n:
        followers = suffixes.get('@!@'.join(prefix))
        if not followers:
            break
        choice = random.choice(followers)
        if choice == " ":
            # End marker: the chain has nothing more to say.
            break
        if length + len(choice) > n - 1:
            # Word plus its trailing space would not fit within n characters.
            break
        words.append(choice)
        length += len(choice) + 1
        prefix.popleft()
        prefix.append(choice)
    return ''.join(word + ' ' for word in words)
if __name__ == "__main__":
    # read and treat input file
    # An optional first command-line argument overrides the default CSV path.
    if len(sys.argv) > 1:
        source = sys.argv[1]

    # The csv module expects a binary-mode file on Python 2 but a text-mode
    # file opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(source, 'rb')
    else:
        # NOTE(review): assumes the CSV export is UTF-8 encoded -- confirm.
        csvfile = open(source, newline='', encoding='utf-8')

    # Keep only rows whose in_reply_to_status_id column is empty.
    texts = []
    with csvfile:
        for row in csv.DictReader(csvfile):
            if row['in_reply_to_status_id'] == '':
                texts.append(row['text'])
    inp = ' '.join(texts)

    inp = re.sub(r'@\S+', '', inp)  # remove usernames.
    inp = re.sub(r'https?:\S+', '', inp)  # remove links (http and https).

    for s in inp.split():
        add(s)  # build Markov chain
    add(" ")  # end-of-input marker; gen() stops when it draws this

    hashtag = '#makyo_ebooks'
    for _ in range(1, 10):
        # Reserve room for the hashtag so each line stays within 140 characters.
        print(gen(140 - len(hashtag)) + hashtag)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement