Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import subprocess as sub  # no longer used for loading below; kept in case other code relies on it

import numpy as np

import d2vec_kmeans as d2v

# Location of the tab-separated text table.
RAW_TEXT_PATH = '/Users/Nick/Desktop/dtas/ContagiousIndex/DOC2VEC/DATA/rawtextfinal.txt'
# Rows are kept only when they split into exactly this many tab-separated fields.
FIELD_COUNT = 5

# pull in text table
# Read the file directly rather than shelling out to `cat`: on Python 3 the
# subprocess pipe yields bytes, and splitting bytes on the str '\n' raises
# TypeError. Text-mode open() gives str on both Python 2 and Python 3.
# NOTE(review): the file is decoded with the platform default encoding --
# confirm the encoding of the data file and pass it explicitly if it differs.
with open(RAW_TEXT_PATH) as table:
    lines = table.read().split('\n')

# One tuple per well-formed row. Blank lines split into a single field, so the
# length check alone rejects them; each line is split only once.
raw = [
    tuple(fields)
    for fields in (line.split('\t') for line in lines)
    if len(fields) == FIELD_COUNT
]

# store text
# Column 4 (the last field) is the text and column 0 is the id. Building the
# columns per row yields empty tuples, not an IndexError, when no row qualifies.
raw_text = tuple(row[4] for row in raw)
ids = tuple(row[0] for row in raw)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement