Advertisement
Guest User

Untitled

a guest
Jul 21st, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.58 KB | None | 0 0
  1. path= r'C:\Users\vrozenbaum\Documents\Inter_POC\IACC\clust'
  2.  
  3. #tagging the files
  4. class DocIterator(object):
  5. def __init__(self, doc_list, labels_list):
  6. self.labels_list = labels_list
  7. self.doc_list = doc_list
  8.  
  9. def __iter__(self):
  10. for idx, doc in enumerate(self.doc_list):
  11. yield TaggedDocument(words=doc.split(), tags=[self.labels_list[idx]])
  12.  
  13. labels = [f for f in listdir(path) if f.endswith('-out.csv')]
  14. print(labels)
  15. data = []
  16. for doc in labels:
  17. data.append(open(join(path, doc), 'r',errors='ignore').read())
  18.  
  19. it = DocIterator(data, labels)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement