Advertisement
Guest User

Untitled

a guest
May 29th, 2016
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.26 KB | None | 0 0
  def load_data_and_labels(
          authors_file='authors.json',
          data_dir="/home/pierluigi/PycharmProjects/Authors_Comparison/cnn-text-classification/data/rt-polaritydata/"):
      """
      Load one text file per author and build a one-hot label for every sentence.

      ``authors_file`` is a JSON document that iterates as a sequence of
      entries; each entry must provide a ``'file_name'`` (resolved relative to
      ``data_dir``) and an ``'index'`` (the position set to 1 in that author's
      one-hot label). The number of classes equals the number of entries.

      Args:
          authors_file: Path of the JSON file describing the authors.
          data_dir: Directory that contains the per-author text files.

      Returns:
          A two-element list ``[x_text, y]``:
          * ``x_text`` -- one list of tokens per sentence; each stripped line
            is passed through ``clean_str`` (defined elsewhere in this file)
            and then split on single spaces.
          * ``y`` -- integer array with one one-hot row per sentence, in the
            same order as ``x_text``.

      Raises:
          ValueError: from ``np.concatenate`` when ``authors_file`` lists no
              entries.
          IndexError: when an entry's ``'index'`` is outside the class range.
      """
      # Local import so this block does not depend on the file's import header.
      import os

      with open(authors_file) as data_file:
          data = json.load(data_file)

      # Read every author's sentences, closing each file as soon as it is read.
      list_examples = []
      for d in data:
          with open(os.path.join(data_dir, d['file_name'])) as examples_file:
              list_examples.append([line.strip() for line in examples_file])

      num_classes = len(list_examples)

      # Build the labels: every sentence of an author gets that author's
      # one-hot vector, so y has exactly one row per entry of x_text.
      list_labels = []
      for d, examples in zip(data, list_examples):
          one_hot = [0] * num_classes
          one_hot[d['index']] = 1
          # Same per-author debug output as before; the parenthesised form
          # prints identically on Python 2 and Python 3.
          print(one_hot)
          # Shape (len(examples), num_classes); stays 2-D even for empty files.
          list_labels.append(np.tile(one_hot, (len(examples), 1)))

      # Flatten the per-author sentence lists, cleaning and tokenising each one.
      x_text = []
      for examples in list_examples:
          x_text.extend(clean_str(sent).split(" ") for sent in examples)

      y = np.concatenate(list_labels, 0)
      return [x_text, y]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement