Advertisement
Guest User

Untitled

a guest
May 29th, 2016
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.27 KB | None | 0 0
  1. def load_data_and_labels():
  2. """
  3. Loads MR polarity data from files, splits the data into words and generates labels.
  4. Returns split sentences and labels.
  5. """
  6. # Load data from files
  7.  
  8.  
  9.  
  10.  
  11. with open('authors.json') as data_file:
  12. data = json.load(data_file)
  13.  
  14. # pprint(data)
  15.  
  16. list_examples = list()
  17.  
  18. for d in data:
  19. temp_examples = list(open(
  20. "/home/pierluigi/PycharmProjects/Authors_Comparison/cnn-text-classification/data/rt-polaritydata/" + d[
  21. 'file_name']).readlines())
  22. temp_examples = [s.strip() for s in temp_examples]
  23. list_examples.append(temp_examples)
  24.  
  25. labels_lenght = len(list_examples)
  26.  
  27. list_labels = list()
  28. for d in data:
  29. i = 0
  30. temp_label = []
  31. while i < labels_lenght: # inzializzi la label temporanea con tutti 0
  32. temp_label.append(0)
  33. i = i + 1
  34. temp_label[d['index']] = 1
  35. list_labels.append(temp_label)
  36.  
  37. for l in list_labels:
  38. print l
  39.  
  40. x_text = []
  41. for exa in list_examples:
  42. x_text = x_text + exa
  43. x_text = [clean_str(sent) for sent in x_text]
  44. x_text = [s.split(" ") for s in x_text]
  45.  
  46. y = np.concatenate(list_labels, 0)
  47. return [x_text, y]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement