Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def load_data_and_labels(authors_file='authors.json', data_dir=None):
    """
    Load per-author text files and build one-hot author labels.

    Reads a JSON list of author entries from ``authors_file``. Each entry
    must provide a ``'file_name'`` key (text file, one example per line,
    located in ``data_dir``) and an ``'index'`` key (position of the 1 in
    that author's one-hot label vector).

    Args:
        authors_file: Path of the JSON file describing the authors.
        data_dir: Directory holding the per-author text files. Defaults to
            the original hard-coded rt-polaritydata directory.

    Returns:
        A two-element list ``[x_text, y]``: ``x_text`` is the list of all
        sentences (in author order), each cleaned with ``clean_str`` and
        split on single spaces; ``y`` is the numpy array produced by
        concatenating the per-author one-hot vectors along axis 0.
    """
    # Local import so the block does not depend on a file-level `import os`.
    import os

    if data_dir is None:
        data_dir = ("/home/pierluigi/PycharmProjects/Authors_Comparison/"
                    "cnn-text-classification/data/rt-polaritydata/")

    # Load the author descriptions.
    with open(authors_file) as data_file:
        data = json.load(data_file)

    # Read every author's examples; `with` guarantees each file is closed.
    list_examples = []
    for d in data:
        with open(os.path.join(data_dir, d['file_name'])) as examples_file:
            list_examples.append([s.strip() for s in examples_file.readlines()])

    # One one-hot vector per author, as wide as the number of authors.
    labels_lenght = len(list_examples)
    list_labels = []
    for d in data:
        temp_label = [0] * labels_lenght  # initialize the temporary label with all zeros
        temp_label[d['index']] = 1
        list_labels.append(temp_label)
    for label in list_labels:
        # Parenthesized form prints the same text on Python 2 and Python 3.
        print(label)

    # Flatten the per-author example lists, then clean and tokenize.
    x_text = []
    for exa in list_examples:
        x_text.extend(exa)
    x_text = [clean_str(sent).split(" ") for sent in x_text]

    # NOTE(review): list_labels holds one vector per *author*, so this
    # concatenation yields a flat 1-D array of len(data) * len(data) values,
    # not one label row per sentence in x_text. Presumably every sentence
    # should carry its author's one-hot vector -- confirm against the caller
    # before changing the returned shape.
    y = np.concatenate(list_labels, 0)
    return [x_text, y]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement