# Untitled

def load_data_and_labels(authors_file='authors.json',
                         data_dir="/home/pierluigi/PycharmProjects/Authors_Comparison/cnn-text-classification/data/rt-polaritydata/"):
    """
    Load the per-author text files and build one one-hot label per sentence.

    The author list is read from a JSON file containing a sequence of
    objects; each object must provide:
      - 'file_name': name of that author's text file inside ``data_dir``
      - 'index': position of the 1 in that author's one-hot label

    Every line of every author file becomes one example: it is stripped,
    passed through ``clean_str`` and split on single spaces.

    Args:
        authors_file: path of the JSON file describing the authors.
        data_dir: directory that contains the author text files.

    Returns:
        A two-element list ``[x_text, y]`` where ``x_text`` is a list of
        token lists (one per sentence, authors in JSON order) and ``y`` is an
        integer array of shape (number of sentences, number of authors) whose
        row i is the one-hot label of ``x_text[i]``.

    Raises:
        IOError/OSError: if the JSON file or an author file cannot be opened.
        KeyError: if an author entry lacks 'file_name' or 'index'.
        IndexError: if an author's 'index' is outside the number of authors.
    """
    # Local import so the block does not rely on a file-level ``import os``.
    import os

    with open(authors_file) as data_file:
        authors = json.load(data_file)

    # The label width equals the number of authors (one class per author).
    num_authors = len(authors)

    raw_sentences = []
    author_labels = []
    sentence_labels = []
    for author in authors:
        path = os.path.join(data_dir, author['file_name'])
        # ``with`` guarantees the file is closed even if reading fails.
        with open(path) as examples_file:
            examples = [line.strip() for line in examples_file]

        # Initialize the label with all zeros, then mark this author's class.
        one_hot = [0] * num_authors
        one_hot[author['index']] = 1
        author_labels.append(one_hot)

        raw_sentences.extend(examples)
        # One label per sentence, so that y stays aligned with x_text.
        sentence_labels.extend([one_hot] * len(examples))

    # Debug output kept from the original: one one-hot vector per author.
    for label in author_labels:
        print(label)

    x_text = [clean_str(sent).split(" ") for sent in raw_sentences]

    # Explicit reshape keeps y two-dimensional even when there are no sentences.
    y = np.array(sentence_labels, dtype=int).reshape(
        len(sentence_labels), num_authors)
    return [x_text, y]