# Untitled

import numpy as np
import pandas as pd

# Load the pre-computed arrays from the NumPy ``.npz`` archive.
#
# ``np.load`` must be used here rather than the built-in ``open``: ``open``
# returns a plain file object that cannot be indexed by array name, whereas
# ``np.load`` returns an ``NpzFile`` that supports ``d['name']`` lookups and
# closes the underlying file when the ``with`` block exits.
#
# The keys ``arr_0`` .. ``arr_4`` are the default names ``np.savez`` gives to
# positional arguments.  NOTE(review): the mapping of keys to variables below
# assumes the arrays were saved in exactly this order -- confirm against the
# script that wrote the archive.
with np.load('./holographic.npz') as d:
    indices = d['arr_0']
    X_train = d['arr_1']
    X_val = d['arr_2']
    y_train = d['arr_3']
    y_val = d['arr_4']

"""
While loading the data into the DataFrame, some lines are read incorrectly: several tweets get merged into a single record, so the resulting tweet length exceeds 140 characters. These records are removed as follows:
"""
# Read the tab-separated training file and, in the same expression, keep only
# the rows whose 'Tweet text' value is at most 140 characters long.
data = (
    pd.read_csv(
        "./datasets/train/SemEval2018-T3-train-taskA_emoji.txt",
        sep="\t",
    )
    .loc[lambda frame: frame['Tweet text'].map(len) <= 140]
)

# Now you can use the "indices" on the lists in this data dictionary.