"""Preprocess ``Data.csv``: impute missing numbers and encode categoricals.

Running the script leaves the feature matrix ``X`` and the encoded target
``y`` defined at module level.
"""

# Importing the libraries
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Always print arrays in full. NumPy rejects a NaN threshold, so use
# sys.maxsize, which is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)

# Importing the dataset
dataset = pd.read_csv("Data.csv")
# Features are every column except the last; the target is the last column
# (the same column as index 3 for the four-column layout this script assumes).
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Taking care of missing data
# SimpleImputer replaces the removed sklearn.preprocessing.Imputer; it always
# works column-wise, which matches the old ``axis=0`` behaviour.
# NOTE(review): assumes feature columns 1 and 2 are numeric -- confirm
# against Data.csv.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])

# Encoding categorical data
# OneHotEncoder accepts the raw category values directly, so the former
# LabelEncoder pass over column 0 is no longer needed. ColumnTransformer
# replaces the removed ``categorical_features=[0]`` argument: the encoded
# columns come first, followed by the untouched remaining columns.
ohe = OneHotEncoder()
ct = ColumnTransformer(
    [("onehot", ohe, [0])],
    remainder="passthrough",
    sparse_threshold=0,  # always return a dense array (was ``.toarray()``)
)
# ColumnTransformer fits a clone of ``ohe``; the fitted encoder is available
# as ``ct.named_transformers_["onehot"]``.
X = np.asarray(ct.fit_transform(X), dtype=np.float64)

# Encoding categorical data in y
le = LabelEncoder()
y = le.fit_transform(y)