Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Data preprocessing script: loads Data.csv, fills missing values in feature
# columns 1-2 with the column mean, one-hot encodes feature column 0 and
# label-encodes the target column.

# Importing the libraries
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Print arrays in full instead of summarising them. Current NumPy rejects
# threshold=np.nan with a ValueError; sys.maxsize is the documented value.
np.set_printoptions(threshold=sys.maxsize)

# Importing the dataset
dataset = pd.read_csv("Data.csv")
# Features are every column except the last; the target is the last column.
# Indexing the target with -1 (rather than a hard-coded 3) keeps it in step
# with the feature slice whatever the number of columns.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Taking care of missing data
# SimpleImputer replaces the removed sklearn.preprocessing.Imputer; it always
# works column-wise, which is what the old axis=0 argument selected.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])

# Encoding categorical data
# OneHotEncoder accepts the raw category values directly, so no separate
# LabelEncoder pass over column 0 is needed. ColumnTransformer replaces the
# removed categorical_features argument: the one-hot columns come first and
# the remaining columns are passed through to their right, as before.
ohe = OneHotEncoder()
ct = ColumnTransformer(
    [("onehot", ohe, [0])],
    remainder="passthrough",
    sparse_threshold=0,  # always return a dense array (was .toarray())
)
X = np.asarray(ct.fit_transform(X), dtype=float)

# Encoding categorical data in y
le = LabelEncoder()
y = le.fit_transform(y)
Advertisement
Add Comment
Please sign in to add a comment.