Chans

source1

Dec 25th, 2017
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.81 KB | None | 0 0
  1. # Importing the libraries
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import pandas as pd
  5.  
  6. np.set_printoptions(threshold=np.nan)
  7.  
  8. # Importing the dataset
  9. dataset = pd.read_csv("Data.csv")
  10. X = dataset.iloc[:, :-1].values
  11. y = dataset.iloc[:, 3].values
  12.  
  13. # Taking care of missing data
  14. from sklearn.preprocessing import Imputer
  15. imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
  16. imputer = imputer.fit(X[:, 1:3])
  17. X[:, 1:3] = imputer.transform(X[:, 1:3])
  18.  
  19. # Encoding categorical data
  20. from sklearn.preprocessing import LabelEncoder, OneHotEncoder
  21. le = LabelEncoder()
  22. X[:, 0] = le.fit_transform(X[:, 0])
  23.  
  24. # Add One Hot Encoder(ohe)
  25. ohe = OneHotEncoder(categorical_features=[0])
  26. X = ohe.fit_transform(X).toarray()
  27.  
  28. # Encoding categorical data in y
  29. le = LabelEncoder()
  30. y = le.fit_transform(y)
Advertisement
Add Comment
Please, Sign In to add comment