Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
51
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.06 KB | None | 0 0
  1. # load the titanic data and then perform one-hot encoding on the feature names
  2.  
  3. import numpy as np
  4. import pandas as pd
  5.  
  6. # Load the dataset
  7. X = pd.read_csv('titanic_data.csv')
  8.  
  9. # Limit to categorical data
  10. X = X.select_dtypes(include=[object])
  11.  
  12. from sklearn.preprocessing import LabelEncoder
  13. from sklearn.preprocessing import OneHotEncoder
  14.  
  15. # Create a LabelEncoder object, which will turn all labels present in each feature to numbers.
  16. le = LabelEncoder()
  17.  
  18. # For each feature in X, apply the LabelEncoder's fit_transform function,
  19. # which will first learn the labels for the feature (fit)
  20. # and then change the labels to numbers (transform).
  21.  
  22. for feature in X:
  23. X[feature] = le.fit_transform(X[feature])
  24.  
  25. # Create a OneHotEncoder object, which will create a feature for each label present in the data.
  26. ohe = OneHotEncoder()
  27.  
  28. # Apply the OneHotEncoder's fit_transform function to all of X, which will first learn
  29. # of all the (now numerical) labels in the data (fit), and then change the data to one-hot encoded entries (transform).
  30. xt = ohe.fit_transform(X)
  31. onehotlabels = xt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement