Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the Titanic data and one-hot encode the values of its categorical
# (object-dtype) columns. The encoded result is exposed as `onehotlabels`.
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Load the dataset.
X = pd.read_csv('titanic_data.csv')

# Limit to categorical data. Take an explicit copy so the column assignments
# in the loop below write to an independent frame rather than to a possible
# view of the frame that was read from disk.
X = X.select_dtypes(include=[object]).copy()

# Create a LabelEncoder object, which will turn all labels present in each
# feature into numbers.
le = LabelEncoder()

# For each feature in X, apply the LabelEncoder's fit_transform function,
# which first learns the labels for the feature (fit) and then changes the
# labels to numbers (transform). The same encoder is refit on every column,
# so after the loop `le` only holds the mapping of the last column.
# NOTE(review): presumably some columns contain missing values (NaN); a
# column mixing strings and NaN may be rejected by LabelEncoder -- confirm
# against the data and fill or drop the missing entries first if so.
for feature in X:
    X[feature] = le.fit_transform(X[feature])

# Create a OneHotEncoder object, which will create a feature for each label
# present in the data.
ohe = OneHotEncoder()

# Apply the OneHotEncoder's fit_transform function to all of X, which first
# learns all the (now numerical) labels in the data (fit) and then changes
# the data to one-hot encoded entries (transform).
xt = ohe.fit_transform(X)
onehotlabels = xt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement