# Untitled

# Load the Titanic data and then one-hot encode its categorical features.

import numpy as np
import pandas as pd

# Read the Titanic CSV and keep only the object-dtype columns,
# which this script treats as the categorical features.
X = pd.read_csv('titanic_data.csv').select_dtypes(include=[object])

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Two-stage encoding of the categorical columns held in X:
#   1. LabelEncoder maps the labels of each column to integer codes.
#   2. OneHotEncoder expands those integer codes into one indicator
#      feature per label.
le = LabelEncoder()
ohe = OneHotEncoder()

# Stage 1: overwrite every column of X with its integer codes.
# The same encoder object is re-fitted on each column in turn, so once the
# loop finishes `le` only holds the classes learned from the last column.
for feature in X.columns:
    X[feature] = le.fit_transform(X[feature])

# Stage 2: learn the set of integer codes per column (fit) and emit the
# one-hot encoded matrix (transform). Both names refer to the same result.
onehotlabels = xt = ohe.fit_transform(X)