# Titanic

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Dense


def to_one_hot(column, mappings):
    """Encode a Series as one 0/1 indicator column per named predicate.

    Parameters
    ----------
    column : pandas.Series
        Values to encode. Its ``name`` prefixes every generated column and
        its index is used as the index of the result.
    mappings : dict
        Maps a label to a predicate. Each predicate is called on every
        scalar value of *column* (through ``Series.apply``) and its result
        is cast to ``int``.

    Returns
    -------
    pandas.DataFrame
        One ``'<column.name>_<label>'`` column per entry of *mappings*, in
        the iteration order of *mappings*. Nothing forces the predicates to
        be mutually exclusive or exhaustive, so a row may contain several
        ones or none at all.
    """
    # Build on the column's own index so the result always has one row per
    # input value, even when *mappings* is empty.
    column_transformed = pd.DataFrame(index=column.index)
    for label, predicate in mappings.items():
        column_transformed['{}_{}'.format(column.name, label)] \
            = column.apply(predicate).astype(int)
    return column_transformed


if __name__ == '__main__':
    # Seed NumPy's global RNG.
    # NOTE(review): this seeds NumPy only; whether the Keras backend's weight
    # initialisation and shuffling honour it depends on the backend - confirm.
    np.random.seed(2)

    # Read input
    data_train = pd.read_csv('input/train.csv')
    data_test = pd.read_csv('input/test.csv')
    # Stack train on top of test so both receive the same feature encoding.
    # ignore_index gives every row a unique label; every later split is
    # positional (iloc), so the per-file labels are not needed.
    data = pd.concat([data_train, data_test], ignore_index=True)
    # The first `training_items` rows of `data` are the training rows.
    training_items = len(data_train)

    # Transform features
    # Family size on board: siblings/spouses plus parents/children.
    relatives = data[['SibSp', 'Parch']].sum(axis=1).rename('Relatives')
    # Title is the word between ', ' and '.' in the name. expand=False makes
    # extract return a Series (the default returns a DataFrame on current
    # pandas), which is what to_one_hot needs.
    title = data['Name'].str \
        .extract(r', ([A-Za-z]+)\.', expand=False) \
        .rename('Title')
    data_transformed = pd.concat([
        to_one_hot(data['Pclass'], {
            '1st': lambda v: v == 1,
            '2nd': lambda v: v == 2,
            '3rd': lambda v: v == 3,
        }),
        to_one_hot(data['Sex'], {
            'female': lambda v: v == 'female',
            'male': lambda v: v == 'male',
        }),
        to_one_hot(data['Age'], {
            '0-15': lambda v: v <= 15,
            '16-30': lambda v: 15 < v <= 30,
            '31+': lambda v: 30 < v,
            # NaN is the only value that differs from itself.
            'N/A': lambda v: v != v,
        }),
        to_one_hot(relatives, {
            '0': lambda v: v == 0,
            '1-3': lambda v: 0 < v <= 3,
            '4+': lambda v: 3 < v,
        }),
        to_one_hot(title, {
            'Mrs': lambda v: v == 'Mrs',
            'Miss': lambda v: v == 'Miss',
            'Mr': lambda v: v == 'Mr',
            'Master': lambda v: v == 'Master',
            # Also catches names where no title could be extracted (NaN).
            'Other': lambda v: v not in ['Mrs', 'Miss', 'Mr', 'Master'],
        }),
        to_one_hot(data['Fare'], {
            '0-10': lambda v: v <= 10,
            '11-55': lambda v: 10 < v <= 55,
            '55+': lambda v: 55 < v,
        }),
        to_one_hot(data['Cabin'], {
            # v == v is False only for NaN, i.e. when no cabin is recorded.
            '1+': lambda v: v == v,
            '0': lambda v: v != v,
        }),
        # Rows without a Survived value (NaN) get 0 in both label columns;
        # they are excluded from training by the positional slice below.
        to_one_hot(data['Survived'], {
            '0': lambda v: v == 0,
            '1': lambda v: v == 1,
        }),
    ], axis=1)

    # Build model
    label_columns = ['Survived_0', 'Survived_1']
    features = data_transformed.drop(label_columns, axis=1)
    x = features.iloc[:training_items]
    y = data_transformed[label_columns].iloc[:training_items]

    # One hidden ReLU layer as wide as the input, then a softmax over the
    # two label columns.
    model = Sequential()
    model.add(
        Dense(
            len(x.columns),
            activation='relu',
            input_shape=(len(x.columns),),
        )
    )
    model.add(Dense(len(y.columns), activation='softmax'))
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Train model
    history = model.fit(
        x, y,
        verbose=True,
        validation_split=0.3,
        batch_size=32,
        epochs=1000,
        callbacks=[
            EarlyStopping(patience=10)
        ],
    )

    # Visualize training
    pd.DataFrame(history.history).plot(kind='line')
    plt.show()

    # Make predictions
    y_hat = np.asarray(model.predict(features.iloc[training_items:]))
    answer = pd.DataFrame({
        'PassengerId': data_test['PassengerId'].values,
        # Column 0 scores 'Survived_0', column 1 scores 'Survived_1';
        # ties go to 1.
        'Survived': np.where(y_hat[:, 0] > y_hat[:, 1], 0, 1),
    })
    answer.to_csv('output/answer.csv', index=False)

    # Play around
    jack = [
        0, 0, 1,  # Pclass: 3rd
        0, 1,  # Sex: male
        0, 1, 0, 0,  # Age: 20
        1, 0, 0,  # Relatives: 0
        0, 0, 0.5, 0, 0.5,  # Title: Dunno
        1, 0, 0,  # Fare: 0; won in gambling
        0, 1,  # Cabin: No
    ]
    rose = [
        1, 0, 0,  # Pclass: 1st class
        1, 0,  # Sex: female
        0, 1, 0, 0,  # Age: 17 yo
        0, 1, 0,  # Relatives: mother and fiance
        1, 0, 0, 0, 0,  # Title: Mrs
        0, 0.5, 0.5,  # Fare: Dunno
        0.2, 0.8,  # Cabin: Dunno
    ]
    # Hand-written rows in the same column order as the training features.
    # Built in one go as a float frame so predict gets a numeric array with
    # flat (non-MultiIndex) column labels.
    movie_chrs = pd.DataFrame(
        [jack, rose], columns=x.columns, dtype=float,
    )
    movie_chrs_fate = pd.DataFrame()
    movie_chrs_fate['Name'] \
        = ['Jack Dawson', 'Rose DeWitt Bukater']
    print(movie_chrs_fate)
    movie_chrs_hat = np.asarray(model.predict(movie_chrs))
    print(movie_chrs_hat)

    movie_chrs_fate['Survived'] = np.where(
        movie_chrs_hat[:, 0] > movie_chrs_hat[:, 1], 0, 1
    )
    print(movie_chrs_fate)