Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from keras import Sequential
- from keras.callbacks import EarlyStopping
- from keras.layers import Dense
def to_one_hot(column, mappings):
    """Expand *column* into one 0/1 indicator column per mapping entry.

    Parameters
    ----------
    column : pd.Series
        Source column; its ``name`` attribute prefixes each generated
        column name.
    mappings : dict[str, callable]
        Maps a category label to a predicate applied element-wise with
        ``Series.apply``; truthy results become 1, falsy become 0.

    Returns
    -------
    pd.DataFrame
        One ``int`` column per mapping, named ``'{column.name}_{label}'``,
        in the dict's insertion order.
    """
    column_transformed = pd.DataFrame()
    # Iterate key/value pairs directly instead of keys plus a per-key
    # lookup (idiomatic, and avoids the redundant mappings[k] access).
    for label, predicate in mappings.items():
        column_transformed['{}_{}'.format(column.name, label)] \
            = column.apply(predicate).astype(int)
    return column_transformed
if __name__ == '__main__':
    # Fix NumPy's RNG so weight initialisation / shuffling is repeatable.
    np.random.seed(2)
    # Read input
    # Train and test are concatenated so the one-hot transforms below see
    # the full value range; test rows carry NaN in 'Survived'.
    data_train = pd.read_csv('input/train.csv')
    data_test = pd.read_csv('input/test.csv')
    data = pd.concat([data_train, data_test])
    # Transform features
    # Every feature is binned/encoded into 0/1 indicator columns via
    # to_one_hot; the pieces are concatenated column-wise (axis=1).
    data_transformed = pd.concat([
        to_one_hot(data['Pclass'], {
            '1st': lambda v: v == 1,
            '2nd': lambda v: v == 2,
            '3rd': lambda v: v == 3,
        }),
        to_one_hot(data['Sex'], {
            'female': lambda v: v == 'female',
            'male': lambda v: v == 'male',
        }),
        to_one_hot(data['Age'], {
            '0-15': lambda v: v <= 15,
            '16-30': lambda v: 15 < v <= 30,
            '31+': lambda v: 30 < v,
            # v != v is the NaN test: NaN never compares equal to itself,
            # so this bin flags passengers with a missing age.
            'N/A': lambda v: v != v,
        }),
        to_one_hot(
            # Total relatives aboard = siblings/spouses + parents/children.
            data[['SibSp', 'Parch']].sum(axis=1)
            .rename('Relatives'), {
                '0': lambda v: v == 0,
                '1-3': lambda v: 0 < v <= 3,
                '4+': lambda v: 3 < v,
            }),
        to_one_hot(
            # Pulls the honorific between ', ' and '.' out of the name,
            # e.g. 'Braund, Mr. Owen Harris' -> 'Mr'.
            # NOTE(review): non-raw string with '\.' (DeprecationWarning on
            # modern Python), and Series.str.extract without expand=False
            # returns a DataFrame on modern pandas, in which case
            # .rename('Title') would not name the column as intended —
            # verify against the pandas version in use.
            data['Name'].str
            .extract(', ([A-Za-z]+)\.').rename('Title'), {
                'Mrs': lambda v: v == 'Mrs',
                'Miss': lambda v: v == 'Miss',
                'Mr': lambda v: v == 'Mr',
                'Master': lambda v: v == 'Master',
                'Other': lambda v: v not in ['Mrs', 'Miss', 'Mr', 'Master'],
            }),
        to_one_hot(
            data['Fare'], {
                '0-10': lambda v: v <= 10,
                '11-55': lambda v: 10 < v <= 55,
                '55+': lambda v: 55 < v,
            }),
        to_one_hot(
            # Cabin is only checked for presence: v == v is False for NaN,
            # so '1+' means a cabin is recorded, '0' means it is missing.
            data['Cabin'], {
                '1+': lambda v: v == v,
                '0': lambda v: v != v,
            }),
        # The label is one-hot encoded too (two-column softmax target).
        to_one_hot(
            data['Survived'], {
                '0': lambda v: v == 0,
                '1': lambda v: v == 1,
            }),
    ], axis=1)
    # Build model
    # 891 = number of rows in the Kaggle Titanic train.csv; everything
    # after that index in the concatenated frame came from test.csv.
    training_items = 891
    # Features: everything except the two label columns.
    x = data_transformed \
        .drop('Survived_0', axis=1) \
        .drop('Survived_1', axis=1) \
        .iloc[:training_items]
    # Labels: the two one-hot 'Survived' columns.
    y = data_transformed[['Survived_0', 'Survived_1']] \
        .iloc[:training_items]
    # One hidden ReLU layer as wide as the input, then a 2-way softmax.
    model = Sequential()
    model.add(
        Dense(
            len(x.columns),
            activation='relu',
            input_shape=(len(x.columns),),
        )
    )
    model.add(Dense(len(y.columns), activation='softmax'))
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    # Train model
    # Up to 1000 epochs, but EarlyStopping halts once the validation loss
    # has not improved for 10 consecutive epochs (30% held out).
    history = model.fit(
        x, y,
        verbose=True,
        validation_split=0.3,
        batch_size=32,
        epochs=1000,
        callbacks=[
            EarlyStopping(patience=10)
        ],
    )
    # Visualize training
    pd.DataFrame(history.history).plot(kind='line')
    plt.show()
    # Make predictions
    # Same feature frame as x, but the rows past the training cut-off
    # (i.e. the test.csv passengers).
    y_hat = model.predict(
        data_transformed
        .drop('Survived_0', axis=1)
        .drop('Survived_1', axis=1)
        .iloc[training_items:]
    )
    answer = pd.DataFrame()
    answer['PassengerId'] = data_test['PassengerId']
    # Argmax over the two softmax outputs: predict 1 (survived) unless
    # class-0 probability is strictly larger.
    answer['Survived'] = pd.Series([0 if r[0] > r[1] else 1 for r in y_hat])
    answer.to_csv('output/answer.csv', index=False)
    # Play around
    # Hand-crafted feature rows for the film's characters, in the same
    # column order as x; fractional values spread belief across bins.
    # NOTE(review): columns=[x.columns.values] wraps the array in a list —
    # columns=x.columns is likely what was meant; verify the resulting
    # column index matches x before trusting these predictions.
    movie_chrs = pd.DataFrame(columns=[x.columns.values])
    jack = [
        0, 0, 1,  # Pclass: 3rd
        0, 1,  # Sex: male
        0, 1, 0, 0,  # Age: 20
        1, 0, 0,  # Relatives: 0
        0, 0, 0.5, 0, 0.5,  # Title: Dunno
        1, 0, 0,  # Fare: 0; won in gambling
        0, 1,  # Cabin: No
    ]
    movie_chrs.loc[0] = jack
    rose = [
        1, 0, 0,  # Pclass: 1st class
        1, 0,  # Sex: female
        0, 1, 0, 0,  # Age: 17 yo
        0, 1, 0,  # Relatives: mother and fiancee
        1, 0, 0, 0, 0,  # Title: Mrs
        0, 0.5, 0.5,  # Fare: Dunno
        0.2, 0.8,  # Cabin: Dunno
    ]
    movie_chrs.loc[1] = rose
    movie_chrs_fate = pd.DataFrame()
    movie_chrs_fate['Name'] \
        = ['Jack Dawson', 'Rose DeWitt Bukater']
    print(movie_chrs_fate)
    print(model.predict(movie_chrs))
    # Same argmax decision rule as for the submission file above.
    movie_chrs_fate['Survived'] = pd.Series(
        [0 if r[0] > r[1] else 1
         for r in model.predict(movie_chrs)]
    )
    print(movie_chrs_fate)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement