Advertisement
MGhareeb

Titanic

Dec 16th, 2017
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.73 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. import pandas as pd
  4. from keras import Sequential
  5. from keras.callbacks import EarlyStopping
  6. from keras.layers import Dense
  7.  
  8.  
  9. def to_one_hot(column, mappings):
  10.     column_transformed = pd.DataFrame()
  11.     for k in mappings:
  12.         column_transformed['{}_{}'.format(column.name, k)] \
  13.             = column.apply(mappings[k]).astype(int)
  14.     return column_transformed
  15.  
  16.  
  17. if __name__ == '__main__':
  18.     np.random.seed(2)
  19.     # Read input
  20.     data_train = pd.read_csv('input/train.csv')
  21.     data_test = pd.read_csv('input/test.csv')
  22.     data = pd.concat([data_train, data_test])
  23.     # Transform features
  24.     data_transformed = pd.concat([
  25.         to_one_hot(data['Pclass'], {
  26.             '1st': lambda v: v == 1,
  27.             '2nd': lambda v: v == 2,
  28.             '3rd': lambda v: v == 3,
  29.         }),
  30.         to_one_hot(data['Sex'], {
  31.             'female': lambda v: v == 'female',
  32.             'male': lambda v: v == 'male',
  33.         }),
  34.         to_one_hot(data['Age'], {
  35.             '0-15': lambda v: v <= 15,
  36.             '16-30': lambda v: 15 < v <= 30,
  37.             '31+': lambda v: 30 < v,
  38.             'N/A': lambda v: v != v,
  39.         }),
  40.         to_one_hot(
  41.             data[['SibSp', 'Parch']].sum(axis=1)
  42.                 .rename('Relatives'), {
  43.                 '0': lambda v: v == 0,
  44.                 '1-3': lambda v: 0 < v <= 3,
  45.                 '4+': lambda v: 3 < v,
  46.             }),
  47.         to_one_hot(
  48.             data['Name'].str
  49.                 .extract(', ([A-Za-z]+)\.').rename('Title'), {
  50.                 'Mrs': lambda v: v == 'Mrs',
  51.                 'Miss': lambda v: v == 'Miss',
  52.                 'Mr': lambda v: v == 'Mr',
  53.                 'Master': lambda v: v == 'Master',
  54.                 'Other': lambda v: v not in ['Mrs', 'Miss', 'Mr', 'Master'],
  55.             }),
  56.         to_one_hot(
  57.             data['Fare'], {
  58.                 '0-10': lambda v: v <= 10,
  59.                 '11-55': lambda v: 10 < v <= 55,
  60.                 '55+': lambda v: 55 < v,
  61.             }),
  62.         to_one_hot(
  63.             data['Cabin'], {
  64.                 '1+': lambda v: v == v,
  65.                 '0': lambda v: v != v,
  66.             }),
  67.         to_one_hot(
  68.             data['Survived'], {
  69.                 '0': lambda v: v == 0,
  70.                 '1': lambda v: v == 1,
  71.             }),
  72.     ], axis=1)
  73.     # Build model
  74.     training_items = 891
  75.     x = data_transformed \
  76.             .drop('Survived_0', axis=1) \
  77.             .drop('Survived_1', axis=1) \
  78.             .iloc[:training_items]
  79.     y = data_transformed[['Survived_0', 'Survived_1']] \
  80.             .iloc[:training_items]
  81.  
  82.     model = Sequential()
  83.     model.add(
  84.         Dense(
  85.             len(x.columns),
  86.             activation='relu',
  87.             input_shape=(len(x.columns),),
  88.         )
  89.     )
  90.     model.add(Dense(len(y.columns), activation='softmax'))
  91.     model.compile(
  92.         optimizer='adam',
  93.         loss='categorical_crossentropy',
  94.         metrics=['accuracy'],
  95.     )
  96.     # Train model
  97.     history = model.fit(
  98.         x, y,
  99.         verbose=True,
  100.         validation_split=0.3,
  101.         batch_size=32,
  102.         epochs=1000,
  103.         callbacks=[
  104.             EarlyStopping(patience=10)
  105.         ],
  106.     )
  107.     # Visualize training
  108.     pd.DataFrame(history.history).plot(kind='line')
  109.     plt.show()
  110.     # Make predictions
  111.     y_hat = model.predict(
  112.         data_transformed
  113.             .drop('Survived_0', axis=1)
  114.             .drop('Survived_1', axis=1)
  115.             .iloc[training_items:]
  116.     )
  117.     answer = pd.DataFrame()
  118.     answer['PassengerId'] = data_test['PassengerId']
  119.     answer['Survived'] = pd.Series([0 if r[0] > r[1] else 1 for r in y_hat])
  120.     answer.to_csv('output/answer.csv', index=False)
  121.  
  122.     # Play around
  123.     movie_chrs = pd.DataFrame(columns=[x.columns.values])
  124.     jack = [
  125.         0, 0, 1,  # Pcclass: 3rd
  126.         0, 1,  # Sex: male
  127.         0, 1, 0, 0,  # Age: 20
  128.         1, 0, 0,  # Relatives: 0
  129.         0, 0, 0.5, 0, 0.5,  # Title: Dunno
  130.         1, 0, 0,  # Fare: 0; won in gambling
  131.         0, 1,  # Cabin: No
  132.     ]
  133.     movie_chrs.loc[0] = jack
  134.     rose = [
  135.         1, 0, 0,  # Pclass: 1st class
  136.         1, 0,  # Sex: female
  137.         0, 1, 0, 0,  # Age: 17 yo
  138.         0, 1, 0,  # Relatives: mother and fiancee
  139.         1, 0, 0, 0, 0,  # Title: Mrs
  140.         0, 0.5, 0.5,  # Fare: Dunno
  141.         0.2, 0.8,  # Cabin: Dunno
  142.     ]
  143.     movie_chrs.loc[1] = rose
  144.     movie_chrs_fate = pd.DataFrame()
  145.     movie_chrs_fate['Name'] \
  146.         = ['Jack Dawson', 'Rose DeWitt Bukater']
  147.     print(movie_chrs_fate)
  148.     print(model.predict(movie_chrs))
  149.  
  150.     movie_chrs_fate['Survived'] = pd.Series(
  151.         [0 if r[0] > r[1] else 1
  152.          for r in model.predict(movie_chrs)]
  153.     )
  154.     print(movie_chrs_fate)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement