Advertisement
Guest User

Untitled

a guest
Sep 21st, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.linear_model import LogisticRegression
  3. from sklearn.metrics import accuracy_score
  4. from sklearn.model_selection import train_test_split
  5. pd.options.mode.chained_assignment = None # default='warn'
  6.  
  def reading_data(path):
      """Load the CSV found at ``path`` and return it as a pandas DataFrame.

      ``path`` is handed to ``pandas.read_csv`` unchanged, with all of that
      function's default parsing options.
      """
      frame = pd.read_csv(path)
      return frame
  9.  
  10.  
  def prepare_data(df, *, fare_threshold=150, adult_age=18):
      """Add the two boolean feature columns derived from ``fare`` and ``age``.

      The columns are written onto ``df`` itself (the frame is modified in
      place, not copied) and the same object is returned:

      * ``isExpensiveFare`` -- True where ``fare`` is strictly greater than
        ``fare_threshold``.
      * ``isAdult`` -- True where ``age`` is strictly greater than
        ``adult_age``. With the default of 18, an 18-year-old is therefore
        NOT flagged as an adult.

      Args:
          df: DataFrame that has ``fare`` and ``age`` columns.
          fare_threshold: Fare above which a ticket counts as expensive.
          adult_age: Age above which a passenger counts as an adult.

      Returns:
          The same ``df`` object, now carrying the two extra columns.

      Raises:
          KeyError: If ``df`` has no ``fare`` or no ``age`` column.
      """
      # Comparisons against NaN evaluate to False, so rows with a missing
      # fare or age end up with False in the corresponding flag.
      df['isExpensiveFare'] = df['fare'] > fare_threshold
      df['isAdult'] = df['age'] > adult_age
      return df
  15.  
  if __name__ == '__main__':
      # Input location and model hyper-parameters
      path = 'data/titanic.csv'
      C = 100.0
      penalty = 'l2'

      # Load the CSV and derive the boolean feature columns
      df = reading_data(path)
      df = prepare_data(df)

      # Features and target used by the classifier
      X = df[['isExpensiveFare', 'isAdult', 'parch']]
      Y = df['survived']
      # Hold out half of the rows for evaluation; the fixed random_state
      # keeps the split the same from run to run.
      train_x, test_x, train_y, test_y = train_test_split(
          X, Y, random_state=42, test_size=.5)

      # Define Model
      model = LogisticRegression(C=C, penalty=penalty)
      print("Training on {} rows".format(len(train_x)))
      model.fit(train_x, train_y)

      # Evaluate on the held-out half
      preds = model.predict(test_x)
      score = accuracy_score(test_y, preds)
      # accuracy_score returns a fraction in [0, 1]; scale it so the printed
      # number actually matches the "%" sign that follows it.
      print("Accuracy : {:.2f}%".format(score * 100))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement