Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from sklearn.linear_model import LogisticRegression
- from sklearn.metrics import accuracy_score
- from sklearn.model_selection import train_test_split
- pd.options.mode.chained_assignment = None # default='warn'
def reading_data(path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument
            (the script in this file passes a filesystem path string).

    Returns:
        The DataFrame produced by ``pandas.read_csv(path)`` with all of
        that function's default parsing options.
    """
    return pd.read_csv(path)
def prepare_data(df, *, fare_threshold=150, adult_age=18):
    """Add the boolean feature columns used by the model.

    Two columns are written onto ``df`` itself (the frame is modified in
    place and the same object is returned):

    * ``isExpensiveFare`` -- True where ``fare`` is strictly greater than
      ``fare_threshold``.
    * ``isAdult`` -- True where ``age`` is strictly greater than
      ``adult_age``.

    Args:
        df: DataFrame containing ``fare`` and ``age`` columns.
        fare_threshold: Fares above this value count as expensive.
        adult_age: Ages above this value count as adult.

    Returns:
        The same ``df`` object, now carrying the two extra columns.

    Raises:
        KeyError: If ``df`` lacks a ``fare`` or ``age`` column.
    """
    # NOTE: comparisons against missing values (NaN) evaluate to False, so
    # rows with an unknown fare/age end up flagged as not expensive/not adult.
    df['isExpensiveFare'] = df['fare'] > fare_threshold
    df['isAdult'] = df['age'] > adult_age
    return df
def main(path='data/titanic.csv', C=100.0, penalty='l2'):
    """Train a logistic-regression survival model and print its accuracy.

    The CSV at ``path`` is loaded with ``reading_data`` and extended by
    ``prepare_data``. The ``isExpensiveFare``, ``isAdult`` and ``parch``
    columns are used as features and ``survived`` as the target. Rows are
    split 50/50 with a fixed seed, the model is fitted on the first half and
    scored on the second.

    Args:
        path: CSV file to read; it must provide the ``fare``, ``age``,
            ``parch`` and ``survived`` columns.
        C: Forwarded to ``LogisticRegression`` as ``C``.
        penalty: Forwarded to ``LogisticRegression`` as ``penalty``.

    Returns:
        The accuracy on the held-out half, as returned by ``accuracy_score``.
    """
    # Reading in Data
    df = prepare_data(reading_data(path))

    X = df[['isExpensiveFare', 'isAdult', 'parch']]
    Y = df['survived']
    # Fixed random_state keeps the split (and the reported score) repeatable.
    train_x, test_x, train_y, test_y = train_test_split(
        X, Y, random_state=42, test_size=.5)

    # Define Model
    model = LogisticRegression(C=C, penalty=penalty)

    print("Training on {} rows".format(len(train_x)))
    model.fit(train_x, train_y)

    preds = model.predict(test_x)
    score = accuracy_score(test_y, preds)
    # accuracy_score yields a fraction; the "%" presentation type scales it
    # by 100 so the number printed next to the percent sign is a percentage.
    print("Accuracy : {:.2%}".format(score))
    return score


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement