Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import sys

import numpy as np
import pandas as pd
from sklearn import linear_model
# Names of the 30 feature columns expected in both input files: x000 ... x029.
features_cols = ['x' + str(i).zfill(3) for i in range(30)]


def iterations_for(rows_count, total_updates=10**6):
    """Return how many passes over the data give about *total_updates* updates.

    Floor division keeps the result an integer on both Python 2 and
    Python 3 (plain ``/`` yields a float on Python 3, which is not a valid
    iteration count). The result is clamped to at least one pass so a
    training set larger than *total_updates* rows is still fitted.

    Raises:
        ValueError: if *rows_count* is not positive.
    """
    if rows_count <= 0:
        raise ValueError('training data contains no rows')
    return max(1, total_updates // rows_count)


def load_dataset(path):
    """Read a tab-separated file and return ``(features, labels)`` arrays.

    The features are the ``features_cols`` columns and the labels are the
    ``y`` column, both converted to NumPy arrays.
    """
    with open(path, 'r') as data_file:
        frame = pd.read_csv(data_file, sep='\t')
    return np.array(frame[features_cols]), np.array(frame['y'])


def error_rate(predicted, actual):
    """Return the fraction of positions where *predicted* differs from *actual*."""
    return np.mean(np.asarray(predicted) != np.asarray(actual))


def main(argv=None):
    """Train on the first file, evaluate on the second, and print the results.

    Args:
        argv: argument vector in ``sys.argv`` layout (program name, training
            file, test file). Defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when the argument count is wrong.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        # Exactly two file paths are required; fail with a usage message
        # instead of an IndexError further down.
        sys.stderr.write('usage: %s TRAIN_TSV TEST_TSV\n'
                         % (argv[0] if argv else 'script'))
        return 2

    X, Y = load_dataset(argv[1])
    # NOTE(review): newer scikit-learn releases appear to have replaced
    # `n_iter` with `max_iter` and loss='log' with loss='log_loss' --
    # confirm against the installed version before upgrading.
    clf = linear_model.SGDClassifier(loss='log',
                                     n_iter=iterations_for(len(Y)))
    clf.fit(X, Y)

    # Test features are loaded as an array too, matching the training input.
    test_X, test_Y = load_dataset(argv[2])
    predict_Y = clf.predict(test_X)
    print('predicted:', predict_Y)
    print('real:', test_Y)
    error = error_rate(predict_Y, test_Y)
    print('error:', error)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement