Advertisement
Guest User

Untitled

a guest
Aug 4th, 2016
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.80 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn import linear_model
  4. import sys
  5.  
  6. #TODO: check for valid sys.argv (must be 2 files)
  7.  
  8. with open(sys.argv[1], 'r') as train_file:
  9.     train_df = pd.read_csv(train_file, sep='\t')
  10.  
  11. rows_count = len(train_df.index)
  12. features_cols = ['x' + str(i).zfill(3) for i in range(30)]
  13. X = np.array(train_df[features_cols]) #features
  14. Y = np.array(train_df['y'])
  15. clf = linear_model.SGDClassifier(loss='log', n_iter=(10**6)/rows_count)
  16. clf.fit(X, Y)
  17.  
  18. with open(sys.argv[2], 'r') as test_file:
  19.     test_df = pd.read_csv(test_file, sep='\t')
  20.    
  21. test_X = test_df[features_cols]
  22. test_Y = np.array(test_df['y'])
  23. predict_Y = clf.predict(test_X)
  24.  
  25. print('predicted:', predict_Y)
  26. print('real:', test_Y)
  27.  
  28. error = np.mean(predict_Y != test_Y)
  29. print('error:', error)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement