Guest User

Untitled

a guest
Apr 19th, 2018
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.71 KB | None | 0 0
  1. import numpy as np
  2. from sklearn.model_selection import StratifiedKFold
  3. from sklearn.svm import SVC
  4.  
  5. # We'll use 10-fold cross-validation
  6. n_folds = 10
  7.  
  8. # Create some data with 500 samples and 100 features
  9. N = 500
  10. K = 100
  11. X = np.random.normal(0, 1, size=(N, K))
  12.  
  13. # Create a binary target variable with 500 samples
  14. y = np.repeat([0, 1], repeats=(N / 2))
  15.  
  16. # And a (continuous) confound, again with 500 samples
  17. c = np.random.normal(0, 1, size=(N, 1))
  18.  
  19. # We'll add an intercept to our confound variable
  20. c = np.c_[np.ones(c.shape), c]
  21.  
  22. # Let's define our cross-validation scheme
  23. skf = StratifiedKFold(n_splits=n_folds)
  24.  
  25. # And our model
  26. clf = SVC(kernel='linear')
  27.  
  28. for train_idx, test_idx in skf.split(X, y):
  29. X_train = X[train_idx, :]
  30. X_test = X[test_idx, :]
  31. y_train = y[train_idx]
  32. y_test = y[test_idx]
  33. c_train = c[train_idx, :]
  34. c_test = c[test_idx, :]
  35.  
  36. print("Shape of X_train: %s" % (X_train.shape,))
  37. print("Shape of X_test: %s" % (X_test.shape,))
  38.  
  39. # Now, let's estimate the "confound model" on the train-set only
  40. # We'll add an intercept as well
  41. c_weights_train = np.linalg.lstsq(c_train, X_train, rcond=None)[0]
  42. print("Shape of weights: %s" % (c_weights_train.shape,))
  43.  
  44. # Now, we can regress out c_train from X_train
  45. X_train_corr = X_train - c_train.dot(c_weights_train)
  46. print("Shape of X_train (corrected): %s" % (X_train_corr.shape,))
  47.  
  48. # Now, let's *cross-validate* our confound regression procedure,
  49. # i.e., use c_weights_train for correcting X_test!
  50. X_test_corr = X_test - c_test.dot(c_weights_train)
  51. print("Shape of X_test (corrected): %s" % (X_test_corr.shape,))
  52.  
  53. # Fit model on corrected data
  54. clf.fit(X_train_corr, y_train)
  55. pred = clf.predict(X_test_corr)
Add Comment
Please, Sign In to add comment