Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.85 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from scipy import stats
  4. from sklearn.cross_validation import KFold
  5. male100 = pd.read_csv("data/male100.csv",header=0)
  6.  
  7. # Function for giving best line given data set W
  8. def bestFit(W):
  9.     scaled = scaleData(W,0,0)
  10.     scaled.insert(0,'1s',1)
  11.     X = scaled.as_matrix(["1s","Year"])
  12.     t = W.as_matrix(["Time"])
  13.  
  14.     return (np.linalg.inv(X.transpose().dot(X))).dot(X.transpose()).dot(t)
  15.  
  16. kf = KFold(27, n_folds=4)
  17.  
  18. loss=0
  19. for train_index, test_index in kf:
  20.     def est(x):
  21.         line =  bestFit(male100.loc[(train_index),:])[0] + bestFit(male100.loc[(train_index),:])[1]*x
  22.         return line
  23.     loss += sum( (male100.loc[(test_index),:]["Time"] - est(male100.loc[(test_index),:]["Year"]))**2 )
  24.     # Above is a line estimate. We then need to do the loss function calcs.
  25.    
  26. print (float(1)/27)*loss
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement