Advertisement
Guest User

Untitled

a guest
Jul 15th, 2014
418
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.34 KB | None | 0 0
  1. library(RMySQL)
  2. library(glmnet)
  3. library(nnet)
  4. drv=dbDriver("MySQL")
  5. con <- dbConnect(drv, user="root", password="root",
  6.                  dbname="PitchFX",
  7.                  unix.socket="/Applications/MAMP/tmp/mysql/mysql.sock")
  8.  
  9. data2003<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2003 a") #pull query with year of interest
  10. CS<-data2003$CS # create vector with predictor variable
  11. data2003<-with(data2003, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin)) # create design matrix of independent variables
  12. data2003<-as.matrix(data2003) # convert in data matrix
  13. lambda2003=cv.glmnet(data2003, CS) # cross validation to find lambda value that min(error)
  14. lambda.min.2003 <- lambda2003$lambda.min # store lambda value that yield min(error)
  15. ridge2003=glmnet(data2003, CS, family=c("gaussian"), alpha=0, lambda=lambda.min) #run ridge regression
  16. ridge2003<-data.matrix(coef(ridge2003)) # pull coef for each variable
  17. write.csv(ridge2003, "~/Desktop/ridgeCS2003.csv") # write table on Desktop of results
  18.  
  19. # loop through each year, or chose to query multiple at a time
  20. data2004<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2004 a")
  21. CS<-data2004$CS
  22. data2004<-with(data2004, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  23. data2004<-as.matrix(data2004)
  24. lambda2004=cv.glmnet(data2004, CS)
  25. lambda.min.2004 <- lambda2004$lambda.min
  26. ridge2004=glmnet(data2004, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  27. ridge2004<-data.matrix(coef(ridge2004))
  28. write.csv(ridge2004, "~/Desktop/ridgeCS2004.csv")
  29.  
  30. data2005<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2005 a")
  31. CS<-data2005$CS
  32. data2005<-with(data2005, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  33. data2005<-as.matrix(data2005)
  34. lambda2005=cv.glmnet(data2005, CS)
  35. lambda.min.2005 <- lambda2005$lambda.min
  36. ridge2005=glmnet(data2005, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  37. ridge2005<-data.matrix(coef(ridge2005))
  38. write.csv(ridge2005, "~/Desktop/ridgeCS2005.csv")
  39.  
  40. data2006<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2006 a")
  41. CS<-data2006$CS
  42. data2006<-with(data2006, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  43. data2006<-as.matrix(data2006)
  44. lambda2006=cv.glmnet(data2006, CS)
  45. lambda.min.2006 <- lambda2006$lambda.min
  46. ridge2006=glmnet(data2006, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  47. ridge2006<-data.matrix(coef(ridge2006))
  48. write.csv(ridge2006, "~/Desktop/ridgeCS2006.csv")
  49.  
  50. data2007<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2007 a")
  51. CS<-data2007$CS
  52. data2007<-with(data2007, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  53. data2007<-as.matrix(data2007)
  54. lambda2007=cv.glmnet(data2007, CS)
  55. lambda.min.2007 <- lambda2007$lambda.min
  56. ridge2007=glmnet(data2007, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  57. ridge2007<-data.matrix(coef(ridge2007))
  58. write.csv(ridge2007, "~/Desktop/ridgeCS2007.csv")
  59.  
  60. data2008<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2008 a")
  61. CS<-data2008$CS
  62. data2008<-with(data2008, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  63. data2008<-as.matrix(data2008)
  64. lambda2008=cv.glmnet(data2008, CS)
  65. lambda.min.2008 <- lambda2008$lambda.min
  66. ridge2008=glmnet(data2008, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  67. ridge2008<-data.matrix(coef(ridge2008))
  68. write.csv(ridge2008, "~/Desktop/ridgeCS2008.csv")
  69.  
  70. data2009<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2009 a")
  71. CS<-data2009$CS
  72. data2009<-with(data2009, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  73. data2009<-as.matrix(data2009)
  74. lambda2009=cv.glmnet(data2009, CS)
  75. lambda.min.2009 <- lambda2009$lambda.min
  76. ridge2009=glmnet(data2009, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  77. ridge2009<-data.matrix(coef(ridge2009))
  78. write.csv(ridge2009, "~/Desktop/ridgeCS2009.csv")
  79.  
  80. data2010<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2010 a")
  81. CS<-data2010$CS
  82. data2010<-with(data2010, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  83. data2010<-as.matrix(data2010)
  84. lambda2010=cv.glmnet(data2010, CS)
  85. lambda.min.2010 <- lambda2010$lambda.min
  86. ridge2010=glmnet(data2010, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  87. ridge2010<-data.matrix(coef(ridge2010))
  88. write.csv(ridge2010, "~/Desktop/ridgeCS2010.csv")
  89.  
  90. data2011<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2011 a")
  91. CS<-data2011$CS
  92. data2011<-with(data2011, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  93. data2011<-as.matrix(data2011)
  94. lambda2011=cv.glmnet(data2011, CS)
  95. lambda.min.2011 <- lambda2011$lambda.min
  96. ridge2011=glmnet(data2011, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  97. ridge2011<-data.matrix(coef(ridge2011))
  98. write.csv(ridge2011, "~/Desktop/ridgeCS2011.csv")
  99.  
  100. data2012<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2012 a")
  101. CS<-data2012$CS
  102. data2012<-with(data2012, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  103. data2012<-as.matrix(data2012)
  104. lambda2012=cv.glmnet(data2012, CS)
  105. lambda.min.2012 <- lambda2012$lambda.min
  106. ridge2012=glmnet(data2012, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  107. ridge2012<-data.matrix(coef(ridge2012))
  108. write.csv(ridge2012, "~/Desktop/ridge/ridgeCS2012.csv")
  109.  
  110. data2013<-dbGetQuery(con, "SELECT a.* FROM RidgebaseSBA2013 a")
  111. CS<-data2013$CS
  112. data2013<-with(data2013, data.frame(class.ind(pitcher), class.ind(catcher), class.ind(baserunner), INN_CT, P_hand_R, P_hand_L, BS1__, BS_2_,BS1_3,PK, scoremargin))
  113. data2013<-as.matrix(data2013)
  114. lambda2013=cv.glmnet(data2013, CS)
  115. lambda.min.2013 <- lambda2013$lambda.min
  116. ridge2013=glmnet(data2013, CS, family=c("gaussian"), alpha=0, lambda=lambda.min)
  117. ridge2013<-data.matrix(coef(ridge2013))
  118. write.csv(ridge2013, "~/Desktop/ridge/ridgeCS2013.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement