Guest User

Untitled

a guest
Oct 24th, 2017
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.47 KB | None | 0 0
  1. library(RMySQL)
  2. library(DBI)
  3. library(dplyr)
  4. library(class)
  5. library(datasets)
  6. library(ggplot2)
  7. library(stats)
  8. library(utils)
  9. library(lme4)
  10. library(corrplot)
  11. library(ggplot2)
  12. library(stringi)
  13. library(scales)
  14. library(stats)
  15.  
  16.  
  17.  
  18.  
  19.  
  20. # Connecting to database
  21.  
  22. mydb = dbConnect(MySQL(), user='root', password='Pass1234*', dbname='sportsanalytics', host='localhost')
  23.  
  24. # Visualize tables in the database
  25.  
  26. dbListTables(mydb)
  27.  
  28.  
  29. rav <- function(userId, dat, days) {
  30.  
  31.  
  32. trainingsession_ath %>% subset(date >= (dat-days) & date <= dat &
  33. aspnetusers_id == userId) %>% select(TL) %>%
  34. summarize(sum(TL))
  35.  
  36. }
  37.  
  38. ## Get Dailywellness data
  39.  
  40. dlywns = dbSendQuery(mydb, "select aspnetusers_id,
  41. date,sleepduration,
  42. sleepQuality, Nutrition, Stress,
  43. Fatigue, musclesoreness
  44. from dailywellness")
  45.  
  46. dailywellness = fetch(dlywns, n=-1)
  47. dailywellness$date <- as.Date(dailywellness$date, format = "%Y-%m-%d")
  48.  
  49. ## get player performance
  50.  
  51. pp = dbSendQuery(mydb, "select *
  52. from playerperformance")
  53.  
  54. playerperf = fetch(pp, n=-1)
  55. playerperf$Date <- as.Date(playerperf$Date, format = "%Y-%m-%d")
  56.  
  57.  
  58. ## Get Trainingsessionbyathlete
  59.  
  60. # TL is variable we create. TL or Training Load = (RPE * Duration)
  61.  
  62. tsa = dbSendQuery(mydb, "select aspnetusers_id, date, (RPE * Duration) as TL, RPE, Duration as Athlete_Duration
  63. from trainingsessionbyathlete
  64. group by aspnetusers_id, date
  65. order by date desc")
  66.  
  67. trainingsession_ath = fetch(tsa, n=-1)
  68.  
  69. # Code below adds a date variable
  70.  
  71. trainingsession_ath$date <- as.Date(trainingsession_ath$date, format = "%Y-%m-%d")
  72.  
  73.  
  74. # The below code is creating a comparison between Training Load (TL) for the last 7 days against the last 28 days.
  75. # This is important to see if an athlete is taking an increased load which in turn could lead to injury.
  76.  
  77.  
  78. trainingsession_ath$last7DayTL<- unlist(apply(trainingsession_ath[,c("aspnetusers_id","date")],
  79. 1,
  80. function(y) rav(y["aspnetusers_id"],
  81. as.Date(y["date"], format="%Y-%m-%d"),
  82. 7)))
  83.  
  84. trainingsession_ath$last28DayTL<- unlist(apply(trainingsession_ath[,c("aspnetusers_id","date")],
  85. 1,
  86. function(y) rav(y["aspnetusers_id"],
  87. as.Date(y["date"], format="%Y-%m-%d"),
  88. 28)))
  89.  
  90.  
  91. # acl variable will be addressing recent workload on the athlete
  92. # a high number indicates that workload in the last week is higher than the athlete's normal workload
  93.  
  94. trainingsession_ath$acl <- trainingsession_ath$last7DayTL/ trainingsession_ath$last28DayTL
  95.  
  96.  
  97. ## Get TrainingSession by Trainer
  98.  
  99. trn_inj <- dbSendQuery(mydb, "select * from trainingsessionbytrainer
  100. order by date desc;")
  101.  
  102. trainingsession_trn = fetch(trn_inj, n=-1)
  103. trainingsession_trn$Date <- as.Date(trainingsession_trn$Date, format = "%Y-%m-%d")
  104.  
  105.  
  106. ## Get active roster
  107.  
  108. roster <- dbSendQuery(mydb, "select firstname,lastname,userId from active_roster;")
  109.  
  110. active_roster = fetch(roster, n=-1)
  111.  
  112.  
  113. ###
  114.  
  115. # Below we are merging data frames! p1 we are merging dailywellness with trainingsession by trainer.
  116. # df2 is merging the existing p1 with trainingsession by athlete
  117. # df3 is merging the existing p2 with active_roster
  118.  
  119. df1 <- merge.data.frame( dailywellness, trainingsession_trn, by.x=c("aspnetusers_id","date") ,
  120. by.y = c("aspnetusers_Id","Date"))
  121.  
  122. df2 <- merge.data.frame(df1, trainingsession_ath,by.x=c("aspnetusers_id","date"),
  123. by.y=c("aspnetusers_id","date"))
  124.  
  125. df3 <- merge.data.frame(df2, active_roster, by.x = "aspnetusers_id",
  126. by.y = "userId")
  127.  
  128. # Showing first and last name
  129.  
  130. df3$name <- paste(df3$firstname,df3$lastname, sep = ",")
  131.  
  132. str(df3)
  133.  
  134.  
  135. # Turning names into factors
  136.  
  137. df3$name <- factor(df3$name)
  138.  
  139. df3$Injury <- stri_trans_totitle(df3$Injury) #^^^^^^^^^^^^^^^^^^^^^
  140. df3$Injury<- ifelse(df3$Injury =="Null", "", df3$Injury)
  141. df3$Injury<- ifelse(df3$Injury =="","No Injury", df3$Injury)
  142. df3$Injury<- ifelse(df3$Injury =="New","New Injury", df3$Injury)
  143. df3$Injury<- ifelse(df3$Injury =="Existing","Existing Injury", df3$Injury)
  144. df3$Injury<- ifelse(df3$Injury =="None","No Injury", df3$Injury)
  145. df3$Injury <- ifelse(df3$Injury=="No Injury", "No_Injury", df3$Injury)
  146. df3$Injury <- ifelse(df3$Injury=="New Injury", "New_Injury", df3$Injury)
  147. df3$Injury <- ifelse(df3$Injury=="Existing Injury", "Existing_Injury", df3$Injury)
  148.  
  149.  
  150. table(df3$Injury)
  151.  
  152.  
  153. # Create InjuryFactor variable that is binary - new injury = 1, everything else = 0
  154.  
  155. df3$InjuryFactor <- ifelse(df3$Injury=="New_Injury", 1 , 0)
  156.  
  157. df3$Injury_Type <- stri_trans_totitle(df3$Injury_Type)
  158. df3$Injury_Type <- ifelse(df3$Injury_Type=="Null","", df3$Injury_Type)
  159.  
  160.  
  161. table(df3$Injury)
  162.  
  163. # CLEAN UP df3
  164.  
  165. # Get rid of trainingsessionbytrainerid, firstname, lastname
  166.  
  167. df3 <- df3[, !(names(df3) %in% c("trainingsessionbytrainerid", "firstname", "lastname"))] #^^^^^^^^^^^^
  168.  
  169. # Get rid of sleep greater than or equal to 12
  170.  
  171. df3 <- subset(df3, df3$sleepduration <=12)
  172.  
  173. # Converting df3 dataframe to new dataframe with less unnecessary variables
  174.  
  175. ath.data <- df3[, c("date", "sleepduration", "sleepQuality","Nutrition","Stress","Fatigue","musclesoreness","TL"
  176. ,"Duration", "RPE", "Injury", "Injury_Type", "acl")]
  177.  
  178. # Converting df3 dataframe to new dataframe
  179.  
  180. ath.data.use <- df3[ !(names(df3) %in% c("aspnetusers_id","Vertical_Jump","USG","Body_Weight","date","name"))]
  181.  
  182. ath.data.unscale <- ath.data.use
  183.  
  184.  
  185. # Centering and scaling variables ; Duration, TL, Athlete_Duration for better analysis
  186.  
  187. ath.data.use$Duration <- scale(ath.data.use$Duration,
  188. center=median(ath.data.use$Duration),
  189. scale=mad(ath.data.use$Duration) )
  190.  
  191. ath.data.use$TL <- scale(ath.data.use$TL,
  192. center=median(ath.data.use$TL),
  193. scale=mad(ath.data.use$TL) )
  194.  
  195. ath.data.use$last7DayTL <- scale(ath.data.use$last7DayTL,
  196. center=median(ath.data.use$last7DayTL),
  197. scale=mad(ath.data.use$last7DayTL) )
  198.  
  199. ath.data.use$last28DayTL <- scale(ath.data.use$last28DayTL,
  200. center=median(ath.data.use$last28DayTL),
  201. scale=mad(ath.data.use$last28DayTL) )
  202.  
  203. ath.data.use$Athlete_Duration <- scale(ath.data.use$Athlete_Duration,
  204. center=median(ath.data.use$Athlete_Duration),
  205. scale=mad(ath.data.use$Athlete_Duration) )
  206.  
  207.  
  208. # Added back in date and name variables
  209.  
  210. ath.data.use <- cbind(ath.data.use, df3$date)
  211. ath.data.use <- cbind(ath.data.use, df3$name)
  212. ath.data.use$date <- ath.data.use$`df3$date`
  213. ath.data.use$name <- ath.data.use$`df3$name`
  214.  
  215. # Delete unused name and date variables
  216.  
  217. ath.data.use$`df3$date` <- NULL
  218. ath.data.use$`df3$name` <- NULL
  219.  
  220.  
  221. # Same thing as above
  222.  
  223. ath.data.unscale <- cbind(ath.data.unscale, df3$date)
  224. ath.data.unscale <- cbind(ath.data.unscale, df3$name)
  225. ath.data.unscale$date <- ath.data.unscale$`df3$date`
  226. ath.data.unscale$name <- ath.data.unscale$`df3$name`
  227. ath.data.unscale$`df3$date` <- NULL
  228. ath.data.unscale$`df3$name` <- NULL
  229.  
  230.  
  231. write.csv(ath.data.use, file = "c:\\Capstone Project Share\\R Scripts - MySQL Connection\\ 7 - Cleaning Data for Power BI Connection_John.csv")
Add Comment
Please, Sign In to add comment