Guest User

Untitled

a guest
May 27th, 2018
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.52 KB | None | 0 0
  1. library(baseballr)
  2. library(dplyr)
  3.  
  #' Load pitch-level data from one of several sources.
  #'
  #' @param type Single string selecting the source: "postgres", "rds", "csv"
  #'   or "scrape".
  #' @param start_date,end_date First and last day (inclusive) of the scrape
  #'   window; only used when `type == "scrape"`. Passed through `as.Date()`.
  #' @param infile Path of the file to read; required when `type` is "rds" or
  #'   "csv", ignored otherwise.
  #' @return Whatever the selected source yields: the object stored in the RDS
  #'   file, the data frame read from the CSV, or the row-bound per-day scrape
  #'   results. The "postgres" branch is an unimplemented placeholder and
  #'   returns `NULL` until the connection code is filled in.
  obtain_data <- function(type = "postgres",
                          start_date = "2017-03-29",
                          end_date = "2017-10-03", infile = NULL) {
    if (!is.character(type) || length(type) != 1L || is.na(type)) {
      stop("`type` must be a single non-missing string", call. = FALSE)
    }

    # Fail early with a readable message instead of the low-level error that
    # readRDS()/read.csv() raise when handed NULL.
    if (type %in% c("rds", "csv") && is.null(infile)) {
      stop("`infile` must be supplied when type is \"", type, "\"",
           call. = FALSE)
    }

    if (type == "postgres") {
      # postgres db connection here
      # e.g.
      # library(RPostgres)
      # library(DBI)
      # conn <- dbConnect(RPostgres::Postgres(),
      #   password=SOMEPASS, user=SOME_USER,
      #   port=SOME_PORT_PROBABLY_5432, dbname=SOME_NAME)
      # df1 = dbGetQuery(conn,
      #   "select * from SOME_TABLE_NAME where SOME_SELECTION_CRITERIA")
      NULL
    } else if (type == "rds") {
      readRDS(infile)
    } else if (type == "csv") {
      read.csv(infile, stringsAsFactors = FALSE)
    } else if (type == "scrape") {
      # One request per calendar day; the per-day results are stacked at the end.
      date_seq <- seq(as.Date(start_date), as.Date(end_date), by = 1)
      day_strings <- as.character(date_seq)
      # The function must be the second argument of lapply(); the original
      # closed the parenthesis too early and handed it to bind_rows() instead.
      daily <- lapply(day_strings, function(d) {
        baseballr::scrape_statcast_savant(d, d)
      })
      dplyr::bind_rows(daily)
    } else {
      stop("Unknown `type`: \"", type,
           "\" (expected \"postgres\", \"rds\", \"csv\" or \"scrape\")",
           call. = FALSE)
    }
  }
  28.  
  #' Tabulate frequently repeated (launch angle, launch speed) combinations.
  #'
  #' Rows with a missing `launch_speed` are dropped, launch angle and speed are
  #' scaled by `inverse_precision` and rounded to integers, and the rows are
  #' counted per (ila, ils, bb_type, events) combination. Combinations seen at
  #' least `min_count` times are written to `outfile` as CSV.
  #'
  #' @param statcast_df Data frame with columns `launch_angle`, `launch_speed`,
  #'   `bb_type` and `events`.
  #' @param inverse_precision Multiplier applied before rounding; 10000 keeps
  #'   four decimal places of the original measurement.
  #' @param min_count Minimum number of occurrences for a combination to be
  #'   kept. The cut-off is a heuristic (n >= 5? some other number? a high
  #'   percentile?), hence a parameter.
  #' @param outfile Path of the CSV file to write.
  #' @return The filtered table (columns `ila`, `ils`, `bb_type`, `events`),
  #'   invisibly.
  statcast_impute_derive <- function(statcast_df, inverse_precision = 10000,
                                     min_count = 5,
                                     outfile = "CSV_FILE_TO_LOAD_LATER.csv") {
    required <- c("launch_angle", "launch_speed", "bb_type", "events")
    missing_cols <- setdiff(required, names(statcast_df))
    if (length(missing_cols) > 0) {
      stop("statcast_df is missing column(s): ",
           paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    measured <- dplyr::filter(statcast_df, !is.na(launch_speed))
    measured <- dplyr::mutate(
      measured,
      ila = round(launch_angle * inverse_precision),
      ils = round(launch_speed * inverse_precision)
    )

    la_ls_count <- dplyr::summarise(
      dplyr::group_by(measured, ila, ils, bb_type, events),
      n = dplyr::n()
    )
    # summarise() may leave the result grouped; drop the grouping so the steps
    # below work on a plain table.
    la_ls_count <- dplyr::ungroup(la_ls_count)

    # Filter on the count BEFORE selecting: the original dropped `n` first, so
    # the filter compared the dplyr::n function to a number and errored.
    la_ls_filtered <- dplyr::filter(la_ls_count, n >= min_count)
    la_ls_filtered <- dplyr::select(la_ls_filtered, ila, ils, bb_type, events)

    write.csv(la_ls_filtered, outfile, row.names = FALSE)

    invisible(la_ls_filtered)
  }
Add Comment
Please, Sign In to add comment