Advertisement
Guest User

Untitled

a guest
Jul 10th, 2014
271
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Load plyr, which supplies ddply(), join(), arrange(), summarise() and .()
  # used further down. library() stops with an error when the package is
  # missing, whereas require() only returns FALSE and lets the script fail
  # later with a less obvious message.
  library(plyr)

  # Baseball-Reference daily WAR tables, expected in the working directory:
  #   http://www.baseball-reference.com/data/war_daily_bat.txt
  #   http://www.baseball-reference.com/data/war_daily_pitch.txt
  war_bat <- read.csv("war_daily_bat.txt", header = TRUE)
  war_pit <- read.csv("war_daily_pitch.txt", header = TRUE)

  # Keep only the columns used below and stack the batting rows on top of the
  # pitching rows; a player-season can therefore appear in more than one row.
  columns <- c("name_common", "player_ID", "year_ID", "WAR")
  war <- rbind(war_bat[, columns], war_pit[, columns])

  # Coerce to the types the rest of the script relies on. Going through
  # as.character() first converts factor labels rather than factor codes;
  # entries that are not numeric become NA (with a coercion warning).
  war$WAR <- as.numeric(as.character(war$WAR))
  war$name_common <- as.character(war$name_common)
  war$player_ID <- as.character(war$player_ID)

  # Only keep pre-2014 seasons. Rows with a missing year are excluded
  # explicitly: an NA in a logical row index would otherwise insert an
  # all-NA row into the result.
  mask1 <- !is.na(war$year_ID) & war$year_ID < 2014
  war <- war[mask1, ]

  # Exploratory aid for choosing which players to highlight: for every
  # season, find the largest WAR held by a single row of `war`, then list
  # the row(s) that reach it. This looks at individual rows, so batting and
  # pitching WAR are not added together here.
  season_best <- function(df) {
    max(df$WAR, na.rm = TRUE)
  }
  maxWar <- ddply(war, "year_ID", season_best)
  colnames(maxWar) <- c("year_ID", "WAR")
  str(maxWar)

  # No `by` is given, so join() matches on every column the two tables share
  # (year_ID and WAR); the inner join keeps only the season-leading rows.
  m <- join(x = war, y = maxWar, type = "inner")
  arrange(m, year_ID)

  # Ted Williams's career is drawn as three separate curves, split around his
  # two war-time gaps: seasons through 1942 become TW1, 1943-1951 become TW2
  # and 1954 onward become TW3. Rows for 1952 and 1953 keep the original ID,
  # which is not in the list of plotted players, so they are left out.
  # The masks are computed up front with explicit year windows so that they
  # do not depend on the order in which the IDs are rewritten.
  is_tw <- war$player_ID == "willite01"
  masktw1 <- is_tw & war$year_ID <= 1942
  masktw2 <- is_tw & war$year_ID > 1942 & war$year_ID < 1952
  masktw3 <- is_tw & war$year_ID > 1953
  war$player_ID[masktw1] <- "TW1"
  war$player_ID[masktw2] <- "TW2"
  war$player_ID[masktw3] <- "TW3"

  # Overwrite one player's WAR for a single season with a fixed total.
  # `war` stacks batting and pitching rows, so a player-season may occupy
  # several rows, and the plotting loop adds those rows together. Writing the
  # value into every matching row would therefore count it more than once;
  # instead the total goes into the first matching row and the rest are zeroed.
  # Returns the modified data frame (unchanged when nothing matches).
  set_season_war <- function(war, id, year, total) {
    rows <- which(war$player_ID == id & war$year_ID == year)
    if (length(rows) > 0) {
      war$WAR[rows] <- 0
      war$WAR[rows[1]] <- total
    }
    war
  }

  # Lefty Grove's one bad year (due to injury) is pulling him down too much
  war <- set_season_war(war, "grovele01", 1934, 7)

  # Same with Ted Williams (1950 falls in the TW2 segment)
  war <- set_season_war(war, "TW2", 1950, 7)


  # Players to draw, keyed by player_ID, each paired with the colour of its
  # curve. TW1, TW2 and TW3 are the three Ted Williams segments. The order
  # here is the drawing order.
  player_colors <- c(
    youngcy01 = "red",
    wagneho01 = "gray",
    cobbty01  = "gray",
    johnswa01 = "red",
    ruthba01  = "red",
    grovele01 = "red",
    gehrilo01 = "gray",
    TW1       = "red",
    TW2       = "red",
    TW3       = "red",
    musiast01 = "gray",
    mantlmi01 = "gray",
    mayswi01  = "red",
    koufasa01 = "gray",
    gibsobo01 = "red",
    schmimi01 = "red",
    morgajo02 = "gray",
    clemero02 = "gray",
    bondsba01 = "red",
    rodrial01 = "gray",
    pujolal01 = "red",
    kershcl01 = "gray"
  )

  # The plotting loop indexes two parallel plain character vectors.
  names <- names(player_colors)
  colors <- unname(player_colors)

  # Open an empty canvas: seasons min_x..max_x on the x axis, WAR 0..15 on
  # the y axis. type = "n" draws nothing and ann = FALSE suppresses titles
  # and axis labels; the curves are added afterwards with lines().
  min_x <- 1900
  max_x <- 2013
  plot(0, type = "n", axes = TRUE, ann = FALSE,
       xlim = c(min_x, max_x), ylim = c(0, 15))

  # Add a tick and label at every decade on top of the default x axis.
  decades <- seq(min_x, max_x, by = 10)
  axis(side = 1, at = decades, labels = decades)

  # Draw one loess-smoothed WAR-by-season curve per selected player onto the
  # plot that is already open. `names` and `colors` are parallel vectors.
  for (i in seq_along(names)) {
    name <- names[i]
    color <- colors[i]

    # which() drops comparisons that evaluate to NA instead of selecting them
    masked_war <- war[which(war$player_ID == name), ]
    if (nrow(masked_war) == 0) {
      # Without any rows loess() would fail with an unhelpful error
      warning("No WAR rows found for player ID '", name, "'; skipping",
              call. = FALSE)
      next
    }

    # We need to add batter and pitcher WAR: collapse to one total per season
    myWar <- ddply(masked_war, .(year_ID), summarise, WAR = sum(WAR))
    x <- myWar$year_ID
    y <- myWar$WAR

    # Smooth the seasonal totals and evaluate the fit on a 0.1-year grid
    # spanning the player's first to last season
    lo <- loess(y ~ x, span = 0.7)
    xl <- seq(min(x), max(x), 0.1)
    lines(xl, predict(lo, xl), col = color, lwd = 2)
    # text(locator(), labels = name)
  }

  # Bridge Ted Williams's two war-period gaps with dotted red connectors
  # between the ends of neighbouring segments. The coordinates are fixed
  # values; presumably they were read off the smoothed curves (TODO confirm).
  ted_gaps <- list(
    list(years = c(1942, 1946), war = c(10.61, 10.87)),
    list(years = c(1951, 1954), war = c(7.22, 7.75))
  )
  for (gap in ted_gaps) {
    lines(gap$years, gap$war, pch = 3, lty = 'dotted', col = 'red', lwd = 2)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement