Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Career WAR arcs for a hand-picked set of baseball greats.
#
# Reads the Baseball-Reference daily batting and pitching WAR tables, stacks
# them, and draws one loess-smoothed WAR-by-season curve per selected player
# on a single base-graphics plot.
#
# Input files (expected in the working directory):
#   war_daily_bat.txt
#     http://www.baseball-reference.com/data/war_daily_bat.txt
#   war_daily_pitch.txt
#     http://www.baseball-reference.com/data/war_daily_pitch.txt

# library() stops immediately when plyr is missing; require() would only
# return FALSE and let the script fail later at the first ddply() call.
library(plyr)

# Seasons shown on the x axis; data after `last_season` is discarded.
first_season <- 1900
last_season <- 2013

# Pin one player's season WAR *total* to `value`.
#
# `war` holds one row per player/season/source table, and the plotting loop
# sums those rows per season. Writing `value` into every matching row would
# therefore multiply the override by the number of rows, so the first matching
# row receives `value` and any further rows for that season are zeroed.
#
# war        data frame with columns player_ID, year_ID and WAR
# player_id  player_ID value to match
# year       year_ID value to match
# value      desired WAR total for that season
#
# Returns the modified data frame; warns and returns `war` unchanged when no
# row matches.
set_season_war <- function(war, player_id, year, value) {
  rows <- which(war$player_ID == player_id & war$year_ID == year)
  if (length(rows) == 0L) {
    warning(
      "No rows for player_ID '", player_id, "' in ", year,
      "; override skipped.",
      call. = FALSE
    )
    return(war)
  }
  war$WAR[rows] <- 0
  war$WAR[rows[1L]] <- value
  war
}

# ---- Load and combine the WAR tables ----------------------------------------

war_bat <- read.csv("war_daily_bat.txt", header = TRUE)
war_pit <- read.csv("war_daily_pitch.txt", header = TRUE)

# Break out just the columns we want.
keep_cols <- c("name_common", "player_ID", "year_ID", "WAR")
war <- rbind(war_bat[, keep_cols], war_pit[, keep_cols])

# Coerce to the types used below. Going through character first means a
# factor column is converted by its labels rather than its integer codes;
# values that are not numbers become NA (with a coercion warning).
war$WAR <- as.numeric(as.character(war$WAR))
war$name_common <- as.character(war$name_common)
war$player_ID <- as.character(war$player_ID)

# Only keep seasons up to and including `last_season`.
war <- war[!is.na(war$year_ID) & war$year_ID <= last_season, ]

# ---- Exploration: which rows hold each season's top WAR? --------------------
# Used to figure out which names to plot; has no effect on the figure.

season_best <- ddply(war, "year_ID", function(df) max(df$WAR, na.rm = TRUE))
colnames(season_best) <- c("year_ID", "WAR")
str(season_best)
leaders <- join(war, season_best, by = c("year_ID", "WAR"), type = "inner")
# Explicit print() so the table is also shown when the script is source()d.
print(arrange(leaders, year_ID))

# ---- Manual adjustments -----------------------------------------------------

# Split Ted Williams into three separately smoothed stretches. Every mask is
# derived from the original ID, so the assignments do not depend on order.
# Rows for 1952-1953 keep the original ID and are therefore not plotted.
is_williams <- war$player_ID == "willite01"
war$player_ID[is_williams & war$year_ID <= 1942] <- "TW1"
war$player_ID[is_williams & war$year_ID > 1942 & war$year_ID < 1952] <- "TW2"
war$player_ID[is_williams & war$year_ID > 1953] <- "TW3"

# Lefty Grove's one bad year (due to injury) is pulling him down too much.
war <- set_season_war(war, "grovele01", 1934, 7)
# Same with Ted Williams.
war <- set_season_war(war, "TW2", 1950, 7)

# ---- Players to draw and their line colors ----------------------------------
# One named vector keeps every ID paired with its color; curves are drawn in
# this order.

curve_colors <- c(
  youngcy01 = "red", wagneho01 = "gray", cobbty01 = "gray",
  johnswa01 = "red", ruthba01 = "red", grovele01 = "red",
  gehrilo01 = "gray", TW1 = "red", TW2 = "red", TW3 = "red",
  musiast01 = "gray",
  mantlmi01 = "gray", mayswi01 = "red", koufasa01 = "gray",
  gibsobo01 = "red",
  schmimi01 = "red", morgajo02 = "gray", clemero02 = "gray",
  bondsba01 = "red", rodrial01 = "gray", pujolal01 = "red",
  kershcl01 = "gray"
)

# ---- Plot -------------------------------------------------------------------

# Empty canvas. The default x axis is suppressed so that only the decade
# ticks added below are drawn.
plot(
  0,
  type = "n", ann = FALSE, xaxt = "n",
  xlim = c(first_season, last_season), ylim = c(0, 15)
)
decades <- seq(first_season, last_season, by = 10)
axis(side = 1, at = decades, labels = decades)

for (player_id in names(curve_colors)) {
  player_rows <- war[war$player_ID %in% player_id, ]
  if (nrow(player_rows) == 0L) {
    warning(
      "No WAR rows found for player_ID '", player_id, "'; curve skipped.",
      call. = FALSE
    )
    next
  }

  # We need to add batter and pitcher WAR.
  season_war <- ddply(player_rows, .(year_ID), summarise, WAR = sum(WAR))

  # Smooth WAR against season and draw the fit on a 0.1-year grid spanning
  # the player's first to last season.
  fit <- loess(WAR ~ year_ID, data = season_war, span = 0.7)
  grid <- seq(min(season_war$year_ID), max(season_war$year_ID), by = 0.1)
  lines(
    grid, predict(fit, newdata = data.frame(year_ID = grid)),
    col = curve_colors[[player_id]], lwd = 2
  )
  # text(locator(), labels = player_id)
}

# Add dotted connectors across Ted Williams's war periods.
# NOTE(review): the y values are hard-coded and look like the end points of
# the TW1/TW2/TW3 curves from one data snapshot -- confirm they still meet
# the curves whenever the WAR files are refreshed.
lines(c(1942, 1946), c(10.61, 10.87), lty = "dotted", col = "red", lwd = 2)
lines(c(1951, 1954), c(7.22, 7.75), lty = "dotted", col = "red", lwd = 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement