# Page chrome captured from a paste-site diff view (not part of the script):
#   SHOW: | | - or go back to the newest paste.
# Load and clean the Baseball-Reference daily WAR tables.
#
# Result: `war`, a data frame with one row per player-season per source file
# (batting and pitching), restricted to seasons before 2014.

# Get the required package. library() stops with an error when plyr is not
# installed; require() would only warn and let the script fail further down.
library(plyr)

# http://www.baseball-reference.com/data/war_daily_bat.txt
war_bat <- read.csv("war_daily_bat.txt", header = TRUE)
# http://www.baseball-reference.com/data/war_daily_pitch.txt
war_pit <- read.csv("war_daily_pitch.txt", header = TRUE)

# Break out just the columns we want and stack batting rows on pitching rows
columns <- c("name_common", "player_ID", "year_ID", "WAR")
war <- rbind(war_bat[, columns], war_pit[, columns])

# Turn these columns into the appropriate data types. Going through
# as.character() first means that, if read.csv() returned factors, the labels
# are converted rather than the integer codes. WAR entries that are not
# numbers become NA.
war$WAR <- as.numeric(as.character(war$WAR))
war$name_common <- as.character(war$name_common)
war$player_ID <- as.character(war$player_ID)

# Only get pre-2014 data
mask1 <- war$year_ID < 2014
war <- war[mask1, ]

# Exploration step: list the WAR leader(s) of every season, to help decide
# which players to plot.
maxWar <- ddply(
  war,
  "year_ID",
  function(season) max(season$WAR, na.rm = TRUE)
)
colnames(maxWar) <- c("year_ID", "WAR")
str(maxWar)
# No `by` is given, so join() matches on the columns the two tables share
# (year_ID and WAR); the inner join keeps only rows equal to a season maximum.
m <- join(war, maxWar, type = "inner")
arrange(m, year_ID)

# Split Ted Williams's seasons (player_ID "willite01") into three separately
# plotted series. The statements are order-dependent: each relabel takes the
# matched rows out of the "willite01" pool, so the next mask only sees the
# seasons that are still unlabelled.
masktw1 <- with(war, year_ID <= 1942 & player_ID == "willite01")
war$player_ID[masktw1] <- "TW1"

masktw2 <- with(war, year_ID < 1952 & player_ID == "willite01")
war$player_ID[masktw2] <- "TW2"

masktw3 <- with(war, year_ID > 1953 & player_ID == "willite01")
war$player_ID[masktw3] <- "TW3"
# Seasons 1952 and 1953 match none of the masks and keep "willite01".

# Manual overrides: force WAR to 7 for two specific player-seasons.
# NOTE(review): the value is written to every matching row. If a
# player-season has both a batting and a pitching row, the per-year sum taken
# when plotting becomes 14 rather than 7 -- confirm this is intended.
masklg <- with(war, year_ID == 1934 & player_ID == "grovele01")
war$WAR[masklg] <- 7

masktw4 <- with(war, year_ID == 1950 & player_ID == "TW2")
war$WAR[masktw4] <- 7

# Series to plot, written as player_ID = line colour so each pairing is
# visible in one place. "TW1", "TW2" and "TW3" are the synthetic IDs given to
# the Ted Williams segments; every other entry is a value of war$player_ID.
series <- c(
  youngcy01 = "red",
  wagneho01 = "gray",
  cobbty01  = "gray",
  johnswa01 = "red",
  ruthba01  = "red",
  grovele01 = "red",
  gehrilo01 = "gray",
  TW1       = "red",
  TW2       = "red",
  TW3       = "red",
  musiast01 = "gray",
  mantlmi01 = "gray",
  mayswi01  = "red",
  koufasa01 = "gray",
  gibsobo01 = "red",
  schmimi01 = "red",
  morgajo02 = "gray",
  clemero02 = "gray",
  bondsba01 = "red",
  rodrial01 = "gray",
  pujolal01 = "red",
  kershcl01 = "gray"
)
# The plotting loop expects two parallel, unnamed character vectors.
# `names` deliberately keeps the script's variable name even though it
# shadows the base function of the same name.
names <- attr(series, "names")
colors <- unname(series)

# Open an empty canvas: seasons 1900-2013 on x, WAR from 0 to 15 on y.
# type = "n" draws nothing, and ann = FALSE suppresses the title and axis
# labels.
min_x <- 1900
max_x <- 2013
xl <- seq(min_x, max_x, by = 0.1)
yl <- rep(0, length(xl))
plot(0, type = "n", axes = TRUE, ann = FALSE,
     xlim = c(min_x, max_x), ylim = c(0, 15))
# Put a labelled tick on every decade of the x axis
decades <- seq(1900, 2010, by = 10)
axis(side = 1, at = decades, labels = decades)

# Draw one loess-smoothed WAR-by-season curve for every entry in `names`,
# coloured by the entry at the same position in `colors`.

# A shorter `colors` would give NA colours and silently invisible curves
stopifnot(length(names) == length(colors))

# seq_along() gives an empty sequence when `names` is empty
for (i in seq_along(names)) {
  name <- names[i]
  mask <- which(war$player_ID == name)
  # We need to add batter and pitcher WAR: a player can have one row from
  # each source file for the same season
  masked_war <- war[mask, ]
  myWar <- ddply(masked_war, .(year_ID), summarise, WAR = sum(WAR))
  x <- myWar$year_ID
  y <- myWar$WAR
  lo <- loess(y ~ x, span = 0.7)
  # Evaluate the fit on a fine grid spanning this player's seasons
  xl <- seq(min(x), max(x), 0.1)
  # colors[i] is the colour paired with names[i]; no object called `color`
  # exists in this script
  p <- lines(xl, predict(lo, xl), col = colors[i], lwd = 2)
}

# Add dotted red connectors across Ted Williams's two war periods. Each entry
# holds the end points (season, WAR) of one straight segment.
gaps <- list(
  list(x = c(1942, 1946), y = c(10.61, 10.87)),
  list(x = c(1951, 1954), y = c(7.22, 7.75))
)
for (gap in gaps) {
  x <- gap$x
  y <- gap$y
  lines(x, y, pch = 3, lty = "dotted", col = "red", lwd = 2)
}