# View difference between Paste ID: fsDng9Uz and Xhjiibpk
# (Header text from the paste site's diff view; not part of the R script.)
# Get the required package. library() stops with an error if plyr is not
# installed; require() would only return FALSE and let the script fail later
# at the first plyr call.
library(plyr)

# Input files (downloaded into the working directory beforehand):
# http://www.baseball-reference.com/data/war_daily_bat.txt
war_bat <- read.csv("war_daily_bat.txt", header = TRUE)
# http://www.baseball-reference.com/data/war_daily_pitch.txt
war_pit <- read.csv("war_daily_pitch.txt", header = TRUE)

# Break out just the columns we want and stack batting rows on top of
# pitching rows.
columns <- c("name_common", "player_ID", "year_ID", "WAR")
war <- rbind(war_bat[, columns], war_pit[, columns])

# Need to turn these columns into the appropriate data types.
# Going through as.character() first means a factor column is converted by its
# labels rather than its integer level codes; WAR entries that are not numbers
# become NA (with a coercion warning).
war$WAR <- as.numeric(as.character(war$WAR))
war$name_common <- as.character(war$name_common)
war$player_ID <- as.character(war$player_ID)

# Only get pre-2014 data. Rows with a missing year are dropped explicitly;
# indexing with an NA in the mask would otherwise produce all-NA rows.
mask1 <- !is.na(war$year_ID) & war$year_ID < 2014
war <- war[mask1, ]

# To figure out which names to use: compute the highest single-row WAR of each
# season, then pull the player rows that reached that value.
maxWar <- ddply(war, c("year_ID"), function(df) max(df$WAR, na.rm = TRUE))
colnames(maxWar) <- c("year_ID", "WAR")
str(maxWar)

# Join on both shared columns explicitly instead of letting join() guess the
# keys (and emit a "Joining by" message).
m <- join(war, maxWar, by = c("year_ID", "WAR"), type = "inner")

# Exploratory listing of the season leaders in chronological order; the result
# is only displayed when the script is run with top-level auto-printing.
arrange(m, year_ID)

# Split Ted Williams (player_ID "willite01") into three pseudo-players so each
# stretch of his career is drawn as its own curve:
#   TW1 = seasons through 1942
#   TW2 = remaining seasons before 1952
#   TW3 = seasons after 1953
# Order matters: the TW2 mask is computed after the TW1 rows were renamed, so
# it only catches the seasons that are still labelled "willite01".
# Rows for 1952 and 1953 keep the original ID and are not plotted.
masktw1 <- (war$year_ID <= 1942 & war$player_ID == "willite01")
war$player_ID[masktw1] <- "TW1"
masktw2 <- (war$year_ID < 1952 & war$player_ID == "willite01")
war$player_ID[masktw2] <- "TW2"
masktw3 <- (war$year_ID > 1953 & war$player_ID == "willite01")
war$player_ID[masktw3] <- "TW3"

# Force the total WAR of one player-season.
#   values: the WAR column
#   rows:   logical mask selecting that player-season's rows
#   total:  value the season should sum to
# The plotting code sums all rows of a season (batting plus pitching), so
# writing `total` into every matching row would multiply it by the number of
# rows. Put it on the first matching row and zero the others instead.
set_season_war <- function(values, rows, total) {
  hits <- which(rows)
  if (length(hits) > 0) {
    values[hits] <- 0
    values[hits[1]] <- total
  }
  values
}

# Manual overrides: Lefty Grove's 1934 season and Ted Williams's 1950 season
# are each pinned to a total of 7 WAR.
# NOTE(review): the script does not say why 7 was chosen; this assumes the
# value is meant as the season total — confirm.
masklg <- (war$year_ID == 1934 & war$player_ID == "grovele01")
war$WAR <- set_season_war(war$WAR, masklg, 7)

masktw4 <- (war$year_ID == 1950 & war$player_ID == "TW2")
war$WAR <- set_season_war(war$WAR, masktw4, 7)

# Players to draw, identified by player_ID, in the order they are plotted.
# "TW1", "TW2" and "TW3" are the pseudo-IDs given to the three segments of
# Ted Williams's career.
names <- c("youngcy01", "wagneho01", "cobbty01", "johnswa01", "ruthba01",
           "grovele01",
           "gehrilo01", "TW1", "TW2", "TW3", "musiast01",
           "mantlmi01", "mayswi01", "koufasa01", "gibsobo01",
           "schmimi01", "morgajo02", "clemero02",
           "bondsba01", "rodrial01", "pujolal01", "kershcl01")

# Line colour for each player, position-matched with `names`.
colors <- c("red", "gray", "gray", "red", "red",
            "red",
            "gray", "red", "red", "red", "gray",
            "gray", "red", "gray", "red",
            "red", "gray", "gray",
            "red", "gray", "red", "gray")

# The two vectors are used in parallel, so a mismatch must fail loudly here
# rather than silently mis-colour (or drop) a curve later.
stopifnot(length(names) == length(colors))

# Make the initial plot: an empty canvas covering min_x..max_x on the x axis
# and 0..15 on the y axis, with no axis titles (ann = FALSE).
min_x <- 1900
max_x <- 2013

# NOTE(review): xl is reassigned before use in the plotting loop and yl is not
# read anywhere in the visible script; both are kept in case later code
# relies on them.
xl <- seq(min_x, max_x, 0.1)
yl <- xl * 0

# Use min_x/max_x rather than repeating the literal years, so the range is
# defined in one place.
plot(0, type = "n", axes = TRUE, ann = FALSE,
     xlim = range(min_x, max_x), ylim = range(0, 15))

# Tick marks and labels at every decade within the plotted range.
decades <- seq(min_x, max_x, 10)
axis(side = 1, at = decades, labels = decades)

# Draw one smoothed WAR-by-season curve per entry of `names`, coloured with
# the matching entry of `colors`.
for (i in seq_along(names)) {
  name <- names[i]
  mask <- which(war$player_ID == name)

  # An ID with no rows would make loess() fail and abort the whole plot;
  # report it and carry on with the remaining players.
  if (length(mask) == 0) {
    warning("No WAR rows found for player ID ", name, call. = FALSE)
    next
  }

  # We need to add batter and pitcher WAR: collapse to one total per season.
  masked_war <- war[mask, ]
  myWar <- ddply(masked_war, .(year_ID), summarise, WAR = sum(WAR))
  x <- myWar$year_ID
  y <- myWar$WAR

  # Local-regression smooth of WAR against year, evaluated on a fine grid
  # spanning this player's first to last season.
  lo <- loess(y ~ x, span = 0.7)
  xl <- seq(min(x), max(x), 0.1)

  # colors[i] is this player's colour (the original referenced an undefined
  # object `color`).
  lines(xl, predict(lo, xl), col = colors[i], lwd = 2)
}

# Add dashed lines for Ted Williams's war periods: two dotted red segments
# drawn across the gaps between the TW1/TW2 and TW2/TW3 curves.
# NOTE(review): the y values are hard-coded; presumably they are the end
# points of the neighbouring smoothed curves — confirm if the data or the
# loess span changes.
# (`pch` is omitted because it has no effect on a line-type draw.)
x <- c(1942, 1946)
y <- c(10.61, 10.87)
lines(x, y, lty = "dotted", col = "red", lwd = 2)

x <- c(1951, 1954)
y <- c(7.22, 7.75)
lines(x, y, lty = "dotted", col = "red", lwd = 2)