Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demonstration: two linear models are fitted on separate training groups.
# The responses of the "train2" group are shifted upwards by 5, so the model
# fitted on them (m2) carries that offset into its predictions for the
# unshifted "test" group. The script then compares how two metrics
# (correlation and mean absolute error) react to that offset.

# Fix the RNG seed so the simulated data, and therefore every number printed
# below, is reproducible from run to run.
set.seed(1)

# Make data with biased train2 set
n <- 300
d <- data.frame(x = runif(n, 0, 5))
d$y <- d$x + rnorm(n)
# Assign groups cyclically (train1, train2, test, train1, ...). rep_len()
# states the intent explicitly instead of relying on implicit recycling, and
# keeps working when n is not a multiple of 3.
d$g <- rep_len(c("train1", "train2", "test"), n)
is_train2 <- d$g == "train2"
d$y[is_train2] <- d$y[is_train2] + 5
plot(d$x, d$y, col = factor(d$g))

# Fit models: one per training group
m1 <- lm(y ~ x, data = subset(d, g == "train1"))
m2 <- lm(y ~ x, data = subset(d, g == "train2"))

# Make predictions on the held-out test group (subset once, reuse below)
test <- subset(d, g == "test")
p1 <- predict(m1, newdata = test)
p2 <- predict(m2, newdata = test)

# Is it clear that m2 is biased? Not from correlation: it is unchanged by
# adding a constant to the predictions, so the +5 offset in m2 is invisible.
cor(p1, test$y)
cor(p2, test$y)

# Is it clear that m2 is biased? Mean absolute error compares predictions to
# observations on the original scale, so the offset shows up here.
mean(abs(p1 - test$y))
mean(abs(p2 - test$y))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement