Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Pre-class work ----
# Toy dataset for demonstrating least-squares fitting with optim().
# Six points roughly on the line y = 2x - 1.
dat <- data.frame(
  x = c(1, 2, 3, 4, 5, 6),
  y = c(1, 3, 5, 6, 8, 12)
)
# Residual sum of squares for the line y = par[1] + par[2] * x.
#
# data: a data.frame with numeric columns `x` and `y`.
# par:  length-2 numeric vector c(intercept, slope).
# Returns the scalar sum of squared residuals; optim() minimises this.
min.RSS <- function(data, par) {
  fitted_y <- par[1] + par[2] * data$x
  residuals <- fitted_y - data$y
  sum(residuals^2)
}
# Fit the toy data by direct RSS minimisation (Nelder-Mead by default).
result <- optim(par = c(0, 1), min.RSS, data = dat)

# Repeat on the Lalonde data: minimising the RSS over (intercept, slope)
# should closely reproduce the OLS coefficients of re78 ~ educ.
library(Matching)
data(lalonde)
lalonde_xy <- data.frame(y = lalonde$re78, x = lalonde$educ)
optim(par = c(0, 1), min.RSS, data = lalonde_xy)
lm(re78 ~ educ, data = lalonde)
- 't
- Optimization:
- $par
- [1] 920.8695 429.5908
- $value
- [1] 19262169077
- $counts
- function gradient
- 123 NA
- $convergence
- [1] 0
- $message
- NULL
- Regression:
- lm(formula = re78 ~ educ, data = lalonde)
- Coefficients:
- (Intercept) educ
- 918.2 429.9
- 't
# "Claw" density: a broad two-component normal mixture plus five narrow
# spikes, giving a multimodal surface for exercising optimisers.
# Only the first element of `xx` is used, so the function is safe to hand
# to optim(), which passes the whole parameter vector.
claw <- function(xx) {
  pt <- xx[1]
  broad  <- 0.46 * (dnorm(pt, -1.0, 2.0 / 3.0) + dnorm(pt, 1.0, 2.0 / 3.0))
  narrow <- (1.0 / 300.0) *
    (dnorm(pt, -0.5, 0.01) + dnorm(pt, -1.0, 0.01) + dnorm(pt, -1.5, 0.01))
  spikes <- (7.0 / 300.0) *
    (dnorm(pt, 0.5, 0.07) + dnorm(pt, 1.0, 0.07) + dnorm(pt, 1.5, 0.07))
  broad + narrow + spikes
}
# Claw ----
# (Optional) sample points from the claw surface:
# clawx <- sample(-200:200, replace = TRUE)
# clawdata <- data.frame(y = sapply(clawx, claw), x = clawx)

# Negate claw so that *minimising* finds a *maximum* of the density.
invclaw <- function(x) {
  -claw(x)
}

# NOTE: the default Nelder-Mead method is documented (?optim) as unreliable
# for one-dimensional problems; the answer depends strongly on the starting
# value, which is exactly what these three runs demonstrate.
optim(par = -2, invclaw)
optim(par = 0, invclaw)
optim(par = 2, invclaw)

# optim()'s `control` argument must be a *named list*, so the unnamed
# numeric vector below was never valid; kept commented out for reference:
# optim(par = 2, claw, control = c(5, -1, 0.01, 1e-3, 100, 0, 1e-8, 0, 10, 1, 5, 1e7, 0, 10, 10))

# optimize() is the right 1-D tool: it searches the whole interval [-2, 2].
optimize(claw, c(-2, 2), maximum = TRUE)
# Contaminated data ----
set.seed(123)

# Clean sample: twelve points on the line y = -x + 10 plus N(0, 1) noise.
# The rnorm() call is identical to before, so the seed yields the same y.
x <- 1:12
y <- 10 - x + rnorm(12, sd = 1)

# OLS fit on the clean data: slope should be near -1, intercept near 10.
lm(y ~ x)
# Mean squared residual for the line y = par[1] * x + par[2].
#
# par: length-2 numeric vector c(slope, intercept).
# xs, ys: the data to fit. They default to the global `x` and `y` so the
#   existing call optim(par = c(1, 1), min.reg) keeps working, but making
#   them parameters removes the hidden dependence on globals and lets the
#   loss be reused on any dataset.
# Returns the scalar mean of squared residuals.
min.reg <- function(par, xs = x, ys = y) {
  mean((ys - (par[1] * xs + par[2]))^2)
}
# Minimise the mean squared residual starting from slope = 1, intercept = 1.
result.reg <- optim(c(1, 1), min.reg)
result.reg[["par"]]    # fitted (slope, intercept); compare with lm() above
result.reg[["value"]]  # mean squared residual at the optimum
# Contaminate the sample: append a single observation (21, 21) produced by
# a totally different data generating process (small 'data contamination').
x <- append(x, 21)
y <- append(y, 21)
- 't
- Use R to obtain the coefficients for the simple regression
- Use optim to reproduce those coefficients
- Plot the regression line
- lm1 <- lm(y ~ x)
- plot(x,y)
- abline(lm1, col = "blue", lwd = 3)
- Consider what is wrong with the regression line
- Use optim() to run a robust regression (robust to data contamination) that minimizes the median squared residual instead of the mean of the squared residuals. Try it with the default optim() method, and then experiment with different starting values. Then try it with the "SANN" method, repeating the process with different starting values.
- Using your best results, add a robust regression line to your plot using a different color than the one you used in (3) above.
- t'
Add Comment
Please, Sign In to add comment