Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(truncdist)
- xm = 0
- xM = 10
- N = 500
- num_runs = 1000
- f = function(x) 0.1*x^4 - 0.5*x + 10
- ## Determine optimal parameters of linear model w.r.t. ptarget(x)=uniform(0,10)
- # we want to minimize int((f2(x)-(ax+b))^2
- # = int((0.1*x^4-(a+0.5)*x+10-b)^2
- # = int((0.01*x^8-2*0.1*(a+0.5)*x^5+2*(10-b)*0.1*x^4 +
- # (a+0.5)^2*x^2-2*(a+0.5)*(10-b)*x + (10-b)^2)
- # = [0.01/9*x^9 - 1/30*(a+0.5)*x^6 + 1/25*(10-b)*x^5 + 1/3*(a+0.5)^2*x^3
- # -(a+0.5)*(10-b)*x^2 + (10-b)^2*x]
- #
- # d/da = [-1/30*x^6 + 2/3*a*x^3 + 1/3*x^3 - (10-b)*x^2] != 0
- # d/db = [-1/25*x^5 + (a+0.5)*x^2 - 20x + 2bx] != 0
- A = array(c(2*(xM-xm), xM^2-xm^2, xM^2-xm^2, 2/3*(xM^3-xm^3)), dim=c(2,2))
- rhs = array(c(1/25*(xM^5-xm^5) - 0.5*(xM^2-xm^2) + 20*(xM-xm),
- 1/30*(xM^6-xm^6) - 1/3*(xM^3-xm^3) + 10*(xM^2-xm^2)), dim=c(2, 1))
- ideal_fit = lm(rhs~A-1)
- ideal_model = function(x) ideal_fit$coefficients[1] + x*ideal_fit$coefficients[2]
- # Verify that these parameters are in fact optimal: any slight perturbation should yield higher mse
- xtest = seq(0, 10, .01)
- mse_fun = function(params) mean((cbind(matrix(1, length(xtest)), xtest) %*% params - f(xtest))^2)
- stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(0.99 * ideal_fit$coefficients))
- stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(1.01 * ideal_fit$coefficients))
- # Now run OLS and IWLS on 1000 realizations of the data
- ols_params = array(dim=c(num_runs, 2))
- iwls_params = array(dim=c(num_runs, 2))
- for (run_idx in 1:num_runs) {
- xs = rtrunc(N, 'gamma', shape=2, scale=1, a=0, b=10)
- ys = f(xs)
- zs = ys + rnorm(N, 0, 40)
- ols_fit = lm(zs~xs)
- ols_params[run_idx, ] = ols_fit$coefficients
- iwls_fit = lm(zs~xs, weights=0.1/dgamma(xs, shape=2, scale=1))
- iwls_params[run_idx, ] = iwls_fit$coefficients
- # all of these parameters must yield higher mse than the ideal solution
- stopifnot(mse_fun(ols_fit$coefficients) > mse_fun(ideal_fit$coefficients))
- stopifnot(mse_fun(iwls_fit$coefficients) > mse_fun(ideal_fit$coefficients))
- }
- avg_ols_params = colMeans(ols_params)
- avg_ols_model = function(x) avg_ols_params[1] + avg_ols_params[2] * x
- avg_iwls_params = colMeans(iwls_params)
- avg_iwls_model = function(x) avg_iwls_params[1] + avg_iwls_params[2] * x
- dev.new()
- plot(xs, zs, col=1)
- lines(xtest, f(xtest), col=1)
- lines(xtest, ideal_model(xtest), col=2)
- lines(xtest, avg_ols_model(xtest), col=3)
- lines(xtest, avg_iwls_model(xtest), col=4)
- legend('topleft', legend=c('Data (single realization)',
- 'f(x)',
- 'ideal',
- 'OLS (avg over 1000 realizations)',
- 'IWLS (avg over 1000 realizations)'),
- col=c(1, 1, 2, 3, 4),
- lty = c(0, 1, 1, 1, 1),
- pch = c(1, NA, NA, NA, NA))
Advertisement
Add Comment
Please, Sign In to add comment