Guest User

Finite-sample biasedness of IWLS

a guest
Aug 8th, 2021
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.78 KB | None | 0 0
  1. library(truncdist)
  2. xm = 0
  3. xM = 10
  4. N = 500
  5. num_runs = 1000
  6.  
  7. f = function(x) 0.1*x^4 - 0.5*x + 10
  8.  
  9. ## Determine optimal parameters of linear model w.r.t. ptarget(x)=uniform(0,10)
  10. # we want to minimize int((f2(x)-(ax+b))^2
  11. # = int((0.1*x^4-(a+0.5)*x+10-b)^2
  12. # = int((0.01*x^8-2*0.1*(a+0.5)*x^5+2*(10-b)*0.1*x^4 +
  13. # (a+0.5)^2*x^2-2*(a+0.5)*(10-b)*x + (10-b)^2)
  14. # = [0.01/9*x^9 - 1/30*(a+0.5)*x^6 + 1/25*(10-b)*x^5 + 1/3*(a+0.5)^2*x^3
  15. #   -(a+0.5)*(10-b)*x^2 + (10-b)^2*x]
  16. #
  17. # d/da = [-1/30*x^6 + 2/3*a*x^3 + 1/3*x^3 - (10-b)*x^2] != 0
  18. # d/db = [-1/25*x^5 + (a+0.5)*x^2 - 20x + 2bx] != 0
  19.                                            
  20. A = array(c(2*(xM-xm), xM^2-xm^2, xM^2-xm^2, 2/3*(xM^3-xm^3)), dim=c(2,2))
  21. rhs = array(c(1/25*(xM^5-xm^5) - 0.5*(xM^2-xm^2) + 20*(xM-xm),
  22.               1/30*(xM^6-xm^6) - 1/3*(xM^3-xm^3) + 10*(xM^2-xm^2)), dim=c(2, 1))
  23. ideal_fit = lm(rhs~A-1)
  24. ideal_model = function(x) ideal_fit$coefficients[1] + x*ideal_fit$coefficients[2]
  25.  
  26. # Verify that these parameters are in fact optimal: any slight perturbation should yield higher mse
  27. xtest = seq(0, 10, .01)
  28. mse_fun = function(params) mean((cbind(matrix(1, length(xtest)), xtest) %*% params - f(xtest))^2)
  29. stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(0.99 * ideal_fit$coefficients))
  30. stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(1.01 * ideal_fit$coefficients))
  31.  
  32. # Now run OLS and IWLS on 1000 realizations of the data
  33. ols_params = array(dim=c(num_runs, 2))
  34. iwls_params = array(dim=c(num_runs, 2))
  35.  
  36. for (run_idx in 1:num_runs) {
  37.   xs = rtrunc(N, 'gamma', shape=2, scale=1, a=0, b=10)
  38.   ys = f(xs)
  39.   zs = ys + rnorm(N, 0, 40)
  40.   ols_fit = lm(zs~xs)
  41.   ols_params[run_idx, ] = ols_fit$coefficients
  42.   iwls_fit = lm(zs~xs, weights=0.1/dgamma(xs, shape=2, scale=1))
  43.   iwls_params[run_idx, ] = iwls_fit$coefficients
  44.  
  45.   # all of these parameters must yield higher mse than the ideal solution
  46.   stopifnot(mse_fun(ols_fit$coefficients) > mse_fun(ideal_fit$coefficients))
  47.   stopifnot(mse_fun(iwls_fit$coefficients) > mse_fun(ideal_fit$coefficients))
  48. }
  49.  
  50. avg_ols_params = colMeans(ols_params)
  51. avg_ols_model = function(x) avg_ols_params[1] + avg_ols_params[2] * x
  52. avg_iwls_params = colMeans(iwls_params)
  53. avg_iwls_model = function(x) avg_iwls_params[1] + avg_iwls_params[2] * x
  54. dev.new()
  55. plot(xs, zs, col=1)
  56. lines(xtest, f(xtest), col=1)
  57. lines(xtest, ideal_model(xtest), col=2)
  58. lines(xtest, avg_ols_model(xtest), col=3)
  59. lines(xtest, avg_iwls_model(xtest), col=4)
  60. legend('topleft', legend=c('Data (single realization)',
  61.                            'f(x)',
  62.                            'ideal',
  63.                            'OLS (avg over 1000 realizations)',
  64.                            'IWLS (avg over 1000 realizations)'),
  65.        col=c(1, 1, 2, 3, 4),
  66.        lty = c(0, 1, 1, 1, 1),
  67.        pch = c(1, NA, NA, NA, NA))
  68.  
Advertisement
Add Comment
Please, Sign In to add comment