Finite-sample biasedness of IWLS

library(truncdist)
xm = 0
xM = 10
N = 500
num_runs = 1000

f = function(x) 0.1*x^4 - 0.5*x + 10

## Determine optimal parameters of linear model w.r.t. ptarget(x)=uniform(0,10)
# we want to minimize int((f2(x)-(ax+b))^2
# = int((0.1*x^4-(a+0.5)*x+10-b)^2
# = int((0.01*x^8-2*0.1*(a+0.5)*x^5+2*(10-b)*0.1*x^4 +
# (a+0.5)^2*x^2-2*(a+0.5)*(10-b)*x + (10-b)^2)
# = [0.01/9*x^9 - 1/30*(a+0.5)*x^6 + 1/25*(10-b)*x^5 + 1/3*(a+0.5)^2*x^3
#   -(a+0.5)*(10-b)*x^2 + (10-b)^2*x]
#
# d/da = [-1/30*x^6 + 2/3*a*x^3 + 1/3*x^3 - (10-b)*x^2] != 0
# d/db = [-1/25*x^5 + (a+0.5)*x^2 - 20x + 2bx] != 0

A = array(c(2*(xM-xm), xM^2-xm^2, xM^2-xm^2, 2/3*(xM^3-xm^3)), dim=c(2,2))
rhs = array(c(1/25*(xM^5-xm^5) - 0.5*(xM^2-xm^2) + 20*(xM-xm),
              1/30*(xM^6-xm^6) - 1/3*(xM^3-xm^3) + 10*(xM^2-xm^2)), dim=c(2, 1))
ideal_fit = lm(rhs~A-1)
ideal_model = function(x) ideal_fit$coefficients[1] + x*ideal_fit$coefficients[2]

# Verify that these parameters are in fact optimal: any slight perturbation should yield higher mse
xtest = seq(0, 10, .01)
mse_fun = function(params) mean((cbind(matrix(1, length(xtest)), xtest) %*% params - f(xtest))^2)
stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(0.99 * ideal_fit$coefficients))
stopifnot(mse_fun(ideal_fit$coefficients) < mse_fun(1.01 * ideal_fit$coefficients))

# Now run OLS and IWLS on 1000 realizations of the data
ols_params = array(dim=c(num_runs, 2))
iwls_params = array(dim=c(num_runs, 2))

for (run_idx in 1:num_runs) {
  xs = rtrunc(N, 'gamma', shape=2, scale=1, a=0, b=10)
  ys = f(xs)
  zs = ys + rnorm(N, 0, 40)
  ols_fit = lm(zs~xs)
  ols_params[run_idx, ] = ols_fit$coefficients
  iwls_fit = lm(zs~xs, weights=0.1/dgamma(xs, shape=2, scale=1))
  iwls_params[run_idx, ] = iwls_fit$coefficients

  # all of these parameters must yield higher mse than the ideal solution
  stopifnot(mse_fun(ols_fit$coefficients) > mse_fun(ideal_fit$coefficients))
  stopifnot(mse_fun(iwls_fit$coefficients) > mse_fun(ideal_fit$coefficients))
}

avg_ols_params = colMeans(ols_params)
avg_ols_model = function(x) avg_ols_params[1] + avg_ols_params[2] * x
avg_iwls_params = colMeans(iwls_params)
avg_iwls_model = function(x) avg_iwls_params[1] + avg_iwls_params[2] * x
dev.new()
plot(xs, zs, col=1)
lines(xtest, f(xtest), col=1)
lines(xtest, ideal_model(xtest), col=2)
lines(xtest, avg_ols_model(xtest), col=3)
lines(xtest, avg_iwls_model(xtest), col=4)
legend('topleft', legend=c('Data (single realization)',
                           'f(x)',
                           'ideal',
                           'OLS (avg over 1000 realizations)',
                           'IWLS (avg over 1000 realizations)'),
       col=c(1, 1, 2, 3, 4),
       lty = c(0, 1, 1, 1, 1),
       pch = c(1, NA, NA, NA, NA))