Untitled

n<-50
covL<-seq(0,1,length.out=n)
covp<-seq(0,1,length.out=n)
trueMeans<-exp(1.5-2*covL)
probability<-exp(-1.5+covp*2)/(1+exp(-1.5+covp*2))
U<-runif(n,0,1)
y <- rpois(n,trueMeans)
y[U<probability] <- 0

#initialize values
initial.l<-glm(y[y>0] ~ covL[y>0], family = "poisson")$coefficients
Beta0.l<-as.numeric(initial.l[1])
Beta1.l<-as.numeric(initial.l[2])

Beta0.p<-(sum(y==0)-sum(exp(Beta0.l+Beta1.l*covL)))/length(y)
Beta1.p<-0

#going to use the EM algorithm, so zhat is going to be my latent variable, initialize it here.

zhat <- rep(0,length(y))
for (i in 1:100) {
#rep through the EM algorithm 100 times, I know this is excessive


#E step
zhat[y==0]<-((1+exp(-(Beta0.p+Beta1.p*covp)-exp(Beta0.l+Beta1.l*covL)))^(-1))[y==0]


  #M step for the coefficients that estimate the mean (lambda)
 lValues<-glm(y ~ covL, family = "poisson",weights=1-zhat)$coefficients
 Beta0.l<-as.numeric(lValues[1])
 Beta1.l<-as.numeric(lValues[2])


#M step for the coefficients that estimate the p value
y_star<-as.numeric(y!=0)
y_star<-c(y_star,y[y==0])
weightsV<-c(1-zhat,zhat[y==0])
covariates<-c(covp,covp[y==0])

 pValues<- glm(y_star~covariates,family=binomial ,weights=weightsV)$coefficients

 Beta0.p<-as.numeric(pValues[1])
 Beta1.p<-as.numeric(pValues[2])

 cat("Iteration: ",i, "  Beta0.l: ",Beta0.l, "  Beta1.l: ",Beta1.l,"  Beta0.p: ", Beta0.p,"  Beta1.p: ", Beta1.p,"n")
}

set.seed(1071)
n<-50
covL<-seq(0,1,length.out=n)
covp<-seq(0,1,length.out=n)
trueMeans<-exp(1.5-2*covL)
probability<-exp(-1.5+covp*2)/(1+exp(-1.5+covp*2))
U<-runif(n,0,1)
y <- rpois(n,trueMeans)
y[U<probability] <- 0

nll_zip <- function(par) {
  lambda <- exp(par[1] + par[2] * covL)
  p <- exp(par[3] + par[4] * covp) / (1 + exp(par[3] + par[4] * covp))
  lik <- p * (y == 0) + (1 - p) * dpois(y, lambda)
  -sum(log(lik))
}

opt <- optim(rep(0, 4), nll_zip, hessian = TRUE)

library("pscl")
m <- zeroinfl(y ~ covL | covp)

cbind("True" = c(1.5, -2, -1.5, 2), "zeroinfl" = coef(m), "optim" = opt$par)
##                   True  zeroinfl     optim
## count_(Intercept)  1.5  1.350510  1.350757
## count_covL        -2.0 -1.877199 -1.877871
## zero_(Intercept)  -1.5 -1.452235 -1.453011
## zero_covp          2.0  1.739965  1.739817

cbind("zeroinfl" = sqrt(diag(vcov(m))), "optim" = sqrt(diag(solve(opt$hessian))))
##                    zeroinfl     optim
## count_(Intercept) 0.2566790 0.2566963
## count_covL        0.7814104 0.7816395
## zero_(Intercept)  0.8410982 0.8414687
## zero_covp         1.8843705 1.8856983