Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Simulated data ----
# Build a known spike train, convolve it with a Gaussian peak shape ("blur
# kernel"), and add Poisson noise. Downstream blocks try to recover the
# (assumed unknown) peak shape from the noisy signal.
library(Matrix)  # sparse-matrix support for the banded design matrix G

set.seed(123)
n <- 200                 # signal length
x <- seq_len(n)
npeaks <- 20             # number of spikes
u <- sample(x, npeaks, replace = FALSE)  # spike locations, assumed known
peakhrange <- c(10, 1E3)                 # peak height range
# Peak heights drawn log-uniformly over peakhrange, assumed known
h <- 10^runif(npeaks, min = log10(min(peakhrange)), max = log10(max(peakhrange)))
a <- rep(0, n)           # spike train: zero everywhere except spike locations
a[u] <- h

# Gaussian peak shape of width w centred at u; treated as unknown downstream.
gauspeak <- function(x, u, w, h = 1) h * exp(((x - u)^2) / (-2 * (w^2)))

# Banded matrix whose columns are shifted copies of the theoretical peak shape.
bM <- do.call(cbind, lapply(seq_len(n), function(u) gauspeak(x, u = u, w = 5, h = 1)))

p <- 50                                                 # size of the peak shape model
g_theor <- gauspeak(x = 1:p, u = p / 2, w = 5, h = 1)   # ground-truth peak shape

y_nonoise <- as.vector(bM %*% a)  # noiseless signal = convolution of spikes with peak shape
y <- rpois(n, y_nonoise)          # observed signal with Poisson noise

# Design matrix G: column k holds the spike train shifted down by k - 1 rows,
# so G %*% g is the convolution of the spike train with a peak shape g.
G <- Matrix(matrix(0, n + p - 1, p), sparse = TRUE)
for (k in seq_len(p)) G[(1:n) + k - 1, k] <- a
G <- G[(1:n) + floor(p / 2) - 1, ]  # crop back to length n (centred)

par(mfrow = c(1, 1))
plot(y, type = "l", ylab = "Signal", xlab = "x",
     main = "Known spike train (red) convoluted with unknown blur kernel and Poisson noise")
lines(a, type = "h", col = "red")
# Peak shape estimate via weighted nonnegative ridge regression (wnnls) ----
# The ridge penalty is imposed by row-augmenting the design matrix with
# sqrt(lambda) * I and the response with p zeros; observation weights
# 1/(y+1) approximate the inverse Poisson variance of each observation.
library(nnls)
library(microbenchmark)

weights <- 1 / (y + 1)                  # ~1/variance weights for Poisson noise
weights <- n * weights / sum(weights)   # normalise so the weights sum to n
lambda <- 1E4                           # ridge regularization strength

# Solve the weighted, ridge-augmented NNLS problem (timed).
microbenchmark(
  g_wnnls <- nnls(A = as.matrix(rbind(G * sqrt(weights), diag(sqrt(lambda), p))),
                  b = c(y * sqrt(weights), rep(0, p)))$x
)  # ~1.4 ms

# Rescale the estimate to [0, 1] for comparison with the ground truth.
g_wnnls <- (g_wnnls - min(g_wnnls)) / (max(g_wnnls) - min(g_wnnls))

dev.off()
par(mfrow = c(2, 1))
plot(g_theor, type = "l", lwd = 7, col = "grey",
     main = "Ground truth (grey), nonnegative weighted LS ridge estimate (red)",
     ylab = "Peak shape", xlab = "x")
lines(g_wnnls, col = "red", lwd = 2)
# Peak shape estimate via nonnegative Poisson adaptive ridge regression ----
# Identity-link Poisson regression with nonnegativity constraints, solved with
# the EM algorithm in addreg::nnpois; an adaptive ridge penalty is imposed by
# row augmentation, with per-coefficient penalties derived from the wnnls fit.
library(addreg)

lambda <- 1E6  # overall regularization strength for the adaptive ridge penalty
# Adaptive penalty weights: coefficients the wnnls fit put near zero are
# penalised more heavily (wnnls estimate used as a truncated Gaussian prior).
adpenweights <- 1 / (g_wnnls^2 + 1E-5)
adpenweights <- n * adpenweights / sum(adpenweights)  # normalise to sum n
lambdas <- lambda * adpenweights                      # per-coefficient penalties

# NOTE(review): with an identity-link Poisson likelihood, augmented rows with
# zero response and diag(sqrt(lambdas)) design contribute sqrt(lambdas[j])*g[j]
# to the deviance — an L1-type term rather than a quadratic ridge term.
# Confirm this is the intended penalty before relying on the "ridge" label.
system.time(
  g_nnpoisridge <- nnpois(y = c(y, rep(0, p)),
                          x = as.matrix(rbind(G, diag(sqrt(lambdas), p))),
                          standard = rep(1, p),
                          offset = 0,
                          start = rep(1E-5, p),  # or g_wnnls + 1E-5
                          accelerate = "squarem")$coefficients
)  # ~0.02 s

# Rescale to [0, 1] for comparison with the ground truth.
g_nnpoisridge <- (g_nnpoisridge - min(g_nnpoisridge)) / (max(g_nnpoisridge) - min(g_nnpoisridge))

# Consistency fix: reuse g_theor directly instead of recomputing the identical
# curve gauspeak(x = 1:p, u = 25, w = 5, h = 1) (u = 25 is exactly p/2).
plot(g_theor, type = "l", lwd = 7, col = "grey",
     main = "Ground truth (grey), nonnegative Poisson adaptive ridge estimate (red)",
     ylab = "Peak shape", xlab = "x")
lines(g_nnpoisridge, col = "red", lwd = 2)
# PS: instead of using nnpois we could also use nnlm (NNLM package) with
# loss="mkl" (Kullback-Leibler), solved via coordinate descent, which gives
# almost the same result.
# Check that nonnegative Poisson adaptive ridge regression is calculated
# correctly via direct ML maximization with the PORT algorithm (quasi-Newton).
#
# Penalized negative log-likelihood for identity-link Poisson ridge regression.
#
# Args:
#   coefs:   coefficient vector (length p).
#   X:       n x p design matrix.
#   y:       observed counts (length n).
#   lambdas: ridge penalties, scalar or per-coefficient vector of length p.
# Returns: scalar objective = mean negative log-likelihood + ridge penalty.
NLL_RIDGEPOIS <- function(coefs, X, y, lambdas) {
  preds <- X %*% coefs
  LLs <- stats::dpois(y, lambda = preds, log = TRUE)  # per-observation log-likelihoods
  n <- nrow(X)
  # BUG FIX: the ridge penalty must be ADDED to the negative log-likelihood —
  # subtracting it rewards arbitrarily large coefficients, which is exactly why
  # the unconstrained optimum blew up to ~6.27e+68 in the check below. Also,
  # with a vector of adaptive penalties the correct quadratic form is
  # sum(lambdas * coefs^2); the old lambdas * sum(coefs^2) returned a length-p
  # vector instead of the scalar objective nlminb requires.
  -(1 / n) * sum(LLs) + sum(lambdas * coefs^2)
}
# Cross-check: direct minimisation of the penalized objective ----
# Minimise NLL_RIDGEPOIS with nlminb (PORT routines, quasi-Newton) under
# nonnegativity constraints, starting from the nnpois estimate above.
g_nnpoisridgeML = nlminb(start = g_nnpoisridge, # we use our estimates above as starting values
objective = NLL_RIDGEPOIS, X = as.matrix(G), y = y, lambdas=lambdas,
control = list(iter.max=1000, abs.tol=1E-20, rel.tol=1E-15),
lower = rep(0,p) # we use nonnegativity constraints
)$par
# NOTE(review): the huge optimum reported below (~6.27e+68) is consistent with
# the ridge penalty entering NLL_RIDGEPOIS with the wrong sign (subtracted
# instead of added), which makes large coefficients DECREASE the objective —
# verify the penalty term's sign before trusting this cross-check.
max(g_nnpoisridgeML) # not sure why I get such a large value here, 6.269765e+68, should be around 1 as above - maybe mistake here?
# as a quick fix I rescale coefficients
g_nnpoisridgeML = (g_nnpoisridgeML-min(g_nnpoisridgeML))/(max(g_nnpoisridgeML)-min(g_nnpoisridgeML))
# Compare against the EM-based nnpois estimate; the two should be close if
# both solve the same penalized problem.
max(abs(g_nnpoisridgeML-g_nnpoisridge)) # 0.01726672 - ML estimate is close to solution above, but not identical - could be optimizer issue though or mistake in code...
par(mfrow=c(1,1))
plot(g_theor,col="grey", type="l", lwd=5, main="Ground truth (grey), nonnegative Poisson adaptive ridge estimate (red),n nonnegative Poisson adaptive ridge ML estimate (blue)", ylab="Peak shape", xlab="x")
lines(g_nnpoisridge,col="red",lwd=2)
lines(g_nnpoisridgeML,col="blue",lwd=2)
# Peak shape estimate via h2o GLM (nonnegative identity-link Poisson ridge) ----
library(h2o)
library(data.table)

h2o.init()
df <- data.frame(X = as.matrix(G), y = y)
lambda <- 1E6  # ridge strength; h2o does not support adaptive (per-coefficient) penalty weights
system.time(data_h2o <- as.h2o(df))  # ~0.47 s - really slow...

# BUG FIX: h2o.glm's elastic-net mixing parameter `alpha` defaults to 0.5,
# so the original call fitted an elastic net, not a ridge. A pure L2 (ridge)
# penalty requires alpha = 0.
system.time(nnpois_h2o <- h2o.glm(y = "y", training_frame = data_h2o,
    family = "poisson", link = "identity", solver = "L_BFGS",
    lambda = lambda, alpha = 0, objective_epsilon = 1E-20, non_negative = TRUE,
    standardize = FALSE, intercept = FALSE))  # ~0.21 s - really slow...

# Drop the intercept row of the coefficients table, then rescale to [0, 1].
g_nnpoisridge_h2o <- as.vector(as.data.frame(nnpois_h2o@model$coefficients_table)[-1, 2])
g_nnpoisridge_h2o <- (g_nnpoisridge_h2o - min(g_nnpoisridge_h2o)) /
  (max(g_nnpoisridge_h2o) - min(g_nnpoisridge_h2o))

# Title fix: the original ",n " was a mangled "\n" line break.
plot(g_theor, col = "grey", type = "l", lwd = 5,
     main = "Ground truth (grey), nonnegative Poisson adaptive ridge estimate (red),\n nonnegative Poisson adaptive ridge h2o estimate (blue)",
     ylab = "Peak shape", xlab = "x")
lines(g_nnpoisridge, col = "red", lwd = 2)
lines(g_nnpoisridge_h2o, col = "blue", lwd = 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement