Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ########
- ## About myself...
- ##########
- ## how does one call a function programatically?
- ## with call and do.call
- ## say I have a function to add two numbers
- ```{r}
- myf <- function(x, y) x + y
- myf(1, 1)
- ```
- ## do.call evaluates the function "what", with the arguments "args" (supplied as a list)
- ```{r}
- args(do.call)
- do.call(myf, list(1, 1))
- ```
- ## call prepares it but does not evaluate it, and instead takes arguments separately through the ... argument
- args(call)
- (c1 <- call("myf", 1, 1))
- eval(c1)
- ## I'll focus on do.call
- ## For better or for worse, I also will ignore the "envir" option which allows the call to be executed in a particular environment.
- ## For better, because it would take longer than I have to address this in enough detail to be meaningful.
- ## For worse because every so often really weird things can happen if you don't understand what this means.
- ##########
- ## most common use (perhaps) is as part of split-apply-combine, as so named and made famous by Hadley Wickham
- ## this is what we used to do before he wrote the plyr package, and still is occassionally useful
- ## say we have data that we want to split by group and apply a function to
- ## here's a data frame with five groups, 10 observations of a and b for each group
- set.seed(5)
- d <- data.frame(group=rep(LETTERS[1:5], each=10), a=10 + round(rnorm(50, 1)), b=11 + round(rnorm(50,1)))
- ## split it by group
- ds <- split(d, d$group)
- ## apply a function to each part that gets mean a, mean b, confidence interval of difference for paired t test
- out <- lapply(ds, function(di) with(di, c(mean.a=mean(a), mean.b=mean(b), t.confint=t.test(a, b, paired=TRUE)$conf.int)))
- do.call(rbind, out)
- #####################
- ## more on how it works, with lm (because it shows you the call itself in the output)
- ## let's make and plot some data (avoiding dollar signs...)
- set.seed(1)
- d <- data.frame(x=round(runif(50, 0, 10), 1))
- d <- within(d, {
- y <- round(1 + 0.9*x + rnorm(n, 0, 4), 1)
- })
- with(d, plot(x, y))
- ## normally would fit model like this
- lm(y ~ x, data=d)
- ## so with do.call, let's try this...
- do.call(lm, list(y~x, data=d))
- ## wow, what was that!
- ## two things: 1) function call is passed in as value of argument, and 2) parameters are evaluated before sending to do.call
- ## instead send the function as a character and wrap parameters we don't want evaluated first in "quote", which simply returns its
- ## argument when evaluated
- do.call("lm", list(y~x, data=quote(d)))
- ## Both, however, give the correct result and can be used later with summary, coef, etc, as needed
- ## The fact that parameters are evaluated first, though, is potentially really handy!
- #######
- ## this is my second main use for do.call
- ## to programatically fit different formulas
- ## can store formula in a character variable,
- ## could be a vector of options that we loop through
- f <- "y ~ x"
- ## will work without do.call, but call refers to "f"
- (m1 <- lm(f, data=d))
- ## do. call instead evaluates the f first, so call is as expected
- (m2 <- do.call("lm", list(as.formula(f), data=quote(d))))
- ## doesn't matter in most contexts, but makes me a little nervous sometimes
- ## say we change f, then update the model (not that I use update a lot...)
- f <- "y ~ 1"
- update(m1)
- update(m2)
- ## there are situations when you might want either, I guess, but it feels cleaner to
- ## me to have the actual formula I used stored rather than a generic "f".
- ## Though if you think about it, I prefer the opposite for the data argument, so...
- #################
- ## using do.call for optimal use of optim
- ## see also: stats4::mle and bbmle::mle2
- ## which makes some (possibly all) of this unneeded
- ## still, handy to see how parameters can be passed around from function to function
- ## making data in global environment
- ## in real life, this would be in a data frame
- ## could explore how to write functions that use variables from within data frame but not today...
- rm(list=ls())
- set.seed(1)
- n <- 50
- x <- round(runif(n, 0, 10), 1)
- y <- round(1 + 0.9*x + rnorm(n, 0, 4), 1)
- plot(x, y)
- ## say we want to find the best linear fit, and standard deviation, assuming
- ## data is normally distributed around linear fit with constant variance
- ## y_i ~ N(a + bx_i, s^2)
- ## want to get parameters a, b, and s that maximize corresponding likelihood
- m1 <- lm(y ~ x)
- summary(m1)
- ## I'll report log(s) instead of s itself, so that range can be over whole real line
- setNames(c(coef(m1), log(sqrt(sum(residuals(m1)^2)/n))), c("int", "slope", "logsd"))
- ## say we want to do this "by hand"
- ## as an example of what to do if had more complex likelihood function
- ## we want to minimize the negative log likelihood
- negloglik <- function(int, slope, logsd, x, y) {
- -sum(dnorm(y, mean=int + slope * x, sd=exp(logsd), log=TRUE))
- }
- ## using optim is an option to do this
- ## but optim wants the parameters as one parameter at the beginning
- args(optim)
- ## method one
- ## positional, lengthy
- nll1 <- function(par, x, y) {
- negloglik(int=par[1], slope=par[2], logsd=par[3], x=x, y=y)
- }
- optim(par=c(int=1, slope=1, logsd=log(4)), nll1, x=x, y=y)$par
- ## method two
- ## uses do.call to put them in call automatically
- nll2 <- function(par, x, y) {
- do.call(negloglik, c(as.list(par), list(x=x, y=y)))
- }
- optim(par=c(int=1, slope=1, logsd=log(4)), nll2, x=x, y=y)$par
- #####
- ## aside: combining lists
- ## combining lists seems like it should be straightforward
- ## here are two lists, let's combine them
- a1 <- list(x=1, y=2)
- a2 <- list(a=c(10,11), b=c(11,12))
- c(a1, a2)
- ## but this only works if both of the things are lists,
- ## if not, it doesn't do what we want
- d <- 1:3
- c(a2, d=d)
- list(a2, d=d)
- c(a2, list(d=d))
- ## the other function I used is `as.list`, which takes a vector
- ## and makes it into a list instead. Contrast with just `list`.
- a <- c(x=1, y=2)
- as.list(a)
- list(a)
- #####
- ## method three
- ## uses do.call to optimize over some, not others
- ## this is really why I learned about this, to be able to profile the likelihood more easily
- nll3 <- function(par, x, y, ...) {
- do.call(negloglik, c(as.list(par), list(x=x, y=y), ...))
- }
- optim(par=c(int=1, slope=1), nll3, x=x, y=y, logsd=log(4))$par
- ## method four
- ## more generic, as all parameters could get wrapped into the ...
- nll4 <- function(par, ...) {
- do.call(negloglik, c(as.list(par), list(...)))
- }
- optim(par=c(int=1, slope=1), nll4, x=x, y=y, logsd=log(4))$par
- #######
- ## OK, now we're going to "show off..."
- ## not that you would choose these for this example
- ## but might be handy in other kinds of circumstances
- ## method five
- ## a helper function for optim
- optimhelp <- function(par, FUN, ...) {
- do.call(FUN, c(as.list(par), list(...)))
- }
- optim(par=c(int=1, slope=1), optimhelp, FUN=negloglik, x=x, y=y, logsd=log(4))$par
- ## method six factory to make really generic
- ## ignoring issues with environments and closures right now...
- optfactory <- function(FUN) {
- function(par, ...) { do.call(FUN, c(as.list(par), list(...))) }
- }
- nllpar <- optfactory(negloglik)
- optim(par=c(int=1, slope=1), nllpar, x=x, y=y, logsd=log(4))$par
- ## method seven, build a new version of optim
- ## this automatically allows extra parameters to be sent to optim as well!
- optim2 <- function(par, fn, ...) {
- ff <- function(par, ...) { do.call(fn, c(as.list(par), list(...))) }
- optim(par=par, fn=ff, ...)
- }
- optim2(par=c(int=1, slope=1), fn=negloglik, x=x, y=y, logsd=log(4))$par
- ## I suppose you could oneliner this too...
- optim(par=c(int=1, slope=1), fn=function(par, ...) { do.call(negloglik, c(as.list(par), list(...))) }, x=x, y=y, logsd=log(4))$par
- ## or, can just include them as parameters,
- ## probably the clearest way to write the code for users
- optim2b <- function(par, fn, gr=NULL, ...,
- method = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent"),
- lower = -Inf, upper = Inf, control = list(), hessian = FALSE) {
- ff <- function(par, ...) { do.call(fn, c(as.list(par), list(...))) }
- optim(par=par, fn=ff, gr=gr, ..., method=method, lower=lower, upper=upper, control=control, hessian=hessian)
- }
- optim2b(par=c(int=1, slope=1), fn=negloglik, x=x, y=y, logsd=log(4), hessian=TRUE)[c("par", "hessian")]
- ## or, for kicks, could show how to separate parameters for the two functions
- optim2c <- function(par, fn, extra, ...) {
- ff <- function(par, extra) { do.call(fn, c(as.list(par), extra)) }
- optim(par=par, fn=ff, extra=extra, ...)
- }
- optim2c(par=c(int=1, slope=1), fn=negloglik, extra=list(x=x, y=y, logsd=log(4)), hessian=TRUE)[c("par", "hessian")]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement