Advertisement
Guest User

Untitled

a guest
Jul 1st, 2016
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.97 KB | None | 0 0
  1. # Function to make ML estimate of EPP rate p plus 95% profile likelihood confidence intervals,
  2. # taking into account that for pairs with mismatches multiple EPP events could have occured
  3. #
  4. # input is
  5. # x=vector of booleans or 0 and 1s specifying if there was a mismatch or not (1 or TRUE = mismatch), n=vector with nr of meioses per pair
  6. # optional arguments:
  7. # plotit = make plot of log likelihood and Chi squared statistic as a function of p
  8. # pmax=max EPP prob in plots
  9. # output is named list with est = ML estimate and LCL and UCL = lower and upper 95% C.L.s of EPP rate
  10.  
  11. estimateP = function(x, n , plotit=TRUE, pmax=0.05) {
  12. if (!is.logical(x[[1]])) x = (x==1)
  13. logLp = function(p, x, n) sum((log(1 - (1-p)^n))[x]) + sum((n*log(1-p))[!x]) # see http://stats.stackexchange.com/questions/152111/censored-binomial-model-log-likelihood
  14. Chisq = function (p0, phat, x, n) 2*(logLp(phat, x, n) - logLp(p0, x, n)) # chi square LRT statistic of model with p0 vs one with best ML estimate phat
  15. phat = optimize(function(p) -2*logLp(p, x, n), lower=1e-16, upper=1-1e-16, tol=1e-8)$minimum # ML estimate of p
  16. # or using f <- function(q, x, n) sum((n / (1 - q^n))[x]) - sum(n)
  17. # phat = 1-optimize(function(q) f(q, x, n)^2, lower=1e-16, upper=1-1e-16, tol=1e-8)$minimum
  18. pUCL = optimize(function(p0) (Chisq(p0, phat, x, n)-qchisq(0.95, df = 1))^2, lower=phat, upper=1-1e-16, tol=1e-16)$minimum
  19. pLCL = optimize(function(p0) (Chisq(p0, phat, x, n)-qchisq(0.95, df = 1))^2, lower=1e-16, upper=phat, tol=1e-16)$minimum
  20. if (plotit) { pvals = seq(0, pmax, length.out=10000)
  21. plot(pvals, sapply(pvals,function(p) logLp(p, x, n)), type="l", ylab="Log likelihood", xlab="p")
  22. plot(pvals, sapply(pvals,function(p0) Chisq(p0, phat, x, n)), type="l", ylab="Chi square LRT statistic", xlab="p", ylim=c(0,qchisq(0.95, df = 1)))
  23. abline(h = qchisq(0.95, df = 1), col="red", lty=2) # corresponding to 95% C.L.s on p
  24. }
  25.  
  26. return(list(est=phat, LCL=pLCL, UCL=pUCL))
  27. }
  28.  
  29. n = c(7, 7, 7, 7, 7, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 23, 23, 24, 24, 25, 27, 31) # number of generations/meioses that separate presumed common paternal ancestor
  30. x = c(rep(0,6), 1, rep(0,7), 1, 1, 1, 0, 1, rep(0,20), 1, rep(0,13), 1, 1, rep(0,5)) # whether pair of individuals had non-matching Y chromosomal genotypes
  31.  
  32. est = estimateP(x,n)
  33. c(est$est, est$LCL, est$UCL)*100
  34. 0.9096007 0.4165108 1.6877681
  35.  
  36. # ML estimation, assuming that EPP rate p shows a temporal trend
  37. # where log(p/(1-p))=a+b*n
  38. # here we make a ML estimate of parameters a and b
  39. # and we use package bbmle for convenience
  40.  
  41. estimatePtemp = function(x, n) {
  42. if (!is.logical(x[[1]])) x = (x==1)
  43. pfun = function(a, b, n) exp(a+b*n)/(1+exp(a+b*n)) # we now write p as a function of a, b and n
  44. logL = function(a, b, x, n) sum((log(1 - (1-pfun(a, b, n))^n))[x]) +
  45. sum((n*log(1-pfun(a, b, n)))[!x]) # a and b are params to be estimated, modified from http://stats.stackexchange.com/questions/152111/censored-binomial-model-log-likelihood
  46. neglogL = function(a, b, x, n) -logL(a, b, x, n) # negative log-likelihood
  47. require(bbmle)
  48. fit = mle2(neglogL, start=list(a=-3, b=-0.1), data=list(x=x, n=n))
  49. return(fit)
  50. }
  51.  
  52. # fitted coefficients
  53. estfit = estimatePtemp(x, n)
  54. cbind(coef(estfit),confint(estfit)) # parameter estimates and profile likelihood confidence intervals
  55. # 2.5 % 97.5 %
  56. # a -3.09054167 -5.3191406 -1.12078519
  57. # b -0.09870851 -0.2396262 0.02848305
  58. summary(estfit)
  59. # Coefficients:
  60. # Estimate Std. Error z value Pr(z)
  61. # a -3.090542 1.057382 -2.9228 0.003469 **
  62. # b -0.098709 0.067361 -1.4654 0.142819
  63.  
  64. pfun = function(a, b, n) exp(a+b*n)/(1+exp(a+b*n))
  65. xvals=1:max(n)
  66. par(mfrow=c(1,1))
  67. plot(xvals,sapply(xvals,function (n) pfun(coef(estfit)[1], coef(estfit)[2], n)),
  68. type="l", xlab="Generations ago (n)", ylab="EPP rate (p)")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement