Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Example of regression to the mean
- set.seed(123)
- ctrl <- matrix(rnorm(0.5e5, mean = 0, sd = 0.5), ncol = 5)
- treat1 <- matrix(rnorm(0.5e5, mean = 0, sd = 0.5), ncol = 5)
- treat2 <- matrix(rnorm(0.5e5, mean = 0, sd = 0.5), ncol = 5)
- # seed 200 differentially expressed genes
- diff_genes <- sample(seq.int(nrow(treat1)), 200)
- treat1[diff_genes, ] <- treat1[diff_genes, ] + 0.75
- treat2[diff_genes, ] <- treat2[diff_genes, ] + 0.75
- m <- cbind(ctrl, treat1, treat2)
- # the mean expression scores are uncorrelated between the groups
- panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
- {
- usr <- par("usr"); on.exit(par(usr))
- par(usr = c(0, 1, 0, 1))
- r <- abs(cor(x, y))
- txt <- format(c(r, 0.123456789), digits = digits)[1]
- txt <- paste0(prefix, txt)
- if (missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
- text(0.5, 0.5, txt, cex = cex.cor * r)
- }
- panel.smooth <- function(x, y, show_points = TRUE, ...) {
- smoothScatter(x = x, y = y, ..., nbin = 64,
- nrpoints = 0,
- add = TRUE, gap = 0.2)
- if (show_points == TRUE) {
- points(x[diff_genes], y[diff_genes],
- col = adjustcolor("darkgreen", 0.2), pch = 19)
- }
- abline(h = 0, v = 0, lty = 2)
- }
- # fig.width = 5.5, fig.height = 5.5
- pairs(
- cbind(
- rowMeans(m[, 1:5]),
- rowMeans(m[, 6:10]),
- rowMeans(m[, 11:15])
- ),
- labels = c("Ctrl", "Treatment1", "Treatment2"),
- lower.panel = panel.smooth, upper.panel = panel.cor
- )
- # calculating 'treatment effects' compared to THE SAME control group
- m1 <- rowMeans(m[, 6:10]) - rowMeans(m[, 1:5])
- m2 <- rowMeans(m[, 11:15]) - rowMeans(m[, 1:5])
- # increases the correlation between the effects
- # fig.width = 5.5, fig.height = 5.5
- pairs(
- cbind(m1[-diff_genes], m2[-diff_genes]),
- labels = c("Treament 1 vs Ctrl", "Treament 2 vs Ctrl"),
- lower.panel = panel.smooth, upper.panel = panel.cor,
- show_points = FALSE
- )
- title(
- main = "Treatment effects*", line = 3
- )
- title(
- sub = "*Differentially expressed genes were excluded", line = 4
- )
- # pick genes based on the first comparision
- pvals <- apply(m, 1, function(z) {
- t.test(x = z[6:10], y = z[1:5])$p.value
- })
- picked <- head(order(pvals), n = 100)
- # these genes show
- # - still an effect in the second comparison (because of the shared denominator)
- # - smaller effects in the second comparison (because of regression to the mean)
- summary(m1[picked])
- summary(m2[picked])
- # fig.width = 5.5, fig.height = 5.5
- smoothScatter(m1, m2, xlab = "Treament 1 vs Ctrl",
- ylab = "Treament 2 vs Ctrl",
- main = "Differentially expressed genes")
- points(m1[diff_genes], m2[diff_genes], pch = 21, col = "darkgrey",
- bg = adjustcolor("darkgreen", 0.5))
- abline(a = 0, b = 1, h = 0, v = 0, lty = 2)
- # fig.width = 5.5, fig.height = 5.5
- smoothScatter(m1, m2, xlab = "Treament 1 vs Ctrl",
- ylab = "Treament 2 vs Ctrl",
- main = "Genes picked based on Treatment 1 vs Ctrl")
- points(m1[picked], m2[picked], pch = 21, col = "darkgrey",
- bg = adjustcolor("red", 0.5))
- abline(a = 0, b = 1, h = 0, v = 0, lty = 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement