Advertisement
Guest User

Untitled

a guest
Mar 18th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 12.24 KB | None | 0 0
  1. ########################################################################
  2. # Title: Gene Expression, DNA Methylation and Environment
  3. # Author: Leticia Spindola
  4. # Date: 23/05/2018
  5. # Purpose: Analise dos dados de metilacao, expressao e dados ambientais
  6. #######################################################################
  7.  
  8. library(minfi)
  9. require(dplyr)
  10. library(ggplot2)
  11. library(DMRcate)
  12. library(limma)
  13. library(RColorBrewer)
  14. library(Gviz)
  15.  
  16. dataDirectory <- "~/Documents/ILS_INPD_48"
  17. setwd("~/Documents/ILS_INPD_48")
  18.  
  19. ############################################################
  20. ################### Dados de Expressao  ####################
  21. ############################################################
  22. # Importando os dados de expressao genica
  23. eset_met <- readRDS("eset_met.RDS")
  24.  
  25. # salvando os dados brutos de expressao em uma matrix
  26. d_matrix <- exprs(eset_met)
  27. pdata <- pData(eset_met)
  28.  
  29. # arrumar_IDs <- colnames(d_matrix)
  30. # sampleID <- pdata$sampleID
  31. # identical(arrumar_IDs,sampleID) # [1] TRUE
  32.  
  33. colnames(d_matrix) <- c("I211_W0","I211_W1","I175_W0","I175_W1","I070_W0","I070_W1","I051_W0",
  34.                         "I051_W1","I117_W0","I117_W1","I144_W0","I144_W1","I069_W0","I069_W1",
  35.                         "I197_W0","I197_W1","I209_W0","I209_W1","I227_W0","I227_W1","I224_W0",
  36.                         "I224_W1","I242_W0","I242_W1","I310_W0","I310_W1","I236_W0","I236_W1",
  37.                         "I378_W0","I378_W1","I272_W0","I272_W1","I281_W0","I281_W1","I338_W0",
  38.                         "I338_W1","I339_W0","I339_W1","I309_W0","I309_W1","I330_W0","I330_W1",
  39.                         "I331_W0","I331_W1")
  40.  
  41. probes <- fData(eset_met)
  42.  
  43. # Importando a lista para o R
  44. DEG <- read.csv2(file = "Genes_diff_expressos_Incidentes2.csv",header = F)
  45. # View(DEG)
  46. # class(DEG) # [1] "data.frame"
  47.  
  48. # Transformando em um vetor
  49. DEG <- as.character(DEG$V1)
  50. DEG
  51.  
  52. # Tirando genes duplicados
  53. DEG[duplicated(DEG)] # character(0)
  54. length(DEG) # [1] 66
  55.  
  56. # Transformando as informacoes dos genes em uma string so
  57. g <- paste(DEG,collapse = "|")
  58.  
  59. # Criando uma coluna com informacao de row.names em probes
  60. probes$row_names <- rownames(probes)
  61.  
  62. # Selecionando so as probes dos genes diferencialmente expressos nos incidentes
  63. genes_exp_incidentes <- filter(probes,grepl(g, ProbeID))
  64. dim(genes_exp_incidentes) # [1] 72 30
  65. x <- genes_exp_incidentes$SYMBOL
  66.  
  67. x[duplicated(x)]
  68. # [1] "PEX16"
  69.  
  70. keep <- genes_exp_incidentes$row_names
  71.  
  72. d_matrix_select <- d_matrix[keep,]
  73. dim(d_matrix_select) # [1] 72 44
  74.  
  75. ### Vector necessario para as analises: identificando quem eh quem (pair)
  76. n <- 2
  77. pair<- as.factor(rep(1:22,each=n))
  78.  
  79. ############################################################
  80. ################### Dados de Metilacao  ####################
  81. ############################################################
  82. # Importando os dados do EPIC
  83. rgset_HRC_norm_Flt <- readRDS("rgset_HRC_norm_Flt.RDS")
  84.  
  85. # Pegando as informacoes de annotacao de probe do EPIC
  86. annotation <- rgset_HRC_norm_Flt %>% getAnnotation %>% as.data.frame
  87.  
  88. # Importando as informacoes de phenotype
  89. targets_HRC <- read.csv2(file = "/Users/leticiaspindola/Documents/ILS_INPD_48/pdata_INPDmeth_final_25_07_2018.csv",
  90.                          header = T)
  91. targets_HRC <- targets_HRC[order(targets_HRC$sampleID),] # # ordenando as linhas do genetic ID
  92. targets_HRC_44_total <- targets_HRC[-c(27,28,33,34),] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  93.  
  94. rownames(targets_HRC_44_total) <- targets_HRC_44_total$sampleID
  95. targets_HRC_44 <-targets_HRC_44_total [,c("Un_ev_2_NEW","Inter_2_NEW","Cont_chan_2_NEW",
  96.                                           "School_2_NEW","Health_2_NEW","Env_stress_2_NEW","TIME")]
  97. targets_HRC_44$GeneticID <- rownames(targets_HRC_44)
  98. targets_HRC_44 <- filter(targets_HRC_44,TIME=="Followup")
  99. targets_HRC_44 <- select(targets_HRC_44,-TIME)
  100. rownames(targets_HRC_44) <- targets_HRC_44$GeneticID
  101. targets_HRC_44 <- select(targets_HRC_44,-GeneticID)
  102.  
  103. # calculate M-values for statistical analysis
  104. mvalues <- getM(rgset_HRC_norm_Flt)
  105. mvalues <- mvalues[,order(colnames(mvalues))] # ordenando as colunas dos dados de metilacao
  106. mvalues <- mvalues[,-c(27,28,33,34)] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  107.  
  108. # calculate Beta-values for plot and interpretation of statistical analysis
  109. betas <- getBeta(rgset_HRC_norm_Flt)
  110. betas <- betas[,order(colnames(betas))] # ordenando as colunas dos dados de metilacao
  111. betas <- betas[,-c(27,28,33,34)] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  112.  
  113. # Ordenado as colunas dos dados de expressao
  114. d_matrix_select <- d_matrix_select[,order(colnames(d_matrix_select))]
  115.  
  116. a <- colnames(mvalues)
  117. b <- colnames(d_matrix_select)
  118. c <- colnames(betas)
  119. d <- as.character(targets_HRC_44_total$sampleID)
  120. identical(a,b) # [1] TRUE
  121. identical(a,c) # [1] TRUE
  122. identical(b,c) # [1] TRUE
  123. identical(a,d) # [1] TRUE
  124. identical(b,d) # [1] TRUE
  125. identical(c,d) # [1] TRUE
  126.  
  127. # Para as analises abaixo, preciso dos objetos:
  128. # annotation, mvalues, betas, d_matrix_select, targets_HRC_44 e targets_HRC_44_total
  129. rm(d_matrix,eset_met,pdata,probes,rgset_HRC_norm_Flt,targets_HRC)
  130. gc()
  131.  
  132. ############ Selecionando CpGs do gene SERTAD2 #############
  133. # Verificando quantas CpGs tem no gene analisado
  134. SERTAD2_cpgs <- filter(annotation,grepl("NM_014755", UCSC_RefGene_Accession))
  135. dim(SERTAD2_cpgs) # [1] 28 46
  136.  
  137. keep <- SERTAD2_cpgs$Name
  138.  
  139. SERTAD2_mvalues <- mvalues[keep,]
  140. dim(SERTAD2_mvalues) # [1] 28 44
  141.  
  142. SERTAD2_betas <- betas[keep,]
  143. dim(SERTAD2_betas) # [1] 28 44
  144.  
  145. # Pegando os dados de expressao genica
  146. SERTAD2 <- as.numeric(d_matrix_select["xorMF.VUSF4ie5f9zk",])
  147.  
  148. # y = b + ax
  149. # y = dados de expressao
  150. # x = dados de metilacao
  151.  
  152. # Criando FOR
  153. # Criar uma data frame com 6 colunas e nrow(SERTAD2_mvalues) linhas:
  154. n <- nrow(SERTAD2_mvalues)
  155. SERTAD2_exp_met <- data.frame(id=rep(0, n), estimate=rep(0, n), SE=rep(0, n),
  156.                               t=rep(0, n), p=rep(0, n), r.sq=rep(0, n))
  157.  
  158. for (i in 1:nrow(SERTAD2_mvalues)) { # mudar aqui
  159.   row <- SERTAD2_mvalues[i,] # mudar aqui
  160.   id <- rownames(SERTAD2_mvalues)[i] # mudar aqui
  161.   x <- lm(SERTAD2~row+pair)  # Mudar aqui
  162.   estimate <- summary(x)$coefficients[2,1]
  163.   SE <- summary(x)$coefficients[2,2]
  164.   t <- summary(x)$coefficients[2,3]
  165.   p <- summary(x)$coefficients[2,4]
  166.   r.sq <- summary(x)$r.squared
  167.   SERTAD2_exp_met[i,] <- c(id,estimate,SE,t,p,r.sq) # mudar aqui
  168. }
  169.  
  170. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  171. cols <- c(2, 3, 4, 5, 6)    
  172. SERTAD2_exp_met[,cols] <- apply(SERTAD2_exp_met[,cols], 2, function(x) as.numeric(as.character(x)))
  173.  
  174. # Correcao de BONFERRONI
  175. SERTAD2_exp_met <- SERTAD2_exp_met[order(SERTAD2_exp_met$p),]
  176. SERTAD2_exp_met$adj.p <- SERTAD2_exp_met$p * nrow(SERTAD2_exp_met)
  177.  
  178. # Salvando cpg_genes:
  179. write.csv2(SERTAD2_exp_met, file = "Paper2_SERTAD2_y_EXP_x_METH_mvalues.csv",
  180.            quote = F, row.names = T)
  181. write.table(SERTAD2_exp_met,file="Paper2_SERTAD2_y_EXP_x_METH_mvalues.txt", sep="\t",
  182.             row.names=T,quote=FALSE)
  183.  
  184. # Betas
  185. # Criar uma data frame com 6 colunas e nrow(SERTAD2_betas) linhas:
  186. n <- nrow(SERTAD2_betas)
  187. SERTAD2_exp_met <- data.frame(id=rep(0, n), estimate=rep(0, n), SE=rep(0, n),
  188.                               t=rep(0, n), p=rep(0, n), r.sq=rep(0, n))
  189.  
  190. for (i in 1:nrow(SERTAD2_betas)) { # mudar aqui
  191.   row <- SERTAD2_betas[i,] # mudar aqui
  192.   id <- rownames(SERTAD2_betas)[i] # mudar aqui
  193.   x <- lm(SERTAD2~row+pair)  # Mudar aqui
  194.   estimate <- summary(x)$coefficients[2,1]
  195.   SE <- summary(x)$coefficients[2,2]
  196.   t <- summary(x)$coefficients[2,3]
  197.   p <- summary(x)$coefficients[2,4]
  198.   r.sq <- summary(x)$r.squared
  199.   SERTAD2_exp_met[i,] <- c(id,estimate,SE,t,p,r.sq) # mudar aqui
  200. }
  201.  
  202. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  203. SERTAD2_exp_met[,cols] <- apply(SERTAD2_exp_met[,cols], 2, function(x) as.numeric(as.character(x)))
  204.  
  205. # Correcao de BONFERRONI
  206. SERTAD2_exp_met <- SERTAD2_exp_met[order(SERTAD2_exp_met$p),]
  207. SERTAD2_exp_met$adj.p <- SERTAD2_exp_met$p * nrow(SERTAD2_exp_met)
  208.  
  209. # Salvando cpg_genes:
  210. write.csv2(SERTAD2_exp_met, file = "Paper2_SERTAD2_y_EXP_x_METH_beta.csv",
  211.            quote = F, row.names = T)
  212. write.table(SERTAD2_exp_met,file="Paper2_SERTAD2_y_EXP_x_METH_beta.txt", sep="\t",
  213.             row.names=T,quote=FALSE)
  214.  
  215. ############ Metilacao e Trauma ####
  216. # Verificando se metilacao de cg10184289 foi influenciada por trauma
  217. # y = b + ax
  218. # y = dados de metilacao (delta)
  219. # x = dados de trauma
  220. SERTAD2_cg <- as.data.frame(SERTAD2_mvalues["cg10184289",])
  221. colnames(SERTAD2_cg) <- c("SERTAD2_cg")
  222. SERTAD2_cg$pair <- pair
  223. SERTAD2_cg$ID <- rownames(SERTAD2_cg)
  224.  
  225. SERTAD2_cg <- SERTAD2_cg %>%
  226.   group_by(pair) %>%
  227.   mutate(delta_cg = SERTAD2_cg - lag(SERTAD2_cg))
  228.  
  229. SERTAD2_cg <- SERTAD2_cg[,c(3,4)]
  230. SERTAD2_cg <- na.omit(SERTAD2_cg)
  231. rownames(SERTAD2_cg) <- SERTAD2_cg$ID
  232.  
  233. a <- rownames(SERTAD2_cg)
  234. b <- rownames(targets_HRC_44)
  235. identical(a,b) # [1] TRUE
  236.  
  237. SERTAD2_cg <- as.character(SERTAD2_cg$delta_cg)
  238.  
  239. n <- ncol(targets_HRC_44)
  240. SERTAD2_meth_trauma <- data.frame(id=rep(0, n),
  241.                                   estimate=rep(0, n),
  242.                                   SE=rep(0, n),
  243.                                   t=rep(0, n),
  244.                                   p=rep(0, n),
  245.                                   r.sq=rep(0, n))
  246.  
  247. for (i in 1:ncol(targets_HRC_44)) {
  248.   col <- targets_HRC_44[,i]
  249.   id <- colnames(targets_HRC_44)[i]
  250.   x <- lm(SERTAD2_cg~col) # mudar aqui
  251.   estimate <- summary(x)$coefficients[2,1]
  252.   SE <- summary(x)$coefficients[2,2]
  253.   t <- summary(x)$coefficients[2,3]
  254.   p <- summary(x)$coefficients[2,4]
  255.   r.sq <- summary(x)$r.squared
  256.   SERTAD2_meth_trauma[i,] <- c(id,estimate,SE,t,p,r.sq)
  257. }
  258.  
  259. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  260. SERTAD2_meth_trauma[,cols] = apply(SERTAD2_meth_trauma[,cols], 2, function(x) as.numeric(as.character(x)))
  261.  
  262. # Correcao de BONFERRONI
  263. SERTAD2_meth_trauma <- SERTAD2_meth_trauma[order(SERTAD2_meth_trauma$p),]
  264. SERTAD2_meth_trauma$adj.p <- SERTAD2_meth_trauma$p * nrow(SERTAD2_meth_trauma)
  265. SERTAD2_meth_trauma
  266.  
  267. # Salvando:
  268. write.csv2(SERTAD2_meth_trauma, file = "Paper2_SERTAD2_y_METH_x_TRAUMA_mvalues.csv",
  269.            quote = F, row.names = T)
  270. write.table(SERTAD2_meth_trauma,file="Paper2_SERTAD2_y_METH_x_TRAUMA_mvalues.txt", sep="\t",
  271.             row.names=T,quote=FALSE)
  272.  
  273. # Beta
  274. SERTAD2_cg <- as.data.frame(SERTAD2_betas["cg10184289",])
  275. colnames(SERTAD2_cg) <- c("SERTAD2_cg")
  276. SERTAD2_cg$pair <- pair
  277. SERTAD2_cg$ID <- rownames(SERTAD2_cg)
  278.  
  279. SERTAD2_cg <- SERTAD2_cg %>%
  280.   group_by(pair) %>%
  281.   mutate(delta_cg = SERTAD2_cg - lag(SERTAD2_cg))
  282.  
  283. SERTAD2_cg <- SERTAD2_cg[,c(3,4)]
  284. SERTAD2_cg <- na.omit(SERTAD2_cg)
  285. rownames(SERTAD2_cg) <- SERTAD2_cg$ID
  286.  
  287. a <- rownames(SERTAD2_cg)
  288. b <- rownames(targets_HRC_44)
  289. identical(a,b) # [1] TRUE
  290.  
  291. SERTAD2_cg <- as.character(SERTAD2_cg$delta_cg)
  292.  
  293. n <- ncol(targets_HRC_44)
  294. SERTAD2_meth_trauma <- data.frame(id=rep(0, n),
  295.                                   estimate=rep(0, n),
  296.                                   SE=rep(0, n),
  297.                                   t=rep(0, n),
  298.                                   p=rep(0, n),
  299.                                   r.sq=rep(0, n))
  300.  
  301. for (i in 1:ncol(targets_HRC_44)) {
  302.   col <- targets_HRC_44[,i]
  303.   id <- colnames(targets_HRC_44)[i]
  304.   x <- lm(SERTAD2_cg~col) # mudar aqui
  305.   estimate <- summary(x)$coefficients[2,1]
  306.   SE <- summary(x)$coefficients[2,2]
  307.   t <- summary(x)$coefficients[2,3]
  308.   p <- summary(x)$coefficients[2,4]
  309.   r.sq <- summary(x)$r.squared
  310.   SERTAD2_meth_trauma[i,] <- c(id,estimate,SE,t,p,r.sq)
  311. }
  312.  
  313. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  314. SERTAD2_meth_trauma[,cols] = apply(SERTAD2_meth_trauma[,cols], 2, function(x) as.numeric(as.character(x)))
  315.  
  316. # Correcao de BONFERRONI
  317. SERTAD2_meth_trauma <- SERTAD2_meth_trauma[order(SERTAD2_meth_trauma$p),]
  318. SERTAD2_meth_trauma$adj.p <- SERTAD2_meth_trauma$p * nrow(SERTAD2_meth_trauma)
  319. SERTAD2_meth_trauma
  320.  
  321. # Salvando:
  322. write.csv2(SERTAD2_meth_trauma, file = "Paper2_SERTAD2_y_METH_x_TRAUMA_beta.csv",
  323.            quote = F, row.names = T)
  324. write.table(SERTAD2_meth_trauma,file="Paper2_SERTAD2_y_METH_x_TRAUMA_beta.txt", sep="\t",
  325.             row.names=T,quote=FALSE)
  326.  
  327. rm(SERTAD2,SERTAD2_betas,SERTAD2_exp_met,SERTAD2_cpgs,
  328.    SERTAD2_meth_trauma,SERTAD2_mvalues,SERTAD2_cg10184289,SERTAD2_cg)
  329. gc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement