Advertisement
Guest User

Untitled

a guest
Mar 19th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.97 KB | None | 0 0
  1. ########################################################################
  2. # Title: Gene Expression, DNA Methylation and Environment
  3. # Author: Leticia Spindola
  4. # Date: 23/05/2018
  5. # Purpose: Analise dos dados de metilacao, expressao e dados ambientais
  6. #######################################################################
  7.  
  8. library(minfi)
  9. require(dplyr)
  10. library(ggplot2)
  11. library(limma)
  12.  
  13. dataDirectory <- "~/Documents/ILS_INPD_48"
  14. setwd("~/Documents/ILS_INPD_48")
  15.  
  16. ############################################################
  17. ################### Dados de Expressao  ####################
  18. ############################################################
  19. # Importando os dados de expressao genica
  20. eset_met <- readRDS("eset_met.RDS")
  21.  
  22. # salvando os dados brutos de expressao em uma matrix
  23. d_matrix <- exprs(eset_met)
  24. pdata <- pData(eset_met)
  25.  
  26. # arrumar_IDs <- colnames(d_matrix)
  27. # sampleID <- pdata$sampleID
  28. # identical(arrumar_IDs,sampleID) # [1] TRUE
  29.  
  30. colnames(d_matrix) <- c("I211_W0","I211_W1","I175_W0","I175_W1","I070_W0","I070_W1","I051_W0",
  31.                         "I051_W1","I117_W0","I117_W1","I144_W0","I144_W1","I069_W0","I069_W1",
  32.                         "I197_W0","I197_W1","I209_W0","I209_W1","I227_W0","I227_W1","I224_W0",
  33.                         "I224_W1","I242_W0","I242_W1","I310_W0","I310_W1","I236_W0","I236_W1",
  34.                         "I378_W0","I378_W1","I272_W0","I272_W1","I281_W0","I281_W1","I338_W0",
  35.                         "I338_W1","I339_W0","I339_W1","I309_W0","I309_W1","I330_W0","I330_W1",
  36.                         "I331_W0","I331_W1")
  37.  
  38. probes <- fData(eset_met)
  39.  
  40. # Importando a lista para o R
  41. DEG <- read.csv2(file = "Genes_diff_expressos_Incidentes2.csv",header = F)
  42. # View(DEG)
  43. # class(DEG) # [1] "data.frame"
  44.  
  45. # Transformando em um vetor
  46. DEG <- as.character(DEG$V1)
  47. DEG
  48.  
  49. # Tirando genes duplicados
  50. DEG[duplicated(DEG)] # character(0)
  51. length(DEG) # [1] 66
  52.  
  53. # Transformando as informacoes dos genes em uma string so
  54. g <- paste(DEG,collapse = "|")
  55.  
  56. # Criando uma coluna com informacao de row.names em probes
  57. probes$row_names <- rownames(probes)
  58.  
  59. # Selecionando so as probes dos genes diferencialmente expressos nos incidentes
  60. genes_exp_incidentes <- filter(probes,grepl(g, ProbeID))
  61. dim(genes_exp_incidentes) # [1] 72 30
  62. x <- genes_exp_incidentes$SYMBOL
  63.  
  64. x[duplicated(x)]
  65. # [1] "PEX16"
  66.  
  67. keep <- genes_exp_incidentes$row_names
  68.  
  69. d_matrix_select <- d_matrix[keep,]
  70. dim(d_matrix_select) # [1] 72 44
  71.  
  72. ### Vector necessario para as analises: identificando quem eh quem (pair)
  73. n <- 2
  74. pair<- as.factor(rep(1:22,each=n))
  75.  
  76. ############################################################
  77. ################### Dados de Metilacao  ####################
  78. ############################################################
  79. # Importando os dados do EPIC
  80. rgset_HRC_norm_Flt <- readRDS("rgset_HRC_norm_Flt.RDS")
  81.  
  82. # Pegando as informacoes de annotacao de probe do EPIC
  83. annotation <- rgset_HRC_norm_Flt %>% getAnnotation %>% as.data.frame
  84.  
  85. # Importando as informacoes de phenotype
  86. targets_HRC <- read.csv2(file = "/Users/leticiaspindola/Documents/ILS_INPD_48/pdata_INPDmeth_final_25_07_2018.csv",
  87.                          header = T)
  88. targets_HRC <- targets_HRC[order(targets_HRC$sampleID),] # # ordenando as linhas do genetic ID
  89. targets_HRC_44_total <- targets_HRC[-c(27,28,33,34),] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  90.  
  91. rownames(targets_HRC_44_total) <- targets_HRC_44_total$sampleID
  92. targets_HRC_44 <-targets_HRC_44_total [,c("Un_ev_2_NEW","Inter_2_NEW","Cont_chan_2_NEW",
  93.                                           "School_2_NEW","Health_2_NEW","Env_stress_2_NEW","TIME")]
  94. targets_HRC_44$GeneticID <- rownames(targets_HRC_44)
  95. targets_HRC_44 <- filter(targets_HRC_44,TIME=="Followup")
  96. targets_HRC_44 <- select(targets_HRC_44,-TIME)
  97. rownames(targets_HRC_44) <- targets_HRC_44$GeneticID
  98. targets_HRC_44 <- select(targets_HRC_44,-GeneticID)
  99.  
  100. # calculate M-values for statistical analysis
  101. mvalues <- getM(rgset_HRC_norm_Flt)
  102. mvalues <- mvalues[,order(colnames(mvalues))] # ordenando as colunas dos dados de metilacao
  103. mvalues <- mvalues[,-c(27,28,33,34)] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  104.  
  105. # calculate Beta-values for plot and interpretation of statistical analysis
  106. betas <- getBeta(rgset_HRC_norm_Flt)
  107. betas <- betas[,order(colnames(betas))] # ordenando as colunas dos dados de metilacao
  108. betas <- betas[,-c(27,28,33,34)] # remover as linhas 27 e 28 (I250) e 33 e 34 (I295)
  109.  
  110. # Ordenado as colunas dos dados de expressao
  111. d_matrix_select <- d_matrix_select[,order(colnames(d_matrix_select))]
  112.  
  113. a <- colnames(mvalues)
  114. b <- colnames(d_matrix_select)
  115. c <- colnames(betas)
  116. d <- as.character(targets_HRC_44_total$sampleID)
  117. identical(a,b) # [1] TRUE
  118. identical(a,c) # [1] TRUE
  119. identical(b,c) # [1] TRUE
  120. identical(a,d) # [1] TRUE
  121. identical(b,d) # [1] TRUE
  122. identical(c,d) # [1] TRUE
  123.  
  124. # Para as analises abaixo, preciso dos objetos:
  125. # annotation, mvalues, betas, d_matrix_select, targets_HRC_44 e targets_HRC_44_total
  126. rm(d_matrix,eset_met,pdata,probes,rgset_HRC_norm_Flt,targets_HRC)
  127. gc()
  128.  
  129. ############ Selecionando CpGs do gene SERTAD2 #############
  130. # Verificando quantas CpGs tem no gene analisado
  131. SERTAD2_cpgs <- filter(annotation,grepl("NM_014755", UCSC_RefGene_Accession))
  132. dim(SERTAD2_cpgs) # [1] 28 46
  133.  
  134. keep <- SERTAD2_cpgs$Name
  135.  
  136. SERTAD2_mvalues <- mvalues[keep,]
  137. dim(SERTAD2_mvalues) # [1] 28 44
  138.  
  139. # Pegando os dados de expressao genica
  140. SERTAD2 <- as.numeric(d_matrix_select["xorMF.VUSF4ie5f9zk",])
  141.  
  142. # y = b + ax
  143. # y = dados de expressao
  144. # x = dados de metilacao
  145.  
  146. # Criando FOR
  147. # Criar uma data frame com 6 colunas e nrow(SERTAD2_mvalues) linhas:
  148. n <- nrow(SERTAD2_mvalues)
  149. SERTAD2_exp_met <- data.frame(id=rep(0, n), estimate=rep(0, n), SE=rep(0, n),
  150.                               t=rep(0, n), p=rep(0, n), r.sq=rep(0, n))
  151.  
  152. for (i in 1:nrow(SERTAD2_mvalues)) { # mudar aqui
  153.   row <- SERTAD2_mvalues[i,] # mudar aqui
  154.   id <- rownames(SERTAD2_mvalues)[i] # mudar aqui
  155.   x <- lm(SERTAD2~row+pair)  # Mudar aqui
  156.   estimate <- summary(x)$coefficients[2,1]
  157.   SE <- summary(x)$coefficients[2,2]
  158.   t <- summary(x)$coefficients[2,3]
  159.   p <- summary(x)$coefficients[2,4]
  160.   r.sq <- summary(x)$r.squared
  161.   SERTAD2_exp_met[i,] <- c(id,estimate,SE,t,p,r.sq) # mudar aqui
  162. }
  163.  
  164. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  165. cols <- c(2, 3, 4, 5, 6)    
  166. SERTAD2_exp_met[,cols] <- apply(SERTAD2_exp_met[,cols], 2, function(x) as.numeric(as.character(x)))
  167.  
  168. # Correcao de BONFERRONI
  169. SERTAD2_exp_met <- SERTAD2_exp_met[order(SERTAD2_exp_met$p),]
  170. SERTAD2_exp_met$adj.p <- SERTAD2_exp_met$p * nrow(SERTAD2_exp_met)
  171.  
  172. # Salvando cpg_genes:
  173. write.csv2(SERTAD2_exp_met, file = "Paper2_SERTAD2_y_EXP_x_METH_mvalues.csv",
  174.            quote = F, row.names = T)
  175. write.table(SERTAD2_exp_met,file="Paper2_SERTAD2_y_EXP_x_METH_mvalues.txt", sep="\t",
  176.             row.names=T,quote=FALSE)
  177.  
  178. ############ Metilacao e Trauma ####
  179. # Verificando se metilacao de cg10184289 foi influenciada por trauma
  180. # y = b + ax
  181. # y = dados de metilacao (delta)
  182. # x = dados de trauma
  183. SERTAD2_cg <- as.data.frame(SERTAD2_mvalues["cg10184289",])
  184. colnames(SERTAD2_cg) <- c("SERTAD2_cg")
  185. SERTAD2_cg$pair <- pair
  186. SERTAD2_cg$ID <- rownames(SERTAD2_cg)
  187.  
  188. SERTAD2_cg <- SERTAD2_cg %>%
  189.   group_by(pair) %>%
  190.   mutate(delta_cg = SERTAD2_cg - lag(SERTAD2_cg))
  191.  
  192. SERTAD2_cg <- SERTAD2_cg[,c(3,4)]
  193. SERTAD2_cg <- na.omit(SERTAD2_cg)
  194. rownames(SERTAD2_cg) <- SERTAD2_cg$ID
  195.  
  196. a <- rownames(SERTAD2_cg)
  197. b <- rownames(targets_HRC_44)
  198. identical(a,b) # [1] TRUE
  199.  
  200. SERTAD2_cg <- as.character(SERTAD2_cg$delta_cg)
  201.  
  202. n <- ncol(targets_HRC_44)
  203. SERTAD2_meth_trauma <- data.frame(id=rep(0, n),
  204.                                   estimate=rep(0, n),
  205.                                   SE=rep(0, n),
  206.                                   t=rep(0, n),
  207.                                   p=rep(0, n),
  208.                                   r.sq=rep(0, n))
  209.  
  210. for (i in 1:ncol(targets_HRC_44)) {
  211.   col <- targets_HRC_44[,i]
  212.   id <- colnames(targets_HRC_44)[i]
  213.   x <- lm(SERTAD2_cg~col) # mudar aqui
  214.   estimate <- summary(x)$coefficients[2,1]
  215.   SE <- summary(x)$coefficients[2,2]
  216.   t <- summary(x)$coefficients[2,3]
  217.   p <- summary(x)$coefficients[2,4]
  218.   r.sq <- summary(x)$r.squared
  219.   SERTAD2_meth_trauma[i,] <- c(id,estimate,SE,t,p,r.sq)
  220. }
  221.  
  222. # Trasformar as colunas estimate, SE, t, p e r.sq de character para numeric
  223. SERTAD2_meth_trauma[,cols] = apply(SERTAD2_meth_trauma[,cols], 2, function(x) as.numeric(as.character(x)))
  224.  
  225. # Correcao de BONFERRONI
  226. SERTAD2_meth_trauma <- SERTAD2_meth_trauma[order(SERTAD2_meth_trauma$p),]
  227. SERTAD2_meth_trauma$adj.p <- SERTAD2_meth_trauma$p * nrow(SERTAD2_meth_trauma)
  228. SERTAD2_meth_trauma
  229.  
  230. # Salvando:
  231. write.csv2(SERTAD2_meth_trauma, file = "Paper2_SERTAD2_y_METH_x_TRAUMA_mvalues.csv",
  232.            quote = F, row.names = T)
  233. write.table(SERTAD2_meth_trauma,file="Paper2_SERTAD2_y_METH_x_TRAUMA_mvalues.txt", sep="\t",
  234.             row.names=T,quote=FALSE)
  235.  
  236. rm(SERTAD2,SERTAD2_exp_met,SERTAD2_cpgs,
  237.    SERTAD2_meth_trauma,SERTAD2_mvalues,SERTAD2_cg10184289,SERTAD2_cg)
  238. gc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement