Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2016
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.50 KB | None | 0 0
  1. library(WGCNA)
  2. library(DESeq2)
  3. library(cluster)
  4. options(stringsAsFactors = FALSE);
  5. setwd(dir = '/home/nlong/Public/clc')
  6. x=read.csv("ccllcc", sep='\t', header=T)
  7. datExpr = as.data.frame(x[, -c(1)]);
  8. #names(datExpr) = names(x)[, -c(1)]
  9. rownames(datExpr) = x$gene
  10. datExpr=t(datExpr)
  11. gsg = goodSamplesGenes(datExpr, verbose = 3);
  12. gsg$allOK
  13. if (!gsg$allOK)
  14. {
  15. # Optionally, print the gene and sample names that were removed:
  16. if (sum(!gsg$goodGenes)>0)
  17. printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", ")));
  18. if (sum(!gsg$goodSamples)>0)
  19. printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", ")));
  20. # Remove the offending genes and samples from the data:
  21. datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]
  22. }
  23. datExpr<-as.matrix(t(datExpr))
  24. condition<-(c(rep("norm",4), rep("still",4), rep("norm",4), rep("still",4)))
  25.  
  26. coldata<-data.frame(row.names=colnames(datExpr), condition)
  27. dds=DESeqDataSetFromMatrix(datExpr, colData=coldata, design=~condition)
  28. dds2=DESeq(dds,betaPrior=F)
  29. vsd=getVarianceStabilizedData(dds2)
  30. vsd2=t(vsd)
  31. tree=flashClust(dist(vsd2), method="average")
  32. sizeGrWindow(12,9)
  33. par(cex = 0.6)
  34. par(mar = c(0,4,2,0))
  35. plot(tree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
  36. powers = c(c(1:10), seq(from = 12, to=40, by=2))
  37. options(stringsAsFactors = FALSE)
  38. ALLOW_WGCNA_THREADS=20
  39. #sft = pickSoftThreshold(vsd2, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
  40. sft = pickSoftThreshold(vsd3, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
  41. sizeGrWindow(9,5)
  42. par(mfrow=c(1,2))
  43. cex1=0.9
  44. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  45. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
  46. main = paste("Scale independence"));
  47. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  48. labels=powers,cex=cex1,col="red");
  49. # this line corresponds to using an R^2 cut-off of h
  50. abline(h=0.90,col="red")
  51. # Mean connectivity as a function of the soft-thresholding power
  52. plot(sft$fitIndices[,1], sft$fitIndices[,5],
  53. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  54. main = paste("Mean connectivity"))
  55. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  56. abline(h=0.90, col="red")
  57.  
  58. softPower=16
  59. adjacency=adjacency(vsd2,power=softPower, type= 'unsigned')
  60. TOM=TOMsimilarity(adjacency, TOMType="unsigned", TOMDenom='mean')
  61. dissTOM=1-TOM
  62. geneTree=flashClust(as.dist(dissTOM), method="average")
  63. sizeGrWindow(12,9)
  64. plot(geneTree,xlab="", sub="", main="Gene clustering on TOM-based dissimilarity", labels=FALSE, hang=0.04)
  65.  
  66. minModuleSize=20
  67. dynamicMods=cutreeDynamic(dendro=geneTree, cutHeight = "hybrid", distM=dissTOM,
  68. deepSplit=2, pamRespectsDendro=FALSE, minAbsGap=0.05,
  69. minClusterSize=minModuleSize)
  70. table(dynamicMods)
  71. dynamicColors=labels2colors(dynamicMods)
  72. table(dynamicColors)
  73. sizeGrWindow(8,6)
  74. plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut",
  75. dendroLabels=FALSE, hang=0.03,
  76. addGuide=TRUE, guideHang=0.05,
  77. main= "Gene dendrogram")
  78. MEList=moduleEigengenes(vsd3, colors=dynamicColors)
  79. MEs = MEList$eigengenes
  80. MEDiss=1-cor(MEs)
  81. METree = flashClust(as.dist(MEDiss), method='average')
  82. sizeGrWindow(7,6)
  83. plot(METree, main="Clustering of module eigengenes", xlab='', sub='')
  84. MEDissThres = 0.1
  85. abline(h=MEDissThres, col="red")
  86. merge=mergeCloseModules(vsd2, dynamicColors, useAbs=T, cutHeight = MEDissThres, verbose=3)
  87. mergedColors=merge$colors
  88. mergedMEs=merge$newMEs
  89. sizeGrWindow(12,9)
  90. plotDendroAndColors(geneTree, cbind(mergedColors, mergedColors),
  91. c("Dynamic Tree Cut", "Merged dynamic"),
  92. dendroLabels=FALSE, hang=0.03,
  93. addGuide=TRUE, guideHang = 0.05)
  94. dev.off()
  95. moduleColors=mergedColors
  96. colorOrder=c("grey", standardColors(50))
  97. moduleLabels=match(moduleColors, colorOrder)-1
  98. MEs=mergedMEs
  99. save(MEs, moduleLabels, moduleColors, geneTree, file="intermediate.RData")
  100. MEList
  101. nGenes=ncol(vsd2)
  102. nSamples=nrow(vsd2)
  103. MEs0=moduleEigengenes(vsd2,moduleColors)$eigengenes
  104. MEs=orderMEs(MEs0)
  105.  
  106. ###signedKME
  107. hobag=signedKME(vsd2, mergedMEs, outputColumnName="kME", corFnc="cor", corOptions= "use = 'p'")
  108. write.table(hobag, file='hobag.csv', sep='\t')
  109.  
  110. ADJ1=abs(cor(vsd2,use="p"))^6
  111. Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
  112. write.table(Alldegrees1, file='connectivity.csv', sep='\t')
  113.  
  114. ###EXTRACT MODULES###
  115. gene.names=colnames(vsd2)
  116.  
  117. module_colors= setdiff(unique(moduleColors), "grey")
  118. for (color in module_colors){
  119. module=gene.names[which(dynamicColors==color)]
  120. write.table(module, paste("module_",color, ".txt",sep=""), sep="\t", row.names=FALSE, col.names=FALSE,quote=FALSE)
  121. }
  122.  
  123.  
  124. ###INTRAMODULAR CONNECTIVITY
  125. datME=mergedMEs
  126. signif(cor(datME, use="p"),2)
  127. dissimME=(1-t(cor(datME,method='p')))/2
  128. hclustdatME=hclust(as.dist(dissimME),method="average")
  129. par(mfrow=c(1,1))
  130. plot(hclustdatME, main="Clustering tree based of the module eigengenes")
  131. sizeGrWindow(8,9)
  132. y=(c(1,1,1,1,2,2,2,2))
  133. plotMEpairs(datME,y)
  134.  
  135. sizeGrWindow(8,7);
  136. which.module="pink"
  137. ME=datME[, paste("ME",which.module, sep="")]
  138. par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
  139. plotMat(t(scale(vsd2[,colorh1==which.module ]) ),
  140. nrgcols=30,rlabels=F,rcols=which.module,
  141. main=which.module, cex.main=2)
  142. par(mar=c(5, 4.2, 0, 0.7))
  143. barplot(ME, col=which.module, main="", cex.main=2,
  144. ylab="eigengene expression",xlab="array sample")
  145.  
  146. signif(cor(y,datME, use="p"),2)
  147.  
  148. ###FROM TREE-CUTTING COMPARISON
  149. vsd3=vsd2[, rank(-k, ties.method="first")<=3600]
  150.  
  151. dissADJ=1-ADJ1
  152. hierADJ=hclust(as.dist(dissADJ), method="average")
  153.  
  154. ###INTRAMODULAR CONNECTIVITY
  155. ADJ1=abs(cor(vsd3,use="p"))^6 ##vsd3 = vsd2 with subset of 3600 connected genes
  156. dissTOM=TOMdist(ADJ1)
  157. hierTOM=hclust(as.dist(dissTOM), method="average")
  158. colorStaticTOM = as.character(cutreeStaticColor(hierTOM, cutHeight=.99, minSize=20))
  159. colorDynamicTOM = labels2colors (cutreeDynamic(hierTOM,method="tree"))
  160. colorDynamicHybridTOM = labels2colors(cutreeDynamic(hierTOM, distM= dissTOM , cutHeight = 0.998,
  161. deepSplit=2, pamRespectsDendro = FALSE))
  162. colorh1= colorDynamicHybridTOM
  163.  
  164. Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
  165. #head(Alldegrees1)
  166. colorlevels=unique(colorh1)
  167. sizeGrWindow(9,6)
  168. par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
  169. par(mar = c(4,5,3,1))
  170. for (i in c(1:length(colorlevels)))
  171. {
  172. whichmodule=colorlevels[[i]];
  173. restrict1 = (colorh1==whichmodule);
  174. verboseScatterplot(Alldegrees1$kWithin[restrict1],
  175. GeneSignificance[restrict1], col=colorh1[restrict1],
  176. main=whichmodule,
  177. xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
  178. }
  179.  
  180.  
  181.  
  182. ######MODULE MEMBERSHIP -- "EIGENGENE"
  183.  
  184. modNames = substring(names(MEs), 3)
  185. module = 'turquoise'
  186. column = match(module, modNames);
  187. moduleGenes=moduleColors==module;
  188. geneModuleMembership=as.data.frame(cor(vsd3,MEs0, use='p'));
  189. MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
  190. names(geneModuleMembership)=paste("MM", modNames, sep="");
  191. names(MMPvalue)=paste("p.MM", modNames, sep="")
  192.  
  193. sizeGrWindow(7,7);
  194. par(mfrow=c(1,1));
  195. verboseScatterplot(abs(geneModuleMembership[moduleGenes,column]),
  196. abs()
  197.  
  198.  
  199. #####NETWORK CONNECTIVITY GRAPHS
  200. dissTOM = 1-TOMsimilarityFromExpr(vsd3, power = 12, TOMType= 'signed hybrid');
  201. # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
  202. plotTOM = dissTOM^7;
  203. # Set diagonal to NA for a nicer plot
  204. diag(plotTOM) = NA;
  205. # Call the plot function
  206. sizeGrWindow(9,9)
  207. TOMplot(plotTOM, geneTree, mergedColors, main = "Network heatmap plot, all genes")
  208. nSelect = 400
  209. # For reproducibility, we set the random seed
  210. set.seed(10);
  211. select = sample(nGenes, size = nSelect);
  212. selectTOM = dissTOM[select, select];
  213. # Thereโ€™s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
  214. selectTree = flashClust(as.dist(selectTOM), method = "average")
  215. selectColors = moduleColors[select];
  216. # Open a graphical window
  217. sizeGrWindow(9,9)
  218. # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
  219. # the color palette; setting the diagonal to NA also improves the clarity of the plot
  220. plotDiss = selectTOM^7;
  221. diag(plotDiss) = NA;
  222. TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
  223. MEs = moduleEigengenes(vsd2, moduleColors)$eigengenes
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement