Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(WGCNA)
- library(DESeq2)
- library(cluster)
- options(stringsAsFactors = FALSE);
- setwd(dir = '/home/nlong/Public/clc')
- x=read.csv("ccllcc", sep='\t', header=T)
- datExpr = as.data.frame(x[, -c(1)]);
- #names(datExpr) = names(x)[, -c(1)]
- rownames(datExpr) = x$gene
- datExpr=t(datExpr)
- gsg = goodSamplesGenes(datExpr, verbose = 3);
- gsg$allOK
- if (!gsg$allOK)
- {
- # Optionally, print the gene and sample names that were removed:
- if (sum(!gsg$goodGenes)>0)
- printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", ")));
- if (sum(!gsg$goodSamples)>0)
- printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", ")));
- # Remove the offending genes and samples from the data:
- datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]
- }
- datExpr<-as.matrix(t(datExpr))
- condition<-(c(rep("norm",4), rep("still",4), rep("norm",4), rep("still",4)))
- coldata<-data.frame(row.names=colnames(datExpr), condition)
- dds=DESeqDataSetFromMatrix(datExpr, colData=coldata, design=~condition)
- dds2=DESeq(dds,betaPrior=F)
- vsd=getVarianceStabilizedData(dds2)
- vsd2=t(vsd)
- tree=flashClust(dist(vsd2), method="average")
- sizeGrWindow(12,9)
- par(cex = 0.6)
- par(mar = c(0,4,2,0))
- plot(tree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
- powers = c(c(1:10), seq(from = 12, to=40, by=2))
- options(stringsAsFactors = FALSE)
- ALLOW_WGCNA_THREADS=20
- #sft = pickSoftThreshold(vsd2, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
- sft = pickSoftThreshold(vsd3, dataIsExpr = TRUE, powerVector = powers, verbose = 5, networkType='signed hybrid')
- sizeGrWindow(9,5)
- par(mfrow=c(1,2))
- cex1=0.9
- plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
- xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
- main = paste("Scale independence"));
- text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
- labels=powers,cex=cex1,col="red");
- # this line corresponds to using an R^2 cut-off of h
- abline(h=0.90,col="red")
- # Mean connectivity as a function of the soft-thresholding power
- plot(sft$fitIndices[,1], sft$fitIndices[,5],
- xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
- main = paste("Mean connectivity"))
- text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
- abline(h=0.90, col="red")
- softPower=16
- adjacency=adjacency(vsd2,power=softPower, type= 'unsigned')
- TOM=TOMsimilarity(adjacency, TOMType="unsigned", TOMDenom='mean')
- dissTOM=1-TOM
- geneTree=flashClust(as.dist(dissTOM), method="average")
- sizeGrWindow(12,9)
- plot(geneTree,xlab="", sub="", main="Gene clustering on TOM-based dissimilarity", labels=FALSE, hang=0.04)
- minModuleSize=20
- dynamicMods=cutreeDynamic(dendro=geneTree, cutHeight = "hybrid", distM=dissTOM,
- deepSplit=2, pamRespectsDendro=FALSE, minAbsGap=0.05,
- minClusterSize=minModuleSize)
- table(dynamicMods)
- dynamicColors=labels2colors(dynamicMods)
- table(dynamicColors)
- sizeGrWindow(8,6)
- plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut",
- dendroLabels=FALSE, hang=0.03,
- addGuide=TRUE, guideHang=0.05,
- main= "Gene dendrogram")
- MEList=moduleEigengenes(vsd3, colors=dynamicColors)
- MEs = MEList$eigengenes
- MEDiss=1-cor(MEs)
- METree = flashClust(as.dist(MEDiss), method='average')
- sizeGrWindow(7,6)
- plot(METree, main="Clustering of module eigengenes", xlab='', sub='')
- MEDissThres = 0.1
- abline(h=MEDissThres, col="red")
- merge=mergeCloseModules(vsd2, dynamicColors, useAbs=T, cutHeight = MEDissThres, verbose=3)
- mergedColors=merge$colors
- mergedMEs=merge$newMEs
- sizeGrWindow(12,9)
- plotDendroAndColors(geneTree, cbind(mergedColors, mergedColors),
- c("Dynamic Tree Cut", "Merged dynamic"),
- dendroLabels=FALSE, hang=0.03,
- addGuide=TRUE, guideHang = 0.05)
- dev.off()
- moduleColors=mergedColors
- colorOrder=c("grey", standardColors(50))
- moduleLabels=match(moduleColors, colorOrder)-1
- MEs=mergedMEs
- save(MEs, moduleLabels, moduleColors, geneTree, file="intermediate.RData")
- MEList
- nGenes=ncol(vsd2)
- nSamples=nrow(vsd2)
- MEs0=moduleEigengenes(vsd2,moduleColors)$eigengenes
- MEs=orderMEs(MEs0)
- ###signedKME
- hobag=signedKME(vsd2, mergedMEs, outputColumnName="kME", corFnc="cor", corOptions= "use = 'p'")
- write.table(hobag, file='hobag.csv', sep='\t')
- ADJ1=abs(cor(vsd2,use="p"))^6
- Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
- write.table(Alldegrees1, file='connectivity.csv', sep='\t')
- ###EXTRACT MODULES###
- gene.names=colnames(vsd2)
- module_colors= setdiff(unique(moduleColors), "grey")
- for (color in module_colors){
- module=gene.names[which(dynamicColors==color)]
- write.table(module, paste("module_",color, ".txt",sep=""), sep="\t", row.names=FALSE, col.names=FALSE,quote=FALSE)
- }
- ###INTRAMODULAR CONNECTIVITY
- datME=mergedMEs
- signif(cor(datME, use="p"),2)
- dissimME=(1-t(cor(datME,method='p')))/2
- hclustdatME=hclust(as.dist(dissimME),method="average")
- par(mfrow=c(1,1))
- plot(hclustdatME, main="Clustering tree based of the module eigengenes")
- sizeGrWindow(8,9)
- y=(c(1,1,1,1,2,2,2,2))
- plotMEpairs(datME,y)
- sizeGrWindow(8,7);
- which.module="pink"
- ME=datME[, paste("ME",which.module, sep="")]
- par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
- plotMat(t(scale(vsd2[,colorh1==which.module ]) ),
- nrgcols=30,rlabels=F,rcols=which.module,
- main=which.module, cex.main=2)
- par(mar=c(5, 4.2, 0, 0.7))
- barplot(ME, col=which.module, main="", cex.main=2,
- ylab="eigengene expression",xlab="array sample")
- signif(cor(y,datME, use="p"),2)
- ###FROM TREE-CUTTING COMPARISON
- vsd3=vsd2[, rank(-k, ties.method="first")<=3600]
- dissADJ=1-ADJ1
- hierADJ=hclust(as.dist(dissADJ), method="average")
- ###INTRAMODULAR CONNECTIVITY
- ADJ1=abs(cor(vsd3,use="p"))^6 ##vsd3 = vsd2 with subset of 3600 connected genes
- dissTOM=TOMdist(ADJ1)
- hierTOM=hclust(as.dist(dissTOM), method="average")
- colorStaticTOM = as.character(cutreeStaticColor(hierTOM, cutHeight=.99, minSize=20))
- colorDynamicTOM = labels2colors (cutreeDynamic(hierTOM,method="tree"))
- colorDynamicHybridTOM = labels2colors(cutreeDynamic(hierTOM, distM= dissTOM , cutHeight = 0.998,
- deepSplit=2, pamRespectsDendro = FALSE))
- colorh1= colorDynamicHybridTOM
- Alldegrees1=intramodularConnectivity(ADJ1, colorh1)
- #head(Alldegrees1)
- colorlevels=unique(colorh1)
- sizeGrWindow(9,6)
- par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
- par(mar = c(4,5,3,1))
- for (i in c(1:length(colorlevels)))
- {
- whichmodule=colorlevels[[i]];
- restrict1 = (colorh1==whichmodule);
- verboseScatterplot(Alldegrees1$kWithin[restrict1],
- GeneSignificance[restrict1], col=colorh1[restrict1],
- main=whichmodule,
- xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
- }
- ######MODULE MEMBERSHIP -- "EIGENGENE"
- modNames = substring(names(MEs), 3)
- module = 'turquoise'
- column = match(module, modNames);
- moduleGenes=moduleColors==module;
- geneModuleMembership=as.data.frame(cor(vsd3,MEs0, use='p'));
- MMPvalue=as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
- names(geneModuleMembership)=paste("MM", modNames, sep="");
- names(MMPvalue)=paste("p.MM", modNames, sep="")
- sizeGrWindow(7,7);
- par(mfrow=c(1,1));
- verboseScatterplot(abs(geneModuleMembership[moduleGenes,column]),
- abs()
- #####NETWORK CONNECTIVITY GRAPHS
- dissTOM = 1-TOMsimilarityFromExpr(vsd3, power = 12, TOMType= 'signed hybrid');
- # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
- plotTOM = dissTOM^7;
- # Set diagonal to NA for a nicer plot
- diag(plotTOM) = NA;
- # Call the plot function
- sizeGrWindow(9,9)
- TOMplot(plotTOM, geneTree, mergedColors, main = "Network heatmap plot, all genes")
- nSelect = 400
- # For reproducibility, we set the random seed
- set.seed(10);
- select = sample(nGenes, size = nSelect);
- selectTOM = dissTOM[select, select];
- # Thereโs no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
- selectTree = flashClust(as.dist(selectTOM), method = "average")
- selectColors = moduleColors[select];
- # Open a graphical window
- sizeGrWindow(9,9)
- # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
- # the color palette; setting the diagonal to NA also improves the clarity of the plot
- plotDiss = selectTOM^7;
- diag(plotDiss) = NA;
- TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
- MEs = moduleEigengenes(vsd2, moduleColors)$eigengenes
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement