Light1992

Script FRUITY

Jul 27th, 2021
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 21.69 KB | None | 0 0
  1.  
  2. ##############################################################
  3. ####### WEIGHTED (GENE) CORRELATION NETWORK ANALYSIS##########
  4. ##############################################################
  5.  
  6. ##WGCNA is available from CRAN but can be badly compiled the script below allows manual loading
  7.  
  8. install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))
  9. source("http://bioconductor.org/biocLite.R")
  10. biocLite(c("GO.db", "preprocessCore", "impute"))
  11. source("https://bioconductor.org/biocLite.R")
  12. biocLite("BiocUpgrade")
  13.  
  14. #also install flashClust from CRAN
  15.  
  16. library(impute)
  17. library(WGCNA)
  18. library(flashClust)
  19.  
  20. # loading data, again in separate files. Select your own method.
  21. d()
  22. #rename working directory
  23. workingDir = "/Users/Natasha.spadafora/OneDrive - Markes International Limited/R software/Peach VOCs Sensor Phytochem 2017"
  24. #C:\R software\Fruitydata2018
  25. #C:\TOF-DS\FRUITY2017\FRUITY WCNA\R tempate Analysis\FRUITY MARKES INT
  26. #C:\R software\Sqrt VOCas trait Fruity2017 by CTM 2019
  27. # set new working directory:
  28. setwd(workingDir)
  29.  
  30. datCompds0=read.csv("Sqrtalldata2017NoSensorialNogeneNoMDA.csv", header = T)
  31. #datCompds0=read.csv("D:/BI3153/NormAreaSqrt.csv", header = T)#Β equivalent to YData
  32.  
  33. dim(datCompds0)
  34. names(datCompds0)
  35.  
  36. traitData = read.csv("ParamSensorialall.csv")
  37. #traitData = read.csv("D:/BI3153/xdata2.csv");# equivalent to XData
  38.  
  39. dim(traitData)
  40. names(traitData)
  41.  
  42. sampleTree = flashClust(dist(datCompds0), method = "average");
  43. # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
  44. # The user should change the dimensions if the window is too large or too small.
  45. sizeGrWindow(12,10) #this command is redundant in RStudio
  46. par(cex = 0.6);
  47. par(mar = c(0,4,2,0))
  48. plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
  49.  
  50. # Plot a line to show the cut
  51. abline(h = 40, col = "red");
  52.  
  53. # Usually with scent data there is no cut required as there's no outgroup; if there is go to appendix 1
  54.  
  55. # Convert traits to a color representation: white means low, red means high, grey means missing entry
  56.  
  57. traitColors = numbers2colors(traitData ,signed = FALSE);
  58. # Plot the sample dendrogram and the colors underneath.
  59. plotDendroAndColors(sampleTree, traitColors,
  60. groupLabels = names(traitData),
  61. main = "Sample dendrogram and trait heatmap") #if reclustering is required the term 'sampletree' needs to be amended to sampletree2 (see appendix I)
  62.  
  63. # Choose a set of soft-thresholding powers
  64. powers = c(c(1:10), seq(from = 12, to=20, by=2))
  65. # Call the network topology analysis function
  66. sft = pickSoftThreshold(datCompds0, powerVector = powers, verbose = 5)
  67. # Plot the results:
  68. sizeGrWindow(9, 5)
  69. par(mfrow = c(1,2));
  70. cex1 = 0.9;
  71. # Scale-free topology fit index as a function of the soft-thresholding power
  72. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  73. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
  74. main = paste("Scale independence"));
  75. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  76. labels=powers,cex=cex1,col="red")
  77. # this line corresponds to using an R^2 cut-off of h
  78. abline(h=0.90,col="red")
  79. # Mean connectivity as a function of the soft-thresholding power
  80. plot(sft$fitIndices[,1], sft$fitIndices[,5],
  81. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  82. main = paste("Mean connectivity"))
  83. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  84. # all above is plot only!
  85. #SELECT SOFTPOWER TO GAIN LARGEST SCALE FREE TOPOLOGY FIT AND LOWWEST MEAN CONNECTIVITY TO CONTINUE
  86.  
  87. # Calculating adjeacency with soft power 4
  88. softPower= 5
  89. adjacency = adjacency(datCompds0, power = softPower)
  90.  
  91. # Turn adjacency into topological overlap
  92. TOM = TOMsimilarity(adjacency);
  93. dissTOM = 1-TOM
  94.  
  95. # Call the hierarchical clustering function
  96. geneTree = flashClust(as.dist(dissTOM), method = "average");
  97. # Plot the resulting clustering tree (dendrogram)
  98. sizeGrWindow(8,5)
  99. plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
  100. labels = FALSE, hang = 0.04)
  101.  
  102. # We like large modules, so we set the minimum module size relatively high:
  103. minModuleSize =4;
  104. # Module identification using dynamic tree cut:
  105. dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
  106. deepSplit =3, pamRespectsDendro = FALSE,
  107. minClusterSize = minModuleSize);
  108. table(dynamicMods)
  109. # Convert numeric lables into colors
  110. dynamicColors = labels2colors(dynamicMods)
  111. table(dynamicColors)
  112.  
  113. # Plot the dendrogram and colors underneath
  114. sizeGrWindow(8,6)
  115. plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
  116. dendroLabels = FALSE, hang = 0.03,
  117. addGuide = TRUE, guideHang = 0.05,
  118. main = "Gene dendrogram and module colors")
  119.  
  120.  
  121.  
  122. # Calculate eigengenes
  123. MEList = moduleEigengenes(datCompds0, colors = dynamicColors)
  124. MEs = MEList$eigengenes
  125. # Calculate dissimilarity of module eigengenes
  126. MEDiss = 1-cor(MEs);
  127. # Cluster module eigengenes
  128. METree = flashClust(as.dist(MEDiss), method = "average");
  129. # Plot the result
  130. sizeGrWindow(7, 6)
  131. plot(METree, main = "Clustering of module eigengenes",
  132. xlab = "", sub = "")
  133.  
  134. MEDissThres = 0.01
  135. # Plot the cut line into the dendrogram
  136. abline(h=MEDissThres, col = "red")
  137. ###### IF MERGER IS WANTED, E.G. COMBINING MODULES GO TO APPENDIX 2
  138.  
  139. # Define numbers of genes and samples
  140. nGenes = ncol(datCompds0);
  141. nSamples = nrow(datCompds0);
  142. # Recalculate MEs with color labels
  143. #MEs0 = moduleEigengenes(datCompds0, moduleColors)$eigengenes
  144. #MEs = orderMEs(MEs0)
  145. moduleTraitCor = cor(MEs, traitData, use = "p");
  146. moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
  147.  
  148. # START PLOTTING HEAT MAP MODULE-TRAIT RELATION AND SIGNIFICANCE
  149.  
  150.  
  151. sizeGrWindow(10,6)
  152. # Will display correlations and their p-values
  153. textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
  154. signif(moduleTraitPvalue, 1), ")", sep = "");
  155. dim(textMatrix) = dim(moduleTraitCor)
  156. par(mar = c(6, 8.5, 3, 3));
  157. # Display the correlation values within a heatmap plot
  158. labeledHeatmap(Matrix = moduleTraitCor,
  159. xLabels = names(traitData),
  160. yLabels = names(MEs),
  161. ySymbols = names(MEs),
  162. colorLabels = FALSE,
  163. colors = greenWhiteRed(50),
  164. textMatrix = textMatrix,
  165. setStdMargins = FALSE,
  166. cex.text = 0.5,
  167. zlim = c(-1,1),
  168. main = paste("Module-trait relationships"))
  169.  
  170. #END PLOTTING##
  171.  
  172. #PREPARING OUTPUT TABLE#
  173.  
  174. # Define variable 'Sex' containing the 'Sex' column of datTrait
  175.  
  176. S1 = as.data.frame(traitData$S1);
  177. names(S1) = "S1"
  178. S2 = as.data.frame(traitData$S2)
  179. names(S2)="S2"
  180. S3 = as.data.frame(traitData$S3)
  181. names(S3)="S3"
  182. S4 = as.data.frame(traitData$S4)
  183. names(S4)="S4"
  184. S5 = as.data.frame(traitData$S5)
  185. names(S5)="S5"
  186. S6 = as.data.frame(traitData$S6)
  187. names(S6)="S6"
  188. S7 = as.data.frame(traitData$S7)
  189. names(S7)="S7"
  190. S8 = as.data.frame(traitData$S8)
  191. names(S8)="S8"
  192. S9 = as.data.frame(traitData$S9)
  193. names(S9)="S9"
  194. # names (colors) of the modules
  195. modNames = substring(names(MEs), 5)
  196. geneModuleMembership = as.data.frame(cor(datCompds0, MEs, use = "p"));
  197. MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
  198. names(geneModuleMembership) = paste("MM", modNames, sep="");
  199. names(MMPvalue) = paste("p.MM", modNames, sep="");
  200. geneTraitSignificance = as.data.frame(cor(datCompds0, S1, use = "p"));
  201. GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
  202. names(geneTraitSignificance) = paste("GS.", names(S1), sep="");
  203. names(GSPvalue) = paste("p.GS.", names(S1), sep="");
  204.  
  205. geneTraitSignificance1 = as.data.frame(cor(datCompds0, S2, use = "p"));
  206. GSPvalue1 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance1), nSamples));
  207. names(geneTraitSignificance1) = paste("GS.", names(S2), sep="");
  208. names(GSPvalue1) = paste("p.GS.", names(S2), sep="");
  209.  
  210. geneTraitSignificance2 = as.data.frame(cor(datCompds0,S3, use = "p"));
  211. GSPvalue2 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance2), nSamples));
  212. names(geneTraitSignificance2) = paste("GS.", names(S3), sep="");
  213. names(GSPvalue2) = paste("p.GS.", names(S3), sep="");
  214.  
  215. geneTraitSignificance3 = as.data.frame(cor(datCompds0,S4, use = "p"));
  216. GSPvalue3 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance3), nSamples));
  217. names(geneTraitSignificance3) = paste("GS.", names(S4), sep="");
  218. names(GSPvalue3) = paste("p.GS.", names(S4), sep="");
  219.  
  220. geneTraitSignificance4 = as.data.frame(cor(datCompds0,S5, use = "p"));
  221. GSPvalue4 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance4), nSamples));
  222. names(geneTraitSignificance4) = paste("GS.", names(S5), sep="");
  223. names(GSPvalue4) = paste("p.GS.", names(S5), sep="");
  224.  
  225. geneTraitSignificance5 = as.data.frame(cor(datCompds0,S6, use = "p"));
  226. GSPvalue5 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance5), nSamples));
  227. names(geneTraitSignificance5) = paste("GS.", names(S6), sep="");
  228. names(GSPvalue5) = paste("p.GS.", names(S6), sep="");
  229.  
  230. geneTraitSignificance6 = as.data.frame(cor(datCompds0,S7, use = "p"));
  231. GSPvalue6 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance6), nSamples));
  232. names(geneTraitSignificance6) = paste("GS.", names(S7), sep="");
  233. names(GSPvalue6) = paste("p.GS.", names(S7), sep="");
  234.  
  235. geneTraitSignificance7 = as.data.frame(cor(datCompds0,S8, use = "p"));
  236. GSPvalue7 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance7), nSamples));
  237. names(geneTraitSignificance7) = paste("GS.", names(S8), sep="");
  238. names(GSPvalue7) = paste("p.GS.", names(S8), sep="");
  239.  
  240. geneTraitSignificance8 = as.data.frame(cor(datCompds0,S9, use = "p"));
  241. GSPvalue8 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance8), nSamples));
  242. names(geneTraitSignificance8) = paste("GS.", names(S9), sep="");
  243. names(GSPvalue8) = paste("p.GS.", names(S9), sep="");
  244.  
  245.  
  246.  
  247.  
  248.  
  249.  
  250. WCNASensorial2017=data.frame(moduleColors=dynamicColors,geneModuleMembership,MMPvalue,
  251. geneTraitSignificance,GSPvalue,geneTraitSignificance1,GSPvalue1,geneTraitSignificance2,GSPvalue2,geneTraitSignificance3,GSPvalue3,
  252. geneTraitSignificance4,GSPvalue4,geneTraitSignificance5,GSPvalue5,geneTraitSignificance6,GSPvalue6,geneTraitSignificance7,GSPvalue7,
  253. geneTraitSignificance8,GSPvalue8)
  254. write.csv(WCNASensorial2017, file="WCNA_TraitSensor_1.csv")
  255.  
  256. #####OUTPUT TABLE CREATED######
  257.  
  258. # ASSESSMENT CORRELATION MODULE V TRAIT SIGNIFICANCE OF COMPOUNDS WITHIN MODULES
  259. moduleColors = dynamicColors
  260. module = "blue"
  261. column = match(module, modNames);
  262. moduleGenes = moduleColors==module;
  263. sizeGrWindow(7, 7);
  264. par(mfrow = c(1,1));
  265. verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
  266. abs(geneTraitSignificance[moduleGenes, 1]),
  267. xlab = paste("Module Membership in", module, "module"),
  268. ylab = "Compound significance for Age",
  269. main = paste("Module membership vs. gene significance\n"),
  270. cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
  271.  
  272. names(datCompds0)
  273. names(datCompds0)[moduleColors=="purple"]
  274. names(datCompds0)[moduleColors=="turquoise"]
  275.  
  276.  
  277. #Β Stopped here for project all necessary data available
  278.  
  279. #Appendix 1: IF CUT: Determine cluster under the line
  280. #clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
  281. #table(clust)
  282. # clust 1 contains the samples we want to keep.
  283. #keepSamples = (clust==1)
  284. #datExpr = datExpr0[keepSamples, ] #generating new compounds file leaving out cut-offs
  285. #nCompds = ncol(datCompds0)
  286. #nSamples = nrow(datCompds0)
  287. #Re-cluster samples
  288. #sampleTree2 = flashClust(dist(datCompds0), method = "average")
  289.  
  290. #Appendix 2: IF MERGER
  291. # Call an automatic merging function
  292. #merge = mergeCloseModules(datCompds0, dynamicColors, cutHeight = MEDissThres, verbose = 3)
  293. # The merged module colors
  294. mergedColors = merge$colors;
  295. # Eigengenes of the new merged modules:
  296. #mergedMEs = merge$newMEs;
  297. #sizeGrWindow(10, 7)
  298. #pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
  299. #plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
  300. #c("Dynamic Tree Cut", "Merged dynamic"),
  301. #dendroLabels = FALSE, hang = 0.03,
  302. #addGuide = TRUE, guideHang = 0.05)
  303. #dev.off()
  304. # Rename to moduleColors
  305. moduleColors = dynamicColors #mergedColors
  306. # Construct numerical labels corresponding to the colors
  307. #colorOrder = c("grey", standardColors(50));
  308. #moduleLabels = match(moduleColors, colorOrder)-1;
  309.  
  310.  
  311. getwd()
  312. #rename working directory
  313. workingDir = "/R software/TranscriptomePeach"
  314. #C:\R software\Fruitydata2018
  315. #C:\TOF-DS\FRUITY2017\FRUITY WCNA\R tempate Analysis\FRUITY MARKES INT
  316. # set new working directory:
  317. setwd(workingDir)
  318.  
  319. datCompds0=read.csv("SqrtTranscript.csv", header = T)
  320. #datCompds0=read.csv("D:/BI3153/NormAreaSqrt.csv", header = T)#Β equivalent to YData
  321.  
  322. dim(datCompds0)
  323. names(datCompds0)
  324.  
  325. traitData = read.csv("ParamTranscript-rep.csv")
  326. #traitData = read.csv("D:/BI3153/xdata2.csv");# equivalent to XData
  327.  
  328. dim(traitData)
  329. names(traitData)
  330.  
  331. sampleTree = flashClust(dist(datCompds0), method = "average");
  332. # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
  333. # The user should change the dimensions if the window is too large or too small.
  334. sizeGrWindow(8,5) #this command is redundant in RStudio
  335. par(cex = 0.6);
  336. par(mar = c(0,4,2,0))
  337. plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
  338.  
  339. # Plot a line to show the cut
  340. abline(h = 350, col = "red");
  341.  
  342. # Usually with scent data there is no cut required as there's no outgroup; if there is go to appendix 1
  343.  
  344. # Convert traits to a color representation: white means low, red means high, grey means missing entry
  345.  
  346. traitColors = numbers2colors(traitData ,signed = FALSE);
  347. # Plot the sample dendrogram and the colors underneath.
  348. plotDendroAndColors(sampleTree, traitColors,
  349. groupLabels = names(traitData),
  350. main = "Sample dendrogram and trait heatmap") #if reclustering is required the term 'sampletree' needs to be amended to sampletree2 (see appendix I)
  351.  
  352. # Choose a set of soft-thresholding powers
  353. powers = c(c(1:10), seq(from = 12, to=20, by=2))
  354. # Call the network topology analysis function
  355. sft = pickSoftThreshold(datCompds0, powerVector = powers, verbose = 5)
  356. # Plot the results:
  357. sizeGrWindow(9, 5)
  358. par(mfrow = c(1,2));
  359. cex1 = 0.9;
  360. # Scale-free topology fit index as a function of the soft-thresholding power
  361. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  362. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
  363. main = paste("Scale independence"));
  364. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  365. labels=powers,cex=cex1,col="red")
  366. # this line corresponds to using an R^2 cut-off of h
  367. abline(h=0.90,col="red")
  368. # Mean connectivity as a function of the soft-thresholding power
  369. plot(sft$fitIndices[,1], sft$fitIndices[,5],
  370. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  371. main = paste("Mean connectivity"))
  372. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  373. # all above is plot only!
  374. #SELECT SOFTPOWER TO GAIN LARGEST SCALE FREE TOPOLOGY FIT AND LOWWEST MEAN CONNECTIVITY TO CONTINUE
  375.  
  376. # Calculating adjeacency with soft power 4
  377. softPower =6
  378. adjacency = adjacency(datCompds0, power = softPower)
  379.  
  380. # Turn adjacency into topological overlap
  381. TOM = TOMsimilarity(adjacency);
  382. dissTOM = 1-TOM
  383.  
  384. # Call the hierarchical clustering function
  385. geneTree = flashClust(as.dist(dissTOM), method = "average");
  386. # Plot the resulting clustering tree (dendrogram)
  387. sizeGrWindow(8,5)
  388. plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
  389. labels = FALSE, hang = 0.04)
  390.  
  391. # We like large modules, so we set the minimum module size relatively high:
  392. minModuleSize =60;
  393. # Module identification using dynamic tree cut:
  394. dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
  395. deepSplit = 3, pamRespectsDendro = FALSE,
  396. minClusterSize = minModuleSize);
  397. table(dynamicMods)
  398. # Convert numeric lables into colors
  399. dynamicColors = labels2colors(dynamicMods)
  400. table(dynamicColors)
  401.  
  402. # Plot the dendrogram and colors underneath
  403. sizeGrWindow(8,6)
  404. plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
  405. dendroLabels = FALSE, hang = 0.03,
  406. addGuide = TRUE, guideHang = 0.05,
  407. main = "Gene dendrogram and module colors")
  408.  
  409.  
  410.  
  411. # Calculate eigengenes
  412. MEList = moduleEigengenes(datCompds0, colors = dynamicColors)
  413. MEs = MEList$eigengenes
  414. # Calculate dissimilarity of module eigengenes
  415. MEDiss = 1-cor(MEs);
  416. # Cluster module eigengenes
  417. METree = flashClust(as.dist(MEDiss), method = "average");
  418. # Plot the result
  419. sizeGrWindow(7, 6)
  420. plot(METree, main = "Clustering of module eigengenes",
  421. xlab = "", sub = "")
  422.  
  423. MEDissThres = 0.01
  424. # Plot the cut line into the dendrogram
  425. abline(h=MEDissThres, col = "red")
  426. ###### IF MERGER IS WANTED, E.G. COMBINING MODULES GO TO APPENDIX 2
  427.  
  428. # Define numbers of genes and samples
  429. nGenes = ncol(datCompds0);
  430. nSamples = nrow(datCompds0);
  431. # Recalculate MEs with color labels
  432. #MEs0 = moduleEigengenes(datCompds0, moduleColors)$eigengenes
  433. #MEs = orderMEs(MEs0)
  434. moduleTraitCor = cor(MEs, traitData, use = "p");
  435. moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
  436.  
  437. # START PLOTTING HEAT MAP MODULE-TRAIT RELATION AND SIGNIFICANCE
  438.  
  439.  
  440. sizeGrWindow(10,6)
  441. # Will display correlations and their p-values
  442. textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
  443. signif(moduleTraitPvalue, 1), ")", sep = "");
  444. dim(textMatrix) = dim(moduleTraitCor)
  445. par(mar = c(6, 8.5, 3, 3));
  446. # Display the correlation values within a heatmap plot
  447. labeledHeatmap(Matrix = moduleTraitCor,
  448. xLabels = names(traitData),
  449. yLabels = names(MEs),
  450. ySymbols = names(MEs),
  451. colorLabels = FALSE,
  452. colors = greenWhiteRed(50),
  453. textMatrix = textMatrix,
  454. setStdMargins = FALSE,
  455. cex.text = 0.5,
  456. zlim = c(-1,1),
  457. main = paste("Module-trait relationships"))
  458.  
  459. #END PLOTTING##
  460.  
  461. #PREPARING OUTPUT TABLE#
  462.  
  463. # Define variable 'Sex' containing the 'Sex' column of datTrait
  464. Day = as.data.frame(traitData$Day);
  465. names(Day) = "Day"
  466. Cultivar = as.data.frame(traitData$Cultivar)
  467. names(Cultivar)="Cultivar"
  468. Temperature = as.data.frame(traitData$Temperature)
  469. names(Temperature)="Temperature"
  470. Sample = as.data.frame(traitData$Sample)
  471. names(Sample)="Sample"
  472. # names (colors) of the modules
  473. modNames = substring(names(MEs), 5)
  474. geneModuleMembership = as.data.frame(cor(datCompds0, MEs, use = "p"));
  475. MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
  476. names(geneModuleMembership) = paste("MM", modNames, sep="");
  477. names(MMPvalue) = paste("p.MM", modNames, sep="");
  478. geneTraitSignificance = as.data.frame(cor(datCompds0, Day, use = "p"));
  479. GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
  480. names(geneTraitSignificance) = paste("GS.", names(Day), sep="");
  481. names(GSPvalue) = paste("p.GS.", names(Day), sep="");
  482. geneTraitSignificance1 = as.data.frame(cor(datCompds0, Cultivar, use = "p"));
  483. GSPvalue1 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance1), nSamples));
  484. names(geneTraitSignificance1) = paste("GS.", names(Cultivar), sep="");
  485. names(GSPvalue1) = paste("p.GS.", names(Cultivar), sep="");
  486. geneTraitSignificance2 = as.data.frame(cor(datCompds0, Temperature, use = "p"));
  487. GSPvalue2 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance2), nSamples));
  488. names(geneTraitSignificance2) = paste("GS.", names(Temperature), sep="");
  489. names(GSPvalue2) = paste("p.GS.", names(Temperature), sep="");
  490. geneTraitSignificance3 = as.data.frame(cor(datCompds0, Sample, use = "p"));
  491. GSPvalue3 = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance3), nSamples));
  492. names(geneTraitSignificance3) = paste("GS.", names(Sample), sep="");
  493. names(GSPvalue3) = paste("p.GS.", names(Sample), sep="");
  494.  
  495. WCNASag_BT=data.frame(moduleColors=dynamicColors,geneModuleMembership,MMPvalue,geneTraitSignificance,GSPvalue,geneTraitSignificance1,GSPvalue1,geneTraitSignificance2,GSPvalue2,geneTraitSignificance3,GSPvalue3)
  496. write.csv(WCNASag_BT, file="WCNASag_BTnooverload.csv")
  497.  
  498. #####OUTPUT TABLE CREATED######
  499.  
  500. # ASSESSMENT CORRELATION MODULE V TRAIT SIGNIFICANCE OF COMPOUNDS WITHIN MODULES
  501. moduleColors = dynamicColors
  502. module = "blue"
  503. column = match(module, modNames);
  504. moduleGenes = moduleColors==module;
  505. sizeGrWindow(7, 7);
  506. par(mfrow = c(1,1));
  507. verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
  508. abs(geneTraitSignificance[moduleGenes, 1]),
  509. xlab = paste("Module Membership in", module, "module"),
  510. ylab = "Compound significance for Age",
  511. main = paste("Module membership vs. gene significance\n"),
  512. cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
  513.  
  514. names(datCompds0)
  515. names(datCompds0)[moduleColors=="purple"]
  516. names(datCompds0)[moduleColors=="turquoise"]
  517.  
  518.  
  519. #Β Stopped here for project all necessary data available
  520.  
  521. #Appendix 1: IF CUT: Determine cluster under the line
  522. #clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
  523. #table(clust)
  524. # clust 1 contains the samples we want to keep.
  525. #keepSamples = (clust==1)
  526. #datExpr = datExpr0[keepSamples, ] #generating new compounds file leaving out cut-offs
  527. #nCompds = ncol(datCompds0)
  528. #nSamples = nrow(datCompds0)
  529. #Re-cluster samples
  530. #sampleTree2 = flashClust(dist(datCompds0), method = "average")
  531.  
  532. #Appendix 2: IF MERGER
  533. # Call an automatic merging function
  534. #merge = mergeCloseModules(datCompds0, dynamicColors, cutHeight = MEDissThres, verbose = 3)
  535. # The merged module colors
  536. mergedColors = merge$colors;
  537. # Eigengenes of the new merged modules:
  538. #mergedMEs = merge$newMEs;
  539. #sizeGrWindow(10, 7)
  540. #pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
  541. #plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
  542. #c("Dynamic Tree Cut", "Merged dynamic"),
  543. #dendroLabels = FALSE, hang = 0.03,
  544. #addGuide = TRUE, guideHang = 0.05)
  545. #dev.off()
  546. # Rename to moduleColors
  547. moduleColors = dynamicColors #mergedColors
  548. # Construct numerical labels corresponding to the colors
  549. #colorOrder = c("grey", standardColors(50));
  550. #moduleLabels = match(moduleColors, colorOrder)-1;
  551. #MEs = mergedMEs;
  552.  
Advertisement
Add Comment
Please, Sign In to add comment