Advertisement
ProzacR

Edita2013_specifiskumo_QSAR nebaigtas 2

Aug 21st, 2015
538
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 9.70 KB | None | 0 0
  1. library(cvq2)
  2. #library(leaps)
  3.  
  4.  
  5. # cia is karto jau pKd skirtumas imtas:
  6. selectivity<-read.table("selectivityCA12.csv", sep=",", header=TRUE)
  7. qsar12_1<-data.frame(K<-selectivity$CA12.1)
  8. qsar12_1$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  9.  
  10.  
  11. #random numbers:
  12. #> test <- sort(round(runif(10, 1, 40)))
  13. #> test
  14. test <- c(2, 3, 8, 9, 10, 16, 18, 19, 29, 35)
  15. #tada:
  16. train <- c(1, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40)
  17.  
  18. r2=NULL
  19. r2good=NULL
  20. for (i in 1:1317 ) {
  21. fit.single<-lm(qsar12_1$K[train]~qsar12_1$x[train,i])
  22. r2[i]<-summary(fit.single)$r.squared
  23. if(r2[i]>0.3) {
  24. r2good[i]<-r2[i]
  25. }
  26. }
  27.  
  28. #leaps<-regsubsets(qsar12_1$K[train]~qsar12_1$x[train,c(1:31)], data=qsar12_1, nvmax=5)
  29. #plot(leaps, scale="r2")
  30.  
  31. qsar_12_1_train<-lm(qsar12_1$K[train] ~ qsar12_1$x[train, 105] + qsar12_1$x[train, 333] + qsar12_1$x[train, 1205])
  32. print(summary(qsar_12_1_train))
  33. qsar_12_1_test_pred_values<-coef(qsar_12_1_train)[1]+coef(qsar_12_1_train)[2]*qsar12_1$x[test, 105]+coef(qsar_12_1_train)[3]*qsar12_1$x[test, 333]+coef(qsar_12_1_train)[4]*qsar12_1$x[test, 1205]
  34. qsar_12_1_test<-lm(qsar_12_1_test_pred_values ~ qsar12_1$K[test])
  35. print(summary(qsar_12_1_test))
  36. x<-cbind(qsar12_1$x[train,c(105,333,1205)], qsar12_1$K[train])
  37. colnames(x)[4]<-"y"
  38. qsar_12_1_q2<-cvq2(x)
  39. print(qsar_12_1_q2)
  40.  
  41. #==========================================
  42.  
  43. qsar12_2<-data.frame(K<-selectivity$CA12.2)
  44. qsar12_2$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  45.  
  46. r2=NULL
  47. r2good=NULL
  48. for (i in 1:1317 ) {
  49. fit.single<-lm(qsar12_2$K[train]~qsar12_2$x[train,i])
  50. r2[i]<-summary(fit.single)$r.squared
  51. if(r2[i]>0.3) {
  52. r2good[i]<-r2[i]
  53. }
  54. }
  55.  
  56. #kaip ir be leaps variantas:
  57. #tada imti didziausia is r2good ir salinti koreliacijas su kitais, rasti kuris nekoreliuoja
  58. #r2very_good istrintos koreliacijos su 1231 kuris labai geras  sitame....
  59.  
  60. #arba r2good kas gero tada dar ziureti su leaps
  61.  
  62. qsar_12_2_train<-lm(qsar12_2$K[train] ~ qsar12_2$x[train, 293] +qsar12_2$x[train, 308] + qsar12_2$x[train, 311])
  63. print(summary(qsar_12_2_train))
  64. qsar_12_2_test_pred_values<-coef(qsar_12_2_train)[1]+coef(qsar_12_2_train)[2]*qsar12_2$x[test, 293]+coef(qsar_12_2_train)[3]*qsar12_2$x[test, 308]+coef(qsar_12_2_train)[4]*qsar12_2$x[test, 311]
  65. qsar_12_2_test<-lm(qsar_12_2_test_pred_values ~ qsar12_2$K[test])
  66. print(summary(qsar_12_2_test))
  67. x<-cbind(qsar12_2$x[train,c(293,308,311)], qsar12_2$K[train])
  68. colnames(x)[4]<-"y"
  69. qsar_12_2_q2<-cvq2(x)
  70. print(qsar_12_2_q2)
  71.  
  72. #=============================================
  73.  
  74. qsar12_6<-data.frame(K<-selectivity$CA12.6)
  75. qsar12_6$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  76.  
  77. r2=NULL
  78. r2good=NULL
  79. for (i in 1:1317 ) {
  80. fit.single<-lm(qsar12_6$K[train]~qsar12_6$x[train,i])
  81. r2[i]<-summary(fit.single)$r.squared
  82. if(r2[i]>0.3) {
  83. r2good[i]<-r2[i]
  84. }
  85. }
  86.  
  87. good_deskr_nr<-NULL
  88. for (i in 1:1317 ) {
  89.  if(r2[i]>0.3) {
  90.  good_deskr_nr<-c(good_deskr_nr, i)
  91.  }
  92. }
  93. leaps<-regsubsets(qsar12_6$K[train]~qsar12_6$x[train,good_deskr_nr[1:25]], data=qsar12_6, nvmax=3)
  94. plot(leaps, scale="r2")
  95.  
  96. qsar_12_6_train<-lm(qsar12_6$K[train] ~ qsar12_6$x[train, 38] + qsar12_6$x[train, 283] + qsar12_6$x[train, 299])
  97. print(summary(qsar_12_6_train))
  98. qsar_12_6_test_pred_values<-coef(qsar_12_6_train)[1]+coef(qsar_12_6_train)[2]*qsar12_6$x[test, 38]+coef(qsar_12_6_train)[3]*qsar12_6$x[test, 283]+coef(qsar_12_6_train)[4]*qsar12_6$x[test, 299]
  99. qsar_12_6_test<-lm(qsar_12_6_test_pred_values ~ qsar12_6$K[test])
  100. print(summary(qsar_12_6_test))
  101. x<-cbind(qsar12_6$x[train,c(38,283,299)], qsar12_6$K[train])
  102. colnames(x)[4]<-"y"
  103. qsar_12_6_q2<-cvq2(x)
  104. print(qsar_12_6_q2)
  105.  
  106. #===============================================
  107.  
  108. qsar12_7<-data.frame(K<-selectivity$CA12.7)
  109. qsar12_7$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  110.  
  111. r2=NULL
  112. r2good=NULL
  113. for (i in 1:1317 ) {
  114. fit.single<-lm(qsar12_7$K[train]~qsar12_7$x[train,i])
  115. r2[i]<-summary(fit.single)$r.squared
  116. if(r2[i]>0.3) {
  117. r2good[i]<-r2[i]
  118. }
  119. }
  120.  
  121. good_deskr_nr<-NULL
  122. for (i in 1:1317 ) {
  123.  if(r2[i]>0.3) {
  124.  good_deskr_nr<-c(good_deskr_nr, i)
  125.  }
  126. }
  127. leaps<-regsubsets(qsar12_7$K[train]~qsar12_7$x[train,good_deskr_nr[1:25]], data=qsar12_7, nvmax=3)
  128. plot(leaps, scale="r2")
  129.  
  130. qsar_12_7_train<-lm(qsar12_7$K[train] ~ qsar12_7$x[train, 300] + qsar12_7$x[train, 463] + qsar12_7$x[train, 841])
  131. print(summary(qsar_12_7_train))
  132. qsar_12_7_test_pred_values<-coef(qsar_12_7_train)[1]+coef(qsar_12_7_train)[2]*qsar12_7$x[test, 300]+coef(qsar_12_7_train)[3]*qsar12_7$x[test, 463]+coef(qsar_12_7_train)[4]*qsar12_7$x[test, 841]
  133. qsar_12_7_test<-lm(qsar_12_7_test_pred_values ~ qsar12_7$K[test])
  134. print(summary(qsar_12_7_test))
  135. x<-cbind(qsar12_7$x[train,c(300,463,841)], qsar12_7$K[train])
  136. colnames(x)[4]<-"y"
  137. qsar_12_7_q2<-cvq2(x)
  138. print(qsar_12_7_q2)
  139.  
  140. #===============================================
  141. qsar12_13<-data.frame(K<-selectivity$CA12.13)
  142. qsar12_13$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  143.  
  144. r2=NULL
  145. r2good=NULL
  146. for (i in 1:1317 ) {
  147. fit.single<-lm(qsar12_13$K[train]~qsar12_13$x[train,i])
  148. r2[i]<-summary(fit.single)$r.squared
  149. if(r2[i]>0.3) {
  150. r2good[i]<-r2[i]
  151. }
  152. }
  153.  
  154. good_deskr_nr<-NULL
  155. for (i in 1:1317 ) {
  156.  if(r2[i]>0.3) {
  157.  good_deskr_nr<-c(good_deskr_nr, i)
  158.  }
  159. }
  160. leaps<-regsubsets(qsar12_13$K[train]~qsar12_13$x[train,good_deskr_nr], data=qsar12_13, nvmax=3)
  161. plot(leaps, scale="r2")
  162.  
  163. qsar_12_13_train<-lm(qsar12_13$K[train] ~ qsar12_13$x[train, 278] + qsar12_13$x[train, 300] + qsar12_13$x[train, 1153])
  164. print(summary(qsar_12_13_train))
  165. qsar_12_13_test_pred_values<-coef(qsar_12_13_train)[1]+coef(qsar_12_13_train)[2]*qsar12_13$x[test, 278]+coef(qsar_12_13_train)[3]*qsar12_13$x[test, 300]+coef(qsar_12_13_train)[4]*qsar12_13$x[test, 1153]
  166. qsar_12_13_test<-lm(qsar_12_13_test_pred_values ~ qsar12_13$K[test])
  167. print(summary(qsar_12_13_test))
  168. x<-cbind(qsar12_13$x[train,c(278,300,1153)], qsar12_13$K[train])
  169. colnames(x)[4]<-"y"
  170. qsar_12_13_q2<-cvq2(x)
  171. print(qsar_12_13_q2)
  172.  
  173. #===============================================
  174. qsarSUM<-data.frame(K<-selectivity$SUMsCA12)
  175. qsarSUM$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  176.  
  177. r2=NULL
  178. r2good=NULL
  179. for (i in 1:1317 ) {
  180. fit.single<-lm(qsarSUM$K[train]~qsarSUM$x[train,i])
  181. r2[i]<-summary(fit.single)$r.squared
  182. if(r2[i]>0.3) {
  183. r2good[i]<-r2[i]
  184. }
  185. }
  186.  
  187. good_deskr_nr<-NULL
  188. for (i in 1:1317 ) {
  189.  if(r2[i]>0.3) {
  190.  good_deskr_nr<-c(good_deskr_nr, i)
  191.  }
  192. }
  193. leaps<-regsubsets(qsarSUM$K[train]~qsarSUM$x[train,good_deskr_nr], data=qsarSUM, nvmax=3)
  194. plot(leaps, scale="r2")
  195.  
  196. qsar_SUM_train<-lm(qsarSUM$K[train] ~ qsarSUM$x[train, 1167] + qsarSUM$x[train, 1178] + qsarSUM$x[train, 1232])
  197. print(summary(qsar_SUM_train))
  198. qsar_SUM_test_pred_values<-coef(qsar_SUM_train)[1]+coef(qsar_SUM_train)[2]*qsarSUM$x[test, 1167]+coef(qsar_SUM_train)[3]*qsarSUM$x[test, 1178]+coef(qsar_SUM_train)[4]*qsarSUM$x[test, 1232]
  199. qsar_SUM_test<-lm(qsar_SUM_test_pred_values ~ qsarSUM$K[test])
  200. print(summary(qsar_SUM_test))
  201. x<-cbind(qsarSUM$x[train,c(1167,1178,1232)], qsarSUM$K[train])
  202. colnames(x)[4]<-"y"
  203. qsar_SUM_q2<-cvq2(x)
  204. print(qsar_SUM_q2)
  205.  
  206. #===============================================
  207.  
  208.  
  209.  
  210.  
  211. #grafiko asys nuo/iki:
  212. minK<-5.8
  213. maxK<-9
  214.  
  215. qsar1<-coef(qsar_12_1_train)[1]+coef(qsar_12_1_train)[2]*qsar$x[, 275]+coef(qsar_12_1_train)[3]*qsar$x[, 1074]+coef(qsar_12_1_train)[4]*qsar$x[, 1107]
  216. qsar2<-coef(qsar_12_2_train)[1]+coef(qsar_12_2_train)[2]*qsar$x[, 649]+coef(qsar_12_2_train)[3]*qsar$x[, 1074]+coef(qsar_12_2_train)[4]*qsar$x[, 1259]
  217. qsar3<-coef(qsar_12_3_train)[1]+coef(qsar_12_3_train)[2]*qsar$x[, 100]+coef(qsar_12_3_train)[3]*qsar$x[, 275]+coef(qsar_12_3_train)[4]*qsar$x[, 300]
  218. grafikui<-cbind(qsar$K, qsar1, qsar2, qsar3)
  219. colnames(grafikui)[1]<-"pKd"
  220. mod_qsar1<-lm(grafikui[train,1]~grafikui[train,2])
  221. mod_qsar2<-lm(grafikui[train,1]~grafikui[train,3])
  222. mod_qsar3<-lm(grafikui[train,1]~grafikui[train,4])
  223. mod_qsar1t<-lm(grafikui[test,1]~grafikui[test,2])
  224. mod_qsar2t<-lm(grafikui[test,1]~grafikui[test,3])
  225. mod_qsar3t<-lm(grafikui[test,1]~grafikui[test,4])
  226.  
  227.  
  228. png("Edita2013_grafikas2.png", width=600, height=900)
  229. par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
  230. plot(grafikui[train,1], grafikui[train,2], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  231. abline(mod_qsar1)
  232. title('QSAR1', line = -3, cex.main=3)
  233. axis(1,col.axis = "transparent", tck = 0.02)
  234.  
  235. plot(grafikui[test,1], grafikui[test,2], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
  236. abline(mod_qsar1t)
  237. title('QSAR1 test set', line = -3, cex.main=3)
  238. axis(1,col.axis = "transparent", tck = 0.02)
  239. axis(2,col.axis = "transparent", tck = 0.02)
  240.  
  241. plot(grafikui[train,1], grafikui[train,3], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  242. abline(mod_qsar2)
  243. title('QSAR2', line = -3, cex.main=3)
  244.  
  245. plot(grafikui[test,1], grafikui[test,3], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
  246. abline(mod_qsar2t)
  247. title('QSAR2 test set', line = -3, cex.main=3)
  248. axis(1,col.axis = "transparent", tck = 0.02)
  249. axis(2,col.axis = "transparent", tck = 0.02)
  250.  
  251. plot(grafikui[train,1], grafikui[train,4], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
  252. abline(mod_qsar3)
  253. title('QSAR3', line = -3, cex.main=3)
  254.  
  255. plot(grafikui[test,1], grafikui[test,4], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
  256. abline(mod_qsar3t)
  257. title('QSAR3 test set', line = -3, cex.main=3)
  258. axis(2,col.axis = "transparent", tck = 0.02)
  259.  
  260. mtext('pKd (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE)
  261. mtext('pKd (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE)
  262.  
  263. dev.off()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement