Advertisement
ProzacR

Edita_2013_specifiskumo_QSAR_mod_kiekvienai_nd

Nov 26th, 2015
444
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 18.82 KB | None | 0 0
  1. library(cvq2)
  2. library(leaps)
  3.  
  4. #Q2TEST f-ja kaip ir PHASE Q2 lygiai tokia pati
  5. q2test<-function(activity, predicted_activity) {
  6.                 prediction_error_sq<-(predicted_activity-activity)^2
  7.                 avg_activity<-mean(activity)
  8.                 sigma_y_sq<-(activity-avg_activity)^2
  9.                 q2test_val<-1-sum(prediction_error_sq)/sum(sigma_y_sq)
  10.                 return(q2test_val)
  11. }
  12.  
  13. # cia is karto jau pKd imta:
  14. visu_CA_pKd<-read.table("visu_CA_pKd.csv", sep=",", header=TRUE)
  15. qsar1<-data.frame(K<-visu_CA_pKd$CAI)
  16. qsar1$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  17.  
  18.  
  19. #random numbers:
  20. #> test <- sort(round(runif(10, 1, 40)))
  21. #> test
  22. test <- c(2, 3, 8, 9, 10, 16, 18, 19, 29, 35)
  23. #tada:
  24. train <- c(1, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40)
  25.  
  26. r2=NULL
  27. r2good=NULL
  28. for (i in 1:1317 ) {
  29. fit.single<-lm(qsar1$K[train]~qsar1$x[train,i])
  30. r2[i]<-summary(fit.single)$r.squared                                                                                                                        
  31. if(r2[i]>0.25) {                                                                                                                                            
  32. r2good[i]<-r2[i]                                                                                                                                            
  33. }                                                                                                                                                            
  34. }
  35.  
  36. good_deskr_nr<-NULL
  37. for (i in 1:1317 ) {
  38.          if(r2[i]>0.25) {
  39.                   good_deskr_nr<-c(good_deskr_nr, i)
  40.  }
  41. }
  42. #leaps<-regsubsets(qsar1$K[train]~qsar1$x[train,good_deskr_nr], data=qsar1, nvmax=3)
  43. #plot(leaps, scale="r2")
  44. CA1_geri_deskr<-c(292, 313, 331)
  45.  
  46.  
  47. qsar_1_train<-lm(qsar1$K[train] ~ qsar1$x[train, CA1_geri_deskr[1]] + qsar1$x[train, CA1_geri_deskr[2]] + qsar1$x[train, CA1_geri_deskr[3]])
  48. print(summary(qsar_1_train))
  49. qsar_1_test_pred_values<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[test, CA1_geri_deskr[1]]+coef(qsar_1_train)[3]*qsar1$x[test, CA1_geri_deskr[2]]+coef(qsar_1_train)[4]*qsar1$x[test, CA1_geri_deskr[3]]
  50. qsar_1_test<-lm(qsar_1_test_pred_values ~ qsar1$K[test])
  51. print(summary(qsar_1_test))
  52. x<-cbind(qsar1$x[train,c(CA1_geri_deskr[1], CA1_geri_deskr[2], CA1_geri_deskr[3])], qsar1$K[train])
  53. colnames(x)[4]<-"y"
  54. qsar_1_q2<-cvq2(x)
  55. print(qsar_1_q2)
  56. print(q2test(qsar1$K[test],qsar_1_test_pred_values))
  57.  
  58.  
  59. #==========================================
  60.  
  61. qsar2<-data.frame(K<-visu_CA_pKd$CAII)
  62. qsar2$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  63.  
  64. r2=NULL
  65. r2good=NULL
  66. for (i in 1:1317 ) {
  67. fit.single<-lm(qsar2$K[train]~qsar2$x[train,i])
  68. r2[i]<-summary(fit.single)$r.squared
  69. if(r2[i]>0.25) {
  70. r2good[i]<-r2[i]
  71. }
  72. }
  73.  
  74. good_deskr_nr<-NULL
  75. for (i in 1:1317 ) {
  76.          if(r2[i]>0.25) {
  77.            good_deskr_nr<-c(good_deskr_nr, i)
  78.  }
  79. }
  80. #leaps<-regsubsets(qsar2$K[train]~qsar2$x[train,good_deskr_nr], data=qsar2, nvmax=3)
  81. #plot(leaps, scale="r2")
  82. CA2_geri_deskr<-c(298, 647, 1093)
  83.  
  84.  
  85. #kaip ir be leaps variantas:
  86. #tada imti didziausia is r2good ir salinti koreliacijas su kitais, rasti kuris nekoreliuoja
  87. #r2very_good istrintos koreliacijos su 1231 kuris labai geras  sitame....
  88.  
  89. #arba r2good kas gero tada dar ziureti su leaps
  90. #cia panaudot sena gal geriau
  91.  
  92. qsar_2_train<-lm(qsar2$K[train] ~ qsar2$x[train, CA2_geri_deskr[1]] +qsar2$x[train, CA2_geri_deskr[2]] + qsar2$x[train, CA2_geri_deskr[3]])
  93. print(summary(qsar_2_train))
  94. qsar_2_test_pred_values<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[test, CA2_geri_deskr[1]]+coef(qsar_2_train)[3]*qsar2$x[test, CA2_geri_deskr[2]]+coef(qsar_2_train)[4]*qsar2$x[test, CA2_geri_deskr[3]]
  95. qsar_2_test<-lm(qsar_2_test_pred_values ~ qsar2$K[test])
  96. print(summary(qsar_2_test))
  97. x<-cbind(qsar2$x[train,c(CA2_geri_deskr[1], CA2_geri_deskr[2], CA2_geri_deskr[3])], qsar2$K[train])
  98. colnames(x)[4]<-"y"
  99. qsar_2_q2<-cvq2(x)
  100. print(qsar_2_q2)
  101. print(q2test(qsar2$K[test],qsar_2_test_pred_values))
  102.  
  103.  
  104. #=============================================
  105.  
  106. qsar6<-data.frame(K<-visu_CA_pKd$CAVI)
  107. qsar6$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  108.  
  109. r2=NULL
  110. r2good=NULL
  111. for (i in 1:1317 ) {
  112. fit.single<-lm(qsar6$K[train]~qsar6$x[train,i])
  113. r2[i]<-summary(fit.single)$r.squared
  114. if(r2[i]>0.25) {
  115. r2good[i]<-r2[i]
  116. }
  117. }
  118.  
  119. good_deskr_nr<-NULL
  120. for (i in 1:1317 ) {
  121.  if(r2[i]>0.25) {
  122.  good_deskr_nr<-c(good_deskr_nr, i)
  123.  }
  124. }
  125.  
  126. #leaps<-regsubsets(qsar6$K[train]~qsar6$x[train,good_deskr_nr[1:25]], data=qsar6, nvmax=3)
  127. #plot(leaps, scale="r2")
  128. CA6_geri_deskr<-c(271, 423, 942)
  129.  
  130.  
  131. qsar_6_train<-lm(qsar6$K[train] ~ qsar6$x[train, CA6_geri_deskr[1]] + qsar6$x[train, CA6_geri_deskr[2]] + qsar6$x[train, CA6_geri_deskr[3]])
  132. print(summary(qsar_6_train))
  133. qsar_6_test_pred_values<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[test, CA6_geri_deskr[1]]+coef(qsar_6_train)[3]*qsar6$x[test, CA6_geri_deskr[2]]+coef(qsar_6_train)[4]*qsar6$x[test, CA6_geri_deskr[3]]
  134. qsar_6_test<-lm(qsar_6_test_pred_values ~ qsar6$K[test])
  135. print(summary(qsar_6_test))
  136. x<-cbind(qsar6$x[train,c(CA6_geri_deskr[1], CA6_geri_deskr[2], CA6_geri_deskr[3])], qsar6$K[train])
  137. colnames(x)[4]<-"y"
  138. qsar_6_q2<-cvq2(x)
  139. print(qsar_6_q2)
  140. print(q2test(qsar6$K[test],qsar_6_test_pred_values))
  141.  
  142.  
  143. #===============================================
  144.  
  145. qsar7<-data.frame(K<-visu_CA_pKd$CAVII)
  146. qsar7$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  147.  
  148. r2=NULL
  149. r2good=NULL
  150. for (i in 1:1317 ) {
  151. fit.single<-lm(qsar7$K[train]~qsar7$x[train,i])
  152. r2[i]<-summary(fit.single)$r.squared
  153. if(r2[i]>0.25) {
  154. r2good[i]<-r2[i]
  155. }
  156. }
  157.  
  158. good_deskr_nr<-NULL
  159. for (i in 1:1317 ) {
  160.  if(r2[i]>0.25) {
  161.  good_deskr_nr<-c(good_deskr_nr, i)
  162.  }
  163. }
  164. #leaps<-regsubsets(qsar7$K[train]~qsar7$x[train,good_deskr_nr[1:25]], data=qsar7, nvmax=3)
  165. #plot(leaps, scale="r2")
  166. CA7_geri_deskr<-c(422, 647, 942)
  167.  
  168.  
  169. qsar_7_train<-lm(qsar7$K[train] ~ qsar7$x[train, CA7_geri_deskr[1]] + qsar7$x[train, CA7_geri_deskr[2]] + qsar7$x[train, CA7_geri_deskr[3]])
  170. print(summary(qsar_7_train))
  171. qsar_7_test_pred_values<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[test, CA7_geri_deskr[1]]+coef(qsar_7_train)[3]*qsar7$x[test, CA7_geri_deskr[2]]+coef(qsar_7_train)[4]*qsar7$x[test, CA7_geri_deskr[3]]
  172. qsar_7_test<-lm(qsar_7_test_pred_values ~ qsar7$K[test])
  173. print(summary(qsar_7_test))
  174. x<-cbind(qsar7$x[train,c(CA7_geri_deskr[1], CA7_geri_deskr[2], CA7_geri_deskr[3])], qsar7$K[train])
  175. colnames(x)[4]<-"y"
  176. qsar_7_q2<-cvq2(x)
  177. print(qsar_7_q2)
  178. print(q2test(qsar7$K[test],qsar_7_test_pred_values))
  179.  
  180.  
  181. #===============================================
  182. qsar13<-data.frame(K<-visu_CA_pKd$CAXIII)
  183. qsar13$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  184.  
  185. r2=NULL
  186. r2good=NULL
  187. for (i in 1:1317 ) {
  188. fit.single<-lm(qsar13$K[train]~qsar13$x[train,i])
  189. r2[i]<-summary(fit.single)$r.squared
  190. if(r2[i]>0.25) {
  191. r2good[i]<-r2[i]
  192. }
  193. }
  194.  
  195. good_deskr_nr<-NULL
  196. for (i in 1:1317 ) {
  197.  if(r2[i]>0.25) {
  198.  good_deskr_nr<-c(good_deskr_nr, i)
  199.  }
  200. }
  201. #leaps<-regsubsets(qsar13$K[train]~qsar13$x[train,good_deskr_nr], data=qsar13, nvmax=3)
  202. #plot(leaps, scale="r2")
  203. CA13_geri_deskr<-c(365, 649, 937)
  204.  
  205.  
  206. qsar_13_train<-lm(qsar13$K[train] ~ qsar13$x[train, CA13_geri_deskr[1]] + qsar13$x[train, CA13_geri_deskr[2]] + qsar13$x[train, CA13_geri_deskr[3]])
  207. print(summary(qsar_13_train))
  208. qsar_13_test_pred_values<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[test, CA13_geri_deskr[1]]+coef(qsar_13_train)[3]*qsar13$x[test, CA13_geri_deskr[2]]+coef(qsar_13_train)[4]*qsar13$x[test, CA13_geri_deskr[3]]
  209. qsar_13_test<-lm(qsar_13_test_pred_values ~ qsar13$K[test])
  210. print(summary(qsar_13_test))
  211. x<-cbind(qsar13$x[train,c(CA13_geri_deskr[1], CA13_geri_deskr[2], CA13_geri_deskr[3])], qsar13$K[train])
  212. colnames(x)[4]<-"y"
  213. qsar_13_q2<-cvq2(x)
  214. print(qsar_13_q2)
  215. print(q2test(qsar13$K[test],qsar_13_test_pred_values))
  216.  
  217.  
  218. #===============================================
  219. qsar12<-data.frame(K<-visu_CA_pKd$CAXII)
  220. qsar12$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1))
  221. novel_descriptor<-read.table("novel_descriptor.csv", sep=",", header=TRUE)
  222. qsar12$x<-cbind(qsar12$x, novel_descriptor$T.OH..Cl.)
  223.  
  224. r2=NULL
  225. r2good=NULL
  226. for (i in 1:1318 ) {
  227. fit.single<-lm(qsar12$K[train]~qsar12$x[train,i])
  228. r2[i]<-summary(fit.single)$r.squared
  229. if(r2[i]>0.25) {
  230. r2good[i]<-r2[i]
  231. }
  232. }
  233.  
  234. good_deskr_nr<-NULL
  235. for (i in 1:1318 ) {
  236.  if(r2[i]>0.25) {
  237.  good_deskr_nr<-c(good_deskr_nr, i)
  238.  }
  239. }
  240. #leaps<-regsubsets(qsar12$K[train]~qsar12$x[train,good_deskr_nr], data=qsar12, nvmax=3)
  241. #plot(leaps, scale="r2")
  242. CA12_geri_deskr<-c(1135, 1259, 1318)
  243.  
  244. qsar_12_train<-lm(qsar12$K[train] ~ qsar12$x[train, CA12_geri_deskr[1]] + qsar12$x[train, CA12_geri_deskr[2]] + qsar12$x[train, CA12_geri_deskr[3]])
  245. print(summary(qsar_12_train))
  246. qsar_12_test_pred_values<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[test, CA12_geri_deskr[1]]+coef(qsar_12_train)[3]*qsar12$x[test, CA12_geri_deskr[2]]+coef(qsar_12_train)[4]*qsar12$x[test, CA12_geri_deskr[3]]
  247. qsar_12_test<-lm(qsar_12_test_pred_values ~ qsar12$K[test])
  248. print(summary(qsar_12_test))
  249. x<-cbind(qsar12$x[train,c(CA12_geri_deskr[1], CA12_geri_deskr[2], CA12_geri_deskr[3])], qsar12$K[train])
  250. colnames(x)[3]<-"nd"
  251. colnames(x)[4]<-"y"
  252. qsar_12_q2<-cvq2(x)
  253. print(qsar_12_q2)
  254. print(q2test(qsar12$K[test],qsar_12_test_pred_values))
  255.  
  256. #===============================================
  257. #su CA 12, bet be naujo deskriptoriaus:
  258. r2=NULL
  259. r2good=NULL
  260. for (i in 1:1317 ) {
  261. fit.single<-lm(qsar12$K[train]~qsar12$x[train,i])
  262. r2[i]<-summary(fit.single)$r.squared
  263. if(r2[i]>0.25) {
  264. r2good[i]<-r2[i]
  265. }
  266. }
  267.  
  268. good_deskr_nr<-NULL
  269. for (i in 1:1317 ) {
  270.  if(r2[i]>0.25) {
  271.  good_deskr_nr<-c(good_deskr_nr, i)
  272.  }
  273. }
  274. #leaps<-regsubsets(qsar12$K[train]~qsar12$x[train,good_deskr_nr], data=qsar12, nvmax=3)
  275. #plot(leaps, scale="r2")
  276. CA12a_geri_deskr<-c(111, 275, 1135)
  277.  
  278. qsar_12a_train<-lm(qsar12$K[train] ~ qsar12$x[train, CA12a_geri_deskr[1]] + qsar12$x[train, CA12a_geri_deskr[2]] + qsar12$x[train, CA12a_geri_deskr[3]])
  279. print(summary(qsar_12_train))
  280. qsar_12a_test_pred_values<-coef(qsar_12a_train)[1]+coef(qsar_12a_train)[2]*qsar12$x[test, CA12a_geri_deskr[1]]+coef(qsar_12a_train)[3]*qsar12$x[test, CA12a_geri_deskr[2]]+coef(qsar_12a_train)[4]*qsar12$x[test, CA12a_geri_deskr[3]]
  281. qsar_12a_test<-lm(qsar_12a_test_pred_values ~ qsar12$K[test])
  282. print(summary(qsar_12a_test))
  283. x<-cbind(qsar12$x[train,c(CA12a_geri_deskr[1], CA12a_geri_deskr[2], CA12a_geri_deskr[3])], qsar12$K[train])
  284. colnames(x)[3]<-"nd"
  285. colnames(x)[4]<-"y"
  286. qsar_12a_q2<-cvq2(x)
  287. print(qsar_12a_q2)
  288. print(q2test(qsar12$K[test],qsar_12a_test_pred_values))
  289.  
  290. #===============================================
  291.  
  292.  
  293.  
  294.  
  295. #grafiko asys nuo/iki:
  296. minK<-4.3
  297. maxK<-9.4
  298.  
  299. qsar1_sv<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[, CA1_geri_deskr[1]]+coef(qsar_1_train)[3]*qsar1$x[, CA1_geri_deskr[2]]+coef(qsar_1_train)[4]*qsar1$x[, CA1_geri_deskr[3]]
  300. qsar2_sv<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[, CA2_geri_deskr[1]]+coef(qsar_2_train)[3]*qsar2$x[, CA2_geri_deskr[2]]+coef(qsar_2_train)[4]*qsar2$x[, CA2_geri_deskr[3]]
  301. qsar6_sv<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[, CA6_geri_deskr[1]]+coef(qsar_6_train)[3]*qsar6$x[, CA6_geri_deskr[2]]+coef(qsar_6_train)[4]*qsar6$x[, CA6_geri_deskr[3]]
  302. qsar7_sv<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[, CA7_geri_deskr[1]]+coef(qsar_7_train)[3]*qsar7$x[, CA7_geri_deskr[2]]+coef(qsar_7_train)[4]*qsar7$x[, CA7_geri_deskr[3]]
  303. qsar13_sv<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[, CA13_geri_deskr[1]]+coef(qsar_13_train)[3]*qsar13$x[, CA13_geri_deskr[2]]+coef(qsar_13_train)[4]*qsar13$x[, CA13_geri_deskr[3]]
  304. qsar12_sv<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[, CA12_geri_deskr[1]]+coef(qsar_12_train)[3]*qsar12$x[, CA12_geri_deskr[2]]+coef(qsar_12_train)[4]*qsar12$x[, CA12_geri_deskr[3]]
  305.  
  306.  
  307.  
  308. #grafikui<-cbind(qsar$K, qsar1, qsar2, qsar3)
  309. #colnames(grafikui)[1]<-"pKd"
  310. mod_qsar1<-lm(qsar1_sv[train]~qsar1$K[train])
  311. mod_qsar2<-lm(qsar2_sv[train]~qsar2$K[train])
  312. mod_qsar6<-lm(qsar6_sv[train]~qsar6$K[train])
  313. mod_qsar7<-lm(qsar7_sv[train]~qsar7$K[train])
  314. mod_qsar13<-lm(qsar13_sv[train]~qsar13$K[train])
  315. mod_qsar12<-lm(qsar12_sv[train]~qsar12$K[train])
  316.  
  317. mod_qsar1t<-lm(qsar1_sv[test]~qsar1$K[test])
  318. mod_qsar2t<-lm(qsar2_sv[test]~qsar1$K[test])
  319. mod_qsar6t<-lm(qsar6_sv[test]~qsar6$K[test])
  320. mod_qsar7t<-lm(qsar7_sv[test]~qsar7$K[test])
  321. mod_qsar13t<-lm(qsar13_sv[test]~qsar13$K[test])
  322. mod_qsar12t<-lm(qsar12_sv[test]~qsar12$K[test])
  323.  
  324.  
  325. png("Edita2013_visom_CA.png", width=600, height=900)
  326. par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
  327. plot(qsar1$K[train], qsar1_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  328. points(qsar1$K[test], qsar1_sv[test], tck = 0.02, pch=15, cex=3)
  329. abline(mod_qsar1)
  330. title(bquote(atop("CA I", atop(R^2==0.83, R[TEST] ^2==0.70))), line = -4, cex.main=3, adj=0)
  331. #title('QSAR CAI train set', line = -3, cex.main=3)
  332. axis(1,col.axis = "transparent", tck = 0.02)
  333.  
  334. plot(qsar2$K[train], qsar2_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
  335. points(qsar2$K[test], qsar2_sv[test], tck = 0.02, pch=15, cex=3)
  336. abline(mod_qsar2)
  337. title(bquote(atop("CA II", atop(R^2==0.89, R[TEST] ^2==0.57))), line = -4, cex.main=3, adj=0)
  338. #title('QSAR CAII train set', line = -3, cex.main=3)
  339. axis(1,col.axis = "transparent", tck = 0.02)
  340. axis(2,col.axis = "transparent", tck = 0.02)
  341.  
  342. plot(qsar6$K[train], qsar6_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  343. points(qsar6$K[test], qsar6_sv[test], tck = 0.02, pch=15, cex=3)
  344. abline(mod_qsar6)
  345. title(bquote(atop("CA VI", atop(R^2==0.79, R[TEST] ^2==0.77))), line = -4, cex.main=3, adj=0)
  346. #title('QSAR CAVI train set', line = -3, cex.main=3)
  347.  
  348. plot(qsar7$K[train], qsar7_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
  349. points(qsar7$K[test], qsar7_sv[test], tck = 0.02, pch=15, cex=3)
  350. abline(mod_qsar7)
  351. title(bquote(atop("CA VII", atop(R^2==0.89, R[TEST] ^2==0.87))), line = -4, cex.main=3, adj=0)
  352. #title('QSAR CAVII train set', line = -3, cex.main=3)
  353. axis(1,col.axis = "transparent", tck = 0.02)
  354. axis(2,col.axis = "transparent", tck = 0.02)
  355.  
  356. plot(qsar12$K[train], qsar12_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
  357. points(qsar12$K[test], qsar12_sv[test], tck = 0.02, pch=15, cex=3)
  358. abline(mod_qsar12)
  359. title(bquote(atop("CA XII", atop(R^2==0.82, R[TEST] ^2==0.60))), line = -4, cex.main=3, adj=0)
  360. #title('QSAR CAXII train set', line = -3, cex.main=3)
  361.  
  362. plot(qsar13$K[train], qsar13_sv[train], tck = 0.02, pch=16, col="gray", cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
  363. points(qsar13$K[test], qsar13_sv[test], tck = 0.02, pch=15, cex=3)
  364. abline(mod_qsar13)
  365. title(bquote(atop("CA XIII", atop(R^2==0.88, R[TEST] ^2==0.68))), line = -4, cex.main=3, adj=0)
  366. #title('QSAR CAXIII train set', line = -3, cex.main=3)
  367. axis(2,col.axis = "transparent", tck = 0.02)
  368.  
  369. mtext(expression(paste(pK[d], ' (experimental)')), SOUTH<-1, line=2.5, cex=2, outer=TRUE)
  370. mtext(expression(paste(pK[d], ' (calculated)')), WEST<-2, line=2.5, cex=2, outer=TRUE)
  371.  
  372. dev.off()
  373.  
  374.  
  375. #png("Edita2013_visom_CA_test.png", width=600, height=900)
  376. #par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2)
  377. #plot(qsar1$K[test], qsar1_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  378. #abline(mod_qsar1t)
  379. #title(bquote(atop("CA I", R^2==0.70)), line = -3, cex.main=3)
  380. ##title('QSAR CAI test set', line = -3, cex.main=3)
  381. #axis(1,col.axis = "transparent", tck = 0.02)
  382.  
  383. #plot(qsar2$K[test], qsar2_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n")
  384. #abline(mod_qsar2t)
  385. #title(bquote(atop("CA II", R^2==0.57)), line = -3, cex.main=3)
  386. ##title('QSAR CAII test set', line = -3, cex.main=3)
  387. #axis(1,col.axis = "transparent", tck = 0.02)
  388. #axis(2,col.axis = "transparent", tck = 0.02)
  389.  
  390. #plot(qsar6$K[test], qsar6_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n")
  391. #abline(mod_qsar6t)
  392. #title(bquote(atop("CA VI", R^2==0.77)), line = -3, cex.main=3)
  393. ##title('QSAR CAVI test set', line = -3, cex.main=3)
  394.  
  395. #plot(qsar7$K[test], qsar7_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n")
  396. #abline(mod_qsar7t)
  397. #title(bquote(atop("CA VII", R^2==0.87)), line = -3, cex.main=3)
  398. ##title('QSAR CAVII test set', line = -3, cex.main=3)
  399. #axis(1,col.axis = "transparent", tck = 0.02)
  400. #axis(2,col.axis = "transparent", tck = 0.02)
  401.  
  402. #plot(qsar12$K[test], qsar12_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE)
  403. #abline(mod_qsar12t)
  404. #title(bquote(atop("CA XII", R^2==0.36)), line = -3, cex.main=3)
  405. ##title('QSAR CAXII test set', line = -3, cex.main=3)
  406.  
  407. #plot(qsar13$K[test], qsar13_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3)
  408. #abline(mod_qsar13t)
  409. #title(bquote(atop("CA XIII", R^2==0.68)), line = -3, cex.main=3)
  410. ##title('QSAR CAXIII test set', line = -3, cex.main=3)
  411. #axis(2,col.axis = "transparent", tck = 0.02)
  412.  
  413. #mtext(expression(paste(pK[d], ' (experimental)')), SOUTH<-1, line=2.5, cex=2, outer=TRUE)
  414. #mtext(expression(paste(pK[d], ' (calculated)')), WEST<-2, line=2.5, cex=2, outer=TRUE)
  415.  
  416. #dev.off()
  417.  
  418. #================================================
  419.  
  420. #dabar paskaiciuoti, test setui
  421. #pKd skirtumus eperm ir spejamus
  422. #padaryti ju koreliacijas ir grafikus
  423.  
  424. CA12_1_exprm<-qsar12$K[test]-qsar1$K[test]
  425. CA12_2_exprm<-qsar12$K[test]-qsar2$K[test]
  426. CA12_6_exprm<-qsar12$K[test]-qsar6$K[test]
  427. CA12_7_exprm<-qsar12$K[test]-qsar7$K[test]
  428. CA12_13_exprm<-qsar12$K[test]-qsar13$K[test]
  429. CA12_SUM_exprm<-CA12_1_exprm+CA12_2_exprm+CA12_6_exprm+CA12_7_exprm+CA12_13_exprm
  430.  
  431.  
  432. CA12_1_sp<-qsar12_sv[test]-qsar1_sv[test]
  433. CA12_2_sp<-qsar12_sv[test]-qsar2_sv[test]
  434. CA12_6_sp<-qsar12_sv[test]-qsar6_sv[test]
  435. CA12_7_sp<-qsar12_sv[test]-qsar7_sv[test]
  436. CA12_13_sp<-qsar12_sv[test]-qsar13_sv[test]
  437. CA12_SUM_sp<-CA12_1_sp+CA12_2_sp+CA12_6_sp+CA12_7_sp+CA12_13_sp
  438.  
  439. #dabar vel tas pats tik cia jau sumuota pabaigoj:
  440. mod_CA12_1t<-lm(CA12_1_sp~CA12_1_exprm)
  441. mod_CA12_2t<-lm(CA12_2_sp~CA12_2_exprm)
  442. mod_CA12_6t<-lm(CA12_6_sp~CA12_6_exprm)
  443. mod_CA12_7t<-lm(CA12_7_sp~CA12_7_exprm)
  444. mod_CA12_13t<-lm(CA12_13_sp~CA12_13_exprm)
  445. mod_CA12_SUMt<-lm(CA12_SUM_sp~CA12_SUM_exprm)
  446.  
  447. x<-cbind(CA12_1_sp, CA12_1_exprm)
  448. colnames(x)[2]<-"y"
  449. qsar_12_1_q2<-cvq2(x)
  450. print(qsar_12_1_q2)
  451.  
  452.  
  453. print(summary(mod_CA12_1t))
  454. print(summary(mod_CA12_2t))
  455. print(summary(mod_CA12_6t))
  456. print(summary(mod_CA12_7t))
  457. print(summary(mod_CA12_13t))
  458. print(summary(mod_CA12_SUMt))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement