library(cvq2) library(leaps) # cia is karto jau pKd imta: visu_CA_pKd<-read.table("visu_CA_pKd.csv", sep=",", header=TRUE) qsar1<-data.frame(K<-visu_CA_pKd$CAI) qsar1$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) #random numbers: #> test <- sort(round(runif(10, 1, 40))) #> test test <- c(2, 3, 8, 9, 10, 16, 18, 19, 29, 35) #tada: train <- c(1, 4, 5, 6, 7, 11, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar1$K[train]~qsar1$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } good_deskr_nr<-NULL for (i in 1:1317 ) { if(r2[i]>0.3) { good_deskr_nr<-c(good_deskr_nr, i) } } #leaps<-regsubsets(qsar1$K[train]~qsar1$x[train,good_deskr_nr], data=qsar1, nvmax=3) #plot(leaps, scale="r2") qsar_1_train<-lm(qsar1$K[train] ~ qsar1$x[train, 283] + qsar1$x[train, 290] + qsar1$x[train, 314]) print(summary(qsar_1_train)) qsar_1_test_pred_values<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[test, 283]+coef(qsar_1_train)[3]*qsar1$x[test, 290]+coef(qsar_1_train)[4]*qsar1$x[test, 314] qsar_1_test<-lm(qsar_1_test_pred_values ~ qsar1$K[test]) print(summary(qsar_1_test)) x<-cbind(qsar1$x[train,c(283,290,314)], qsar1$K[train]) colnames(x)[4]<-"y" qsar_1_q2<-cvq2(x) print(qsar_1_q2) #========================================== qsar2<-data.frame(K<-visu_CA_pKd$CAII) qsar2$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar2$K[train]~qsar2$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } #kaip ir be leaps variantas: #tada imti didziausia is r2good ir salinti koreliacijas su kitais, rasti kuris nekoreliuoja #r2very_good istrintos koreliacijos su 1231 kuris labai geras sitame.... #arba r2good kas gero tada dar ziureti su leaps #cia panaudot sena gal geriau qsar_2_train<-lm(qsar2$K[train] ~ qsar2$x[train, 275] +qsar2$x[train, 1074] + qsar2$x[train, 1107]) print(summary(qsar_2_train)) qsar_2_test_pred_values<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[test, 275]+coef(qsar_2_train)[3]*qsar2$x[test, 1074]+coef(qsar_2_train)[4]*qsar2$x[test, 1107] qsar_2_test<-lm(qsar_2_test_pred_values ~ qsar2$K[test]) print(summary(qsar_2_test)) x<-cbind(qsar2$x[train,c(275,1074,1107)], qsar2$K[train]) colnames(x)[4]<-"y" qsar_2_q2<-cvq2(x) print(qsar_2_q2) #============================================= qsar6<-data.frame(K<-visu_CA_pKd$CAVI) qsar6$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar6$K[train]~qsar6$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } good_deskr_nr<-NULL for (i in 1:1317 ) { if(r2[i]>0.3) { good_deskr_nr<-c(good_deskr_nr, i) } } #leaps<-regsubsets(qsar6$K[train]~qsar6$x[train,good_deskr_nr[1:25]], data=qsar6, nvmax=3) #plot(leaps, scale="r2") qsar_6_train<-lm(qsar6$K[train] ~ qsar6$x[train, 305] + qsar6$x[train, 1091] + qsar6$x[train, 1205]) print(summary(qsar_6_train)) qsar_6_test_pred_values<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[test, 305]+coef(qsar_6_train)[3]*qsar6$x[test, 1091]+coef(qsar_6_train)[4]*qsar6$x[test, 1205] qsar_6_test<-lm(qsar_6_test_pred_values ~ qsar6$K[test]) print(summary(qsar_6_test)) x<-cbind(qsar6$x[train,c(305,1091,1205)], qsar6$K[train]) colnames(x)[4]<-"y" qsar_6_q2<-cvq2(x) print(qsar_6_q2) #=============================================== qsar7<-data.frame(K<-visu_CA_pKd$CAVII) qsar7$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar7$K[train]~qsar7$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } good_deskr_nr<-NULL for (i in 1:1317 ) { if(r2[i]>0.3) { good_deskr_nr<-c(good_deskr_nr, i) } } #leaps<-regsubsets(qsar7$K[train]~qsar7$x[train,good_deskr_nr[1:25]], data=qsar7, nvmax=3) #plot(leaps, scale="r2") qsar_7_train<-lm(qsar7$K[train] ~ qsar7$x[train, 113] + qsar7$x[train, 153] + qsar7$x[train, 283]) print(summary(qsar_7_train)) qsar_7_test_pred_values<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[test, 113]+coef(qsar_7_train)[3]*qsar7$x[test, 153]+coef(qsar_7_train)[4]*qsar7$x[test, 283] qsar_7_test<-lm(qsar_7_test_pred_values ~ qsar7$K[test]) print(summary(qsar_7_test)) x<-cbind(qsar7$x[train,c(113,153,283)], qsar7$K[train]) colnames(x)[4]<-"y" qsar_7_q2<-cvq2(x) print(qsar_7_q2) #=============================================== qsar13<-data.frame(K<-visu_CA_pKd$CAXIII) qsar13$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar13$K[train]~qsar13$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } good_deskr_nr<-NULL for (i in 1:1317 ) { if(r2[i]>0.3) { good_deskr_nr<-c(good_deskr_nr, i) } } #leaps<-regsubsets(qsar13$K[train]~qsar13$x[train,good_deskr_nr], data=qsar13, nvmax=3) #plot(leaps, scale="r2") qsar_13_train<-lm(qsar13$K[train] ~ qsar13$x[train, 365] + qsar13$x[train, 649] + qsar13$x[train, 807]) print(summary(qsar_13_train)) qsar_13_test_pred_values<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[test, 365]+coef(qsar_13_train)[3]*qsar13$x[test, 649]+coef(qsar_13_train)[4]*qsar13$x[test, 807] qsar_13_test<-lm(qsar_13_test_pred_values ~ qsar13$K[test]) print(summary(qsar_13_test)) x<-cbind(qsar13$x[train,c(365,649,807)], qsar13$K[train]) colnames(x)[4]<-"y" qsar_13_q2<-cvq2(x) print(qsar_13_q2) #=============================================== qsar12<-data.frame(K<-visu_CA_pKd$CAXII) qsar12$x<-as.matrix(read.table("edragon_descriptors_fix4.csv", sep=",", skip=1)) r2=NULL r2good=NULL for (i in 1:1317 ) { fit.single<-lm(qsar12$K[train]~qsar12$x[train,i]) r2[i]<-summary(fit.single)$r.squared if(r2[i]>0.3) { r2good[i]<-r2[i] } } good_deskr_nr<-NULL for (i in 1:1317 ) { if(r2[i]>0.3) { good_deskr_nr<-c(good_deskr_nr, i) } } #leaps<-regsubsets(qsar12$K[train]~qsar12$x[train,good_deskr_nr], data=qsar12, nvmax=3) #plot(leaps, scale="r2") qsar_12_train<-lm(qsar12$K[train] ~ qsar12$x[train, 101] + qsar12$x[train, 111] + qsar12$x[train, 275]) print(summary(qsar_12_train)) qsar_12_test_pred_values<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[test, 101]+coef(qsar_12_train)[3]*qsar12$x[test, 111]+coef(qsar_12_train)[4]*qsar12$x[test, 275] qsar_12_test<-lm(qsar_12_test_pred_values ~ qsar12$K[test]) print(summary(qsar_12_test)) x<-cbind(qsar12$x[train,c(101,111,275)], qsar12$K[train]) colnames(x)[4]<-"y" qsar_12_q2<-cvq2(x) print(qsar_12_q2) #=============================================== #grafiko asys nuo/iki: minK<--7.4 maxK<-1.8 qsar1_sv<-coef(qsar_1_train)[1]+coef(qsar_1_train)[2]*qsar1$x[, 283]+coef(qsar_1_train)[3]*qsar1$x[, 290]+coef(qsar_1_train)[4]*qsar1$x[, 314] qsar2_sv<-coef(qsar_2_train)[1]+coef(qsar_2_train)[2]*qsar2$x[, 275]+coef(qsar_2_train)[3]*qsar2$x[, 1074]+coef(qsar_2_train)[4]*qsar2$x[, 1107] qsar6_sv<-coef(qsar_6_train)[1]+coef(qsar_6_train)[2]*qsar6$x[, 305]+coef(qsar_6_train)[3]*qsar6$x[, 1091]+coef(qsar_6_train)[4]*qsar6$x[, 1205] qsar7_sv<-coef(qsar_7_train)[1]+coef(qsar_7_train)[2]*qsar7$x[, 113]+coef(qsar_7_train)[3]*qsar7$x[, 153]+coef(qsar_7_train)[4]*qsar7$x[, 283] qsar13_sv<-coef(qsar_13_train)[1]+coef(qsar_13_train)[2]*qsar13$x[, 365]+coef(qsar_13_train)[3]*qsar13$x[, 649]+coef(qsar_13_train)[4]*qsar13$x[, 807] qsar12_sv<-coef(qsar_12_train)[1]+coef(qsar_12_train)[2]*qsar12$x[, 101]+coef(qsar_12_train)[3]*qsar12$x[, 111]+coef(qsar_12_train)[4]*qsar12$x[, 275] #grafikui<-cbind(qsar$K, qsar1, qsar2, qsar3) #colnames(grafikui)[1]<-"pKd" mod_qsar1<-lm(qsar1$K[train]~qsar1_sv[train]) mod_qsar2<-lm(qsar2$K[train]~qsar2_sv[train]) mod_qsar6<-lm(qsar6$K[train]~qsar6_sv[train]) mod_qsar7<-lm(qsar7$K[train]~qsar7_sv[train]) mod_qsar13<-lm(qsar13$K[train]~qsar13_sv[train]) mod_qsar12<-lm(qsar12$K[train]~qsar12_sv[train]) mod_qsar1t<-lm(qsar1$K[test]~qsar1_sv[test]) mod_qsar2t<-lm(qsar2$K[test]~qsar2_sv[test]) mod_qsar6t<-lm(qsar6$K[test]~qsar6_sv[test]) mod_qsar7t<-lm(qsar7$K[test]~qsar7_sv[test]) mod_qsar13t<-lm(qsar13$K[test]~qsar13_sv[test]) mod_qsar12t<-lm(qsar12$K[test]~qsar12_sv[test]) png("Edita2013_visom_CA_train.png", width=600, height=900) par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2) plot(qsar1$K[train], qsar1_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n") abline(mod_qsar1) title('QSAR CAI train set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) plot(qsar2$K[train], qsar2_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n") abline(mod_qsar2) title('QSAR CAII train set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) axis(2,col.axis = "transparent", tck = 0.02) plot(qsar6$K[train], qsar6_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n") abline(mod_qsar6) title('QSAR CAVI train set', line = -3, cex.main=3) plot(qsar7$K[train], qsar7_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n") abline(mod_qsar7) title('QSAR CAVII train set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) axis(2,col.axis = "transparent", tck = 0.02) plot(qsar12$K[train], qsar12_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE) abline(mod_qsar12) title('QSAR CAXII train set', line = -3, cex.main=3) plot(qsar13$K[train], qsar13_sv[train], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3) abline(mod_qsar13) title('QSAR CAXIII train set', line = -3, cex.main=3) axis(2,col.axis = "transparent", tck = 0.02) mtext('pKd difference (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE) mtext('pKd difference (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE) dev.off() png("Edita2013_visom_CA_test.png", width=600, height=900) par(mfrow=c(3,2), mar=c(1,1,0,0), oma=c(6,6,0,0), cex.axis=2) plot(qsar1$K[test], qsar1_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n") abline(mod_qsar1t) title('QSAR CAI test set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) plot(qsar2$K[test], qsar2_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), xlab=NA, ylab=NA, xaxt="n", yaxt="n") abline(mod_qsar2t) title('QSAR CAII test set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) axis(2,col.axis = "transparent", tck = 0.02) plot(qsar6$K[test], qsar6_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n") abline(mod_qsar6t) title('QSAR CAVI test set', line = -3, cex.main=3) plot(qsar7$K[test], qsar7_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE, xaxt="n", yaxt="n") abline(mod_qsar7t) title('QSAR CAVII test set', line = -3, cex.main=3) axis(1,col.axis = "transparent", tck = 0.02) axis(2,col.axis = "transparent", tck = 0.02) plot(qsar12$K[test], qsar12_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), ann=FALSE) abline(mod_qsar12t) title('QSAR CAXII test set', line = -3, cex.main=3) plot(qsar13$K[test], qsar13_sv[test], tck = 0.02, pch=15, cex=3, xlim=c(minK, maxK), ylim=c(minK, maxK), yaxt="n", cex.lab=3) abline(mod_qsar13t) title('QSAR CAXIII test set', line = -3, cex.main=3) axis(2,col.axis = "transparent", tck = 0.02) mtext('pKd difference (experimental)', SOUTH<-1, line=2.5, cex=2, outer=TRUE) mtext('pKd difference (calculated)', WEST<-2, line=2.5, cex=2, outer=TRUE) dev.off()