# QSAR with intrinsic parameters
#
# VR

library(cvq2)
# Genetic Algorithm (GA) for Variable Selection from High-Dimensional Data:
library(gaselect)

# Input data ----
deskriptoriai <- as.matrix(read.csv(file = 'sutvarkyti_descriptoriai.csv'))
matavimai <- read.csv(file = 'Intinciniai_matavimai.csv')

# Train / test split ----
# Earlier attempts, kept for reference:
# random numbers:
# test <- sort(round(runif(8, 1, 28)))
# test
# test <- c(1, 9, 10, 13, 14, 19, 22, 26)
# then:
# train <- c(2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 16, 17, 18, 20, 21, 23, 24, 25, 27, 28)
# maybe it is better to simply drop the 3 non-fluorinated ones
# test <- sort(round(runif(5, 1, 25)))
test <- c(8, 11, 13, 17, 23)
train <- c(
  1, 2, 3, 4, 5, 6, 7, 9, 10, 12,
  14, 15, 16, 18, 19, 20, 21, 22, 24, 25
)

# Helpers ----

#' Q2 on an external test set (the original note says it is exactly the same
#' as the PHASE Q2).
#'
#' Computes 1 - sum((predicted - observed)^2) / sum((observed - mean)^2),
#' where the mean is taken over the `activity` values passed in.
#'
#' @param activity Numeric vector of observed activities.
#' @param predicted_activity Numeric vector of predicted activities, in the
#'   same order as `activity`.
#' @return A single numeric value.
q2test <- function(activity, predicted_activity) {
  prediction_error_sq <- (predicted_activity - activity)^2
  sigma_y_sq <- (activity - mean(activity))^2
  1 - sum(prediction_error_sq) / sum(sigma_y_sq)
}

#' Apply the coefficients of a fitted linear model to a descriptor matrix.
#'
#' Equivalent to writing out
#' `coef(model)[1] + coef(model)[2] * descriptors[, 1] + ...` by hand, which is
#' needed here because the models are fitted on subsetted expressions rather
#' than on a data frame, so `predict(newdata = )` cannot be used with them.
#'
#' @param model A fitted model whose `coef()` is the intercept followed by one
#'   slope per column of `descriptors`, in the same order.
#' @param descriptors Numeric matrix, one row per compound and one column per
#'   model term.
#' @return Numeric vector of predicted values, one per row of `descriptors`.
predict_from_coefs <- function(model, descriptors) {
  beta <- coef(model)
  # Guard against a descriptor list that does not match the fitted formula.
  stopifnot(length(beta) == ncol(descriptors) + 1L)
  drop(cbind(1, descriptors) %*% beta)
}

#' Assemble the matrix handed to `cvq2()`: descriptor columns named
#' x1, x2, ... followed by the response column named y.
#'
#' @param descriptors Matrix of descriptor values (one column per descriptor).
#' @param activity Response values, one per row of `descriptors`.
#' @return A matrix with columns `x1 ... xk, y`.
cv_input <- function(descriptors, activity) {
  tab <- cbind(descriptors, activity)
  colnames(tab) <- c(paste0("x", seq_len(ncol(descriptors))), "y")
  tab
}

# GA settings ----
# `ctrl` and `evaluatorRDCV` are only consumed by the commented-out genAlg()
# calls below; they are kept so those calls can be re-enabled as they are.
ctrl <- genAlgControl(
  populationSize = 5000, numGenerations = 750,
  minVariables = 3, maxVariables = 4, verbosity = 1
)
evaluatorRDCV <- evaluatorPLS(
  numReplications = 2, innerSegments = 5, outerSegments = 3, numThreads = 3
)

# GintrCAI ----
# with the faster setting this gave: "WD.unity", "E1v", "maxssCH2", "TDB5u"
# resultRDCV.GintrCAI <- genAlg(matavimai$GintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.GintrCAI, 1:5)
qsar_1_vars <- c("WD.unity", "E1v", "maxssCH2", "TDB5u")
qsar_1_train <- lm(
  matavimai$GintrCAI[train] ~ deskriptoriai[train, "WD.unity"] +
    deskriptoriai[train, "E1v"] +
    deskriptoriai[train, "maxssCH2"] +
    deskriptoriai[train, "TDB5u"]
)
print(summary(qsar_1_train))
qsar_1_test_pred_values <- predict_from_coefs(
  qsar_1_train, deskriptoriai[test, qsar_1_vars, drop = FALSE]
)
# Note: this regresses the predicted values on the observed ones.
qsar_1_test <- lm(qsar_1_test_pred_values ~ matavimai$GintrCAI[test])
print(summary(qsar_1_test))
x <- cv_input(
  deskriptoriai[train, qsar_1_vars, drop = FALSE], matavimai$GintrCAI[train]
)
qsar_1_q2 <- cvq2(x)
print(qsar_1_q2)
print(q2test(matavimai$GintrCAI[test], qsar_1_test_pred_values))

# HintrCAI ----
# resultRDCV.HintrCAI <- genAlg(matavimai$HintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.HintrCAI, 1:5)
qsar_2_vars <- c("Wlambda2.unity", "S9", "MATSm4", "MoRSEV15")
qsar_2_train <- lm(
  matavimai$HintrCAI[train] ~ deskriptoriai[train, "Wlambda2.unity"] +
    deskriptoriai[train, "S9"] +
    deskriptoriai[train, "MATSm4"] +
    deskriptoriai[train, "MoRSEV15"]
)
print(summary(qsar_2_train))
qsar_2_test_pred_values <- predict_from_coefs(
  qsar_2_train, deskriptoriai[test, qsar_2_vars, drop = FALSE]
)
qsar_2_test <- lm(qsar_2_test_pred_values ~ matavimai$HintrCAI[test])
print(summary(qsar_2_test))
x <- cv_input(
  deskriptoriai[train, qsar_2_vars, drop = FALSE], matavimai$HintrCAI[train]
)
qsar_2_q2 <- cvq2(x)
print(qsar_2_q2)
print(q2test(matavimai$HintrCAI[test], qsar_2_test_pred_values))

# TsintrCAI ----
# even longer calculations
ctrl <- genAlgControl(
  populationSize = 50000, numGenerations = 750,
  minVariables = 3, maxVariables = 4, verbosity = 1
)
# resultRDCV.TsintrCAI <- genAlg(matavimai$TsintrCAI[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.TsintrCAI, 1:5)
qsar_3_vars <- c("MOMI.Z", "S9", "MoRSEV15", "hmin")
qsar_3_train <- lm(
  matavimai$TsintrCAI[train] ~ deskriptoriai[train, "MOMI.Z"] +
    deskriptoriai[train, "S9"] +
    deskriptoriai[train, "MoRSEV15"] +
    deskriptoriai[train, "hmin"]
)
print(summary(qsar_3_train))
qsar_3_test_pred_values <- predict_from_coefs(
  qsar_3_train, deskriptoriai[test, qsar_3_vars, drop = FALSE]
)
qsar_3_test <- lm(qsar_3_test_pred_values ~ matavimai$TsintrCAI[test])
print(summary(qsar_3_test))
x <- cv_input(
  deskriptoriai[train, qsar_3_vars, drop = FALSE], matavimai$TsintrCAI[train]
)
qsar_3_q2 <- cvq2(x)
print(qsar_3_q2)
print(q2test(matavimai$TsintrCAI[test], qsar_3_test_pred_values))

# GintrCAII ----
# yet other settings:
ctrl <- genAlgControl(
  populationSize = 50000, numGenerations = 1000,
  minVariables = 2, maxVariables = 3, verbosity = 1
)
# Probably, to avoid overfitting, fewer descriptors should be used; if it
# works out with this many, then it is apparently OK.
# resultRDCV.GintrCAII <- genAlg(matavimai$GintrCAII[train], deskriptoriai[train,], control = ctrl, evaluator = evaluatorRDCV, seed = 123)
# subsets(resultRDCV.GintrCAII, 1:5)
qsar_4_vars <- c("RPCS", "nRotBt", "RDF55s")
qsar_4_train <- lm(
  matavimai$GintrCAII[train] ~ deskriptoriai[train, "RPCS"] +
    deskriptoriai[train, "nRotBt"] +
    deskriptoriai[train, "RDF55s"]
)
print(summary(qsar_4_train))
qsar_4_test_pred_values <- predict_from_coefs(
  qsar_4_train, deskriptoriai[test, qsar_4_vars, drop = FALSE]
)
qsar_4_test <- lm(qsar_4_test_pred_values ~ matavimai$GintrCAII[test])
print(summary(qsar_4_test))
x <- cv_input(
  deskriptoriai[train, qsar_4_vars, drop = FALSE], matavimai$GintrCAII[train]
)
qsar_4_q2 <- cvq2(x)
print(qsar_4_q2)
print(q2test(matavimai$GintrCAII[test], qsar_4_test_pred_values))