Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # pachete -----------------------------------------------------------------
- library(ROSE)
- library(memisc)
- library(ROCR)
- library(ggplot2)
- library(Hmisc)
- library(corrplot)
- library(redcapAPI)
- library(PerformanceAnalytics)
- library(support.CEs)
- library(AlgDesign)
- library(Matching)
- library(rgenoud)
- library(survival)
- library(rpart)
- library(rpart.plot)
- library(ROSE)
- library(ROCR)
- library(randomForest)
- # Regresie logistica -----------------------------------------------------
- a<- data.frame(as.data.set(spss.system.file("antrep.sav")))
- View(a)
- attach(a)
- table(a$knowent)
- table(country)
- country<-subset(a, country=="Germany")
- table(country$bstart)
- variabile<-which(names(country) %in%
- c("bstart","gemwork3", "gemhhinc", "gemeduc", "knowent", "suskill","fearfail", "gender", "age9c"))
- country<-country[,variabile]
- country<-na.omit(country)
- head(country)
- table(country$bstart)
- # functie regresie logistica GLM
- model1<-glm(bstart~ gemwork3+suskill + gemhhinc+ knowent+ gender+ age9c, data=country, family="binomial")
- summary(model1)
- exp(coef(model1))
- table(country$gemwork3)
- table(country$age9c)
- levels(country$age9c)[levels(country$age9c) %in% c("0-17","18-24","25-34")]<- "categorie1"
- levels(country$age9c)[levels(country$age9c) %in% c("35-44","45-54","55-64")]<- "categorie2"
- levels(country$age9c)[levels(country$age9c) %in% c("65-120")]<- "categorie3"
- table(country$age9c)
- model2<-glm(bstart~ gemwork3+suskill+knowent+gender +age9c,data=country,family = "binomial")
- summary(model2)
- exp(coef(model2))
- #curba ROC
- yhat<- predict(model2, type="response")
- pr<-prediction(yhat, country$bstart, label.ordering = NULL)
- perf<-performance(pr, "tpr", "fpr")
- windows()
- plot(perf, colorize= TRUE, lwd=5)
- performance(pr, "auc")
- #re-echilibram
- table(country$bstart)
- date.rose<-ROSE(bstart~.,data=country,p=0.4, seed = 123)$data
- table(date.rose$bstart)
- model<-glm(bstart~ gemwork3+suskill+knowent+gender +age9c,data=date.rose,family = "binomial")
- summary(model)
- exp(coef(model))
- yhat<- predict(model, type="response")
- pr<-prediction(yhat, date.rose$bstart, label.ordering = NULL)
- perf<-performance(pr, "tpr", "fpr")
- windows()
- plot(perf, colorize= TRUE, lwd=5)
- performance(pr, "auc")
- country1<-country[,c(1,2,7,8)]
- View(country1)
- View(country)
- perfmRose<- ROSE.eval(bstart~., data= country1,
- learner = glm, method.assess = "LKOCV", K=5,
- control.learner = list(family=binomial),
- control.rose=list(p=0.4),seed=123, trace=TRUE)
- summary(perfmRose)
- #asa scade, nu e ok
- # Metode Contrafactuale ---------------------------------------------------
- ?lalonde
- data("lalonde")
- View(lalonde)
- table(lalonde$treat)
- head(lalonde)
- mean(lalonde$re78[lalonde$treat== 1])
- mean(lalonde$re78[lalonde$treat== 0])
- diferenta<-mean(lalonde$re78[lalonde$treat== 1]) -mean(lalonde$re78[lalonde$treat== 0])
- diferenta
- scor<- glm(treat~ age + educ+ black+ hisp+ married+ nodegr, data= lalonde, family=binomial)
- summary(scor)
- efect<-Match(Y=lalonde$re78, Tr= lalonde$treat, X= scor$fitted, estimand = "ATT", M=1, replace= TRUE)
- summary(efect)
- MatchBalance(treat~ age + educ+ black+ hisp+ married+ nodegr, match.out= efect, data=lalonde, nboots = 200)
- qqplot(lalonde$age[efect$index.control], lalonde$age[efect$index.treated])
- abline(coef=c(0,1), col=2)
- qqplot(lalonde$educ[efect$index.control], lalonde$educ[efect$index.treated])
- abline(coef= c(0,1),col=4)
- scor<- glm(treat~ educ+ hisp+ married+ nodegr, data= lalonde, family=binomial)
- efect<-Match(Y=lalonde$re78, Tr= lalonde$treat, X= scor$fitted, estimand = "ATT", M=1, replace= TRUE)
- summary(efect)
- MatchBalance(treat~ educ+ hisp+ married+ nodegr, match.out= efect, data=lalonde, nboots = 200)
- library(rgenoud)
- x<-cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp, lalonde$married, lalonde$nodegr)
- genetic<- GenMatch(Tr=lalonde$treat, X=x, pop.size = 1000)
- mgen1<-Match(Y=lalonde$re78, Tr=lalonde$treat, X=x, Weight.matrix = genetic)
- summary(mgen1)
- MatchBalance(treat~ age+ educ+ black+ hisp+ married+ nodegr, match.out= mgen1, data=lalonde, nboots = 200)
- # Analiza Conjoint --------------------------------------------------------
- ffd<-gen.factorial(c(2,2,2,2), varNames = c("Memorie","Rezolutie","Diagonala","Pret"),factors = "all")
- View(ffd)
- ffd<-ffd[-c(1,4,6,8,9,11,15,16),]
- set.seed(100)
- des<-rotation.design(candidate.array = ffd, attribute.names = list(Memorie=c("sub 32","peste 32"), Rezolutie=c("sub 12","peste 12"),
- Diagonala=c("sub 5.5", "peste 5.5"), Pret=c("sub 2000","peste 2000")),
- nalternatives = 2, nblocks=1, row.renames= FALSE, randomize= FALSE, seed=100)
- desmat<-make.design.matrix(choice.experiment.design = des, optout = FALSE,
- categorical.attributes = c("Memorie","Rezolutie","Diagonala","Pret"),
- unlabeled = TRUE)
- q<-questionnaire(choice.experiment.design = des)
- rasp<-read.table("RaspunsuriDM.txt",header=TRUE, sep= "\t")
- View(rasp)
- dataset<- make.dataset(respondent.dataset = rasp, choice.indicators = c("I1","I2","I3","I4","I5","I6","I7","I8"),
- design.matrix = desmat)
- rezultat<-clogit(RES~ASC+peste.32+peste.12+peste.5.5+peste.2000 + strata(STR),data=dataset)
- summary(rezultat)
- # Arbori ------------------------------------------------------------------
- #simpli
- library(rpart)
- library(rpart.plot)
- library(ROSE)
- library(ROCR)
- modelarb1<-rpart(bstart~ ., method="class", data=date.rose)
- summary(modelarb1)
- View(a)
- datarose<-ROSE(bstart~. , data=date.rose , p=0.4, seed=123)$data
- windows()
- prp(modelarb1, type=3, extra=106, under=TRUE, box.palette = "GyPu")
- plotcp(modelarb1)
- printcp(modelarb1)
- arboref<-prune(modelarb1, cp=0.012)
- arboref
- prp(arboref, type=3, extra=106, under=TRUE, box.palette = "GyPu")
- summary(arboref)
- esantion<- sample(1:nrow(date.rose), 1130) #1130, 80% din esantionul nostru de 1418 linii
- train<-date.rose[esantion,]
- test<-date.rose[-esantion,]
- p<- predict(modelarb1, test, type="prob")[,2] #prob - probabilitatea pe clasa 1 si pe clasa 2, [,2]- pentru a alege a 2a clasa, YES
- pred<-prediction(p, test$bstart)
- roc<-performance(pred, "tpr", "fpr")
- plot(roc, colorize = TRUE, lwd=5)
- performance(pred, "auc")
- #random forest
- rf<-randomForest(x=train[,-7],y=train$bstart, importance= TRUE, ntree=1000, replace= TRUE,
- xtest=test[,-7], ytest=test$bstart)
- rf$oob.times
- predictii<-rf$predicted
- rf$importance
- predictions=as.vector(rf$votes[,2])
- pred=prediction(predictions,train$bstart)
- perf_AUC=performance(pred,"auc")
- AUC= perf_AUC@y.values[[1]]
- perf_ROC=performance(pred,"tpr","fpr")
- plot(perf_ROC, main="ROC plot",colorize=TRUE, lwd=5)
- performance(pred,"auc")
- View(date.rose)
- # comparatie --------------------------------------------------------------
- testare<-rf$test
- pred1<-predict(model, test, type="response")
- pred2<-predict(modelarb1,test, type="prob")
- pred3<-testare$predicted
- preds<-cbind(pred1, pred2, pred3)
- View(preds)
- prednot<-prediction(preds, labels=cbind(test$bstart,test$bstart, test$bstart))
- perf<-performance(prednot,"tpr","fpr")
- plot(perf, col=as.list(1:3))
- legend("bottomright", legend=c("logistica","arbore","forest"),pch=19,col=c("black","red","yellow"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement