Advertisement
Guest User

Untitled

a guest
Jan 14th, 2019
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 7.24 KB | None | 0 0
  1.  
  2. # pachete -----------------------------------------------------------------
  3.  
  4. library(ROSE)
  5. library(memisc)
  6. library(ROCR)
  7. library(ggplot2)
  8. library(Hmisc)
  9. library(corrplot)
  10. library(redcapAPI)
  11. library(PerformanceAnalytics)
  12. library(support.CEs)
  13. library(AlgDesign)
  14. library(Matching)
  15. library(rgenoud)
  16. library(survival)
  17. library(rpart)
  18. library(rpart.plot)
  19. library(ROSE)
  20. library(ROCR)
  21. library(randomForest)
  22.  
  23. # Regresie logistica  -----------------------------------------------------
  24.  
  25. a<- data.frame(as.data.set(spss.system.file("antrep.sav")))
  26. View(a)
  27. attach(a)
  28. table(a$knowent)
  29. table(country)
  30. country<-subset(a, country=="Germany")
  31. table(country$bstart)
  32. variabile<-which(names(country) %in%
  33.                    c("bstart","gemwork3", "gemhhinc", "gemeduc", "knowent", "suskill","fearfail", "gender", "age9c"))
  34.  
  35. country<-country[,variabile]
  36. country<-na.omit(country)
  37. head(country)
  38. table(country$bstart)
  39. # functie regresie logistica GLM
  40. model1<-glm(bstart~ gemwork3+suskill + gemhhinc+ knowent+ gender+ age9c, data=country, family="binomial")
  41. summary(model1)
  42. exp(coef(model1))
  43. table(country$gemwork3)
  44.  
  45. table(country$age9c)
  46. levels(country$age9c)[levels(country$age9c) %in% c("0-17","18-24","25-34")]<- "categorie1"
  47. levels(country$age9c)[levels(country$age9c) %in% c("35-44","45-54","55-64")]<- "categorie2"
  48. levels(country$age9c)[levels(country$age9c) %in% c("65-120")]<- "categorie3"
  49. table(country$age9c)
  50. model2<-glm(bstart~ gemwork3+suskill+knowent+gender +age9c,data=country,family = "binomial")
  51. summary(model2)
  52. exp(coef(model2))
  53. #curba ROC
  54. yhat<- predict(model2, type="response")
  55. pr<-prediction(yhat, country$bstart, label.ordering = NULL)
  56. perf<-performance(pr, "tpr", "fpr")
  57. windows()
  58. plot(perf, colorize= TRUE, lwd=5)
  59. performance(pr, "auc")
  60.  
  61. #re-echilibram
  62. table(country$bstart)
  63. date.rose<-ROSE(bstart~.,data=country,p=0.4, seed = 123)$data
  64. table(date.rose$bstart)
  65. model<-glm(bstart~ gemwork3+suskill+knowent+gender +age9c,data=date.rose,family = "binomial")
  66. summary(model)
  67. exp(coef(model))
  68. yhat<- predict(model, type="response")
  69. pr<-prediction(yhat, date.rose$bstart, label.ordering = NULL)
  70. perf<-performance(pr, "tpr", "fpr")
  71. windows()
  72. plot(perf, colorize= TRUE, lwd=5)
  73. performance(pr, "auc")
  74.  
  75. country1<-country[,c(1,2,7,8)]
  76. View(country1)
  77. View(country)
  78. perfmRose<- ROSE.eval(bstart~., data= country1,
  79.                       learner = glm, method.assess = "LKOCV", K=5,
  80.                       control.learner = list(family=binomial),
  81.                       control.rose=list(p=0.4),seed=123, trace=TRUE)
  82. summary(perfmRose)
  83. #asa scade, nu e ok
  84.  
  85.  
  86. # Metode Contrafactuale ---------------------------------------------------
  87. ?lalonde
  88. data("lalonde")
  89. View(lalonde)
  90. table(lalonde$treat)
  91. head(lalonde)
  92. mean(lalonde$re78[lalonde$treat== 1])
  93. mean(lalonde$re78[lalonde$treat== 0])
  94. diferenta<-mean(lalonde$re78[lalonde$treat== 1]) -mean(lalonde$re78[lalonde$treat== 0])
  95. diferenta
  96. scor<- glm(treat~ age + educ+ black+ hisp+ married+ nodegr, data= lalonde, family=binomial)
  97. summary(scor)
  98. efect<-Match(Y=lalonde$re78, Tr= lalonde$treat, X= scor$fitted, estimand = "ATT", M=1, replace= TRUE)
  99. summary(efect)
  100. MatchBalance(treat~ age + educ+ black+ hisp+ married+ nodegr, match.out= efect, data=lalonde, nboots = 200)
  101. qqplot(lalonde$age[efect$index.control], lalonde$age[efect$index.treated])
  102. abline(coef=c(0,1), col=2)
  103. qqplot(lalonde$educ[efect$index.control], lalonde$educ[efect$index.treated])
  104. abline(coef= c(0,1),col=4)
  105. scor<- glm(treat~ educ+ hisp+ married+ nodegr, data= lalonde, family=binomial)
  106. efect<-Match(Y=lalonde$re78, Tr= lalonde$treat, X= scor$fitted, estimand = "ATT", M=1, replace= TRUE)
  107. summary(efect)
  108. MatchBalance(treat~ educ+ hisp+ married+ nodegr, match.out= efect, data=lalonde, nboots = 200)
  109. library(rgenoud)
  110. x<-cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp, lalonde$married, lalonde$nodegr)
  111. genetic<- GenMatch(Tr=lalonde$treat, X=x, pop.size = 1000)
  112. mgen1<-Match(Y=lalonde$re78, Tr=lalonde$treat, X=x, Weight.matrix = genetic)
  113. summary(mgen1)
  114. MatchBalance(treat~ age+ educ+ black+ hisp+ married+ nodegr, match.out= mgen1, data=lalonde, nboots = 200)
  115. # Analiza Conjoint --------------------------------------------------------
  116.  
  117. ffd<-gen.factorial(c(2,2,2,2), varNames = c("Memorie","Rezolutie","Diagonala","Pret"),factors = "all")
  118. View(ffd)
  119. ffd<-ffd[-c(1,4,6,8,9,11,15,16),]
  120. set.seed(100)
  121. des<-rotation.design(candidate.array = ffd, attribute.names = list(Memorie=c("sub 32","peste 32"), Rezolutie=c("sub 12","peste 12"),
  122.                                                                    Diagonala=c("sub 5.5", "peste 5.5"), Pret=c("sub 2000","peste 2000")),
  123.                      nalternatives = 2, nblocks=1, row.renames= FALSE, randomize= FALSE, seed=100)
  124. desmat<-make.design.matrix(choice.experiment.design = des, optout = FALSE,
  125.                         categorical.attributes = c("Memorie","Rezolutie","Diagonala","Pret"),
  126.                         unlabeled = TRUE)
  127. q<-questionnaire(choice.experiment.design = des)
  128. rasp<-read.table("RaspunsuriDM.txt",header=TRUE, sep= "\t")
  129. View(rasp)
  130. dataset<- make.dataset(respondent.dataset = rasp, choice.indicators = c("I1","I2","I3","I4","I5","I6","I7","I8"),
  131.                        design.matrix = desmat)
  132. rezultat<-clogit(RES~ASC+peste.32+peste.12+peste.5.5+peste.2000 + strata(STR),data=dataset)
  133. summary(rezultat)
  134.  
  135. # Arbori ------------------------------------------------------------------
  136. #simpli
  137. library(rpart)
  138. library(rpart.plot)
  139. library(ROSE)
  140. library(ROCR)
  141.  
  142. modelarb1<-rpart(bstart~ ., method="class", data=date.rose)
  143. summary(modelarb1)
  144. View(a)
  145. datarose<-ROSE(bstart~. , data=date.rose , p=0.4, seed=123)$data
  146. windows()
  147. prp(modelarb1, type=3, extra=106, under=TRUE, box.palette = "GyPu")
  148. plotcp(modelarb1)
  149. printcp(modelarb1)
  150.  
  151. arboref<-prune(modelarb1, cp=0.012)
  152. arboref
  153. prp(arboref, type=3, extra=106, under=TRUE, box.palette = "GyPu")
  154.  
  155. summary(arboref)
  156.  
  157. esantion<- sample(1:nrow(date.rose), 1130) #1130, 80% din esantionul nostru de 1418 linii  
  158. train<-date.rose[esantion,]
  159. test<-date.rose[-esantion,]
  160.  
  161. p<- predict(modelarb1, test, type="prob")[,2] #prob - probabilitatea pe clasa 1 si pe clasa 2, [,2]- pentru a alege a 2a clasa, YES
  162. pred<-prediction(p, test$bstart)
  163. roc<-performance(pred, "tpr", "fpr")
  164. plot(roc, colorize = TRUE, lwd=5)
  165. performance(pred, "auc")
  166.  
  167.  
  168. #random forest
  169.  
  170. rf<-randomForest(x=train[,-7],y=train$bstart, importance= TRUE, ntree=1000, replace= TRUE,
  171.                  xtest=test[,-7], ytest=test$bstart)
  172. rf$oob.times
  173. predictii<-rf$predicted
  174. rf$importance
  175. predictions=as.vector(rf$votes[,2])
  176. pred=prediction(predictions,train$bstart)
  177. perf_AUC=performance(pred,"auc")
  178. AUC= perf_AUC@y.values[[1]]
  179. perf_ROC=performance(pred,"tpr","fpr")
  180. plot(perf_ROC, main="ROC plot",colorize=TRUE, lwd=5)
  181. performance(pred,"auc")
  182.  
  183. View(date.rose)
  184. # comparatie --------------------------------------------------------------
  185.  
  186. testare<-rf$test
  187. pred1<-predict(model, test, type="response")
  188. pred2<-predict(modelarb1,test, type="prob")
  189. pred3<-testare$predicted
  190.  
  191. preds<-cbind(pred1, pred2, pred3)
  192. View(preds)
  193. prednot<-prediction(preds, labels=cbind(test$bstart,test$bstart, test$bstart))
  194. perf<-performance(prednot,"tpr","fpr")
  195. plot(perf, col=as.list(1:3))
  196. legend("bottomright", legend=c("logistica","arbore","forest"),pch=19,col=c("black","red","yellow"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement