Advertisement
Guest User

Untitled

a guest
Dec 4th, 2016
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.81 KB | None | 0 0
  1. library(foreign)
  2. library(boot)
  3.  
  4. datafile <- read.dta("~/Downloads/peace.dta")
  5. vars = c("wartype", "logdead", "wardur", "factnum", "factnum2", "trnsfcap", "develop",
  6. "exp", "decade","treaty","un2int","pbs2s3")
  7. df = datafile[vars]
  8. df = na.omit(df)
  9. attach(df)
  10.  
  11. #un2int: Was there a UN peace operation? 0 = no; 1 = yes.
  12. #pbs2s3: strict PB success
  13.  
  14. ##################
  15. set.seed(12345)
  16. #Randomly split the data into training and test set
  17. index <- sample(c(1:122), 122, replace = F)
  18. train <- index[1:61]
  19. test <- index[62:122]
  20. train_set <- df[train,]
  21. test_set <- df[test,]
  22.  
  23. #Generate the logistic regression model
  24. log_reg <- glm (pbs2s3 ~ wartype + logdead + wardur + factnum + factnum2 + trnsfcap +
  25. develop + exp + decade + treaty + un2int + I(wardur * un2int), family = binomial, data = train_set)
  26. #+ I(wardur * un2int)
  27. summary(log_reg)
  28. #plot(log_reg, pch=20)
  29.  
  30. #Use this logistic model to predict the test set
  31. predicted.prob <- predict(log_reg, test_set, type = "response")
  32. predicted=rep("Fail",61)
  33. predicted[which(predicted.prob >= 0.5)] = "Success"
  34.  
  35. #Code the results of test set for comparison
  36. test = rep("Fail",61)
  37. test[which(test_set$pbs2s3 == 1)] = "Success"
  38.  
  39. #Generate the confusion matrix to calculate the accuracy rate or predicting success
  40. table(predicted,test)
  41. #how many percent of the results in test set is correctly predicted
  42. mean(predicted == test)
  43.  
  44. #K-Fold cross validation
  45. #create a vector that stores the result errors
  46. cv.error.10=rep(0,10)
  47. for (i in 1:10){
  48. glm.fit=glm (pbs2s3 ~ wartype + logdead + wardur + factnum + factnum2 + trnsfcap +
  49. develop + exp + decade + treaty + un2int + I(wardur * un2int), family = binomial, data = df)
  50. cv.error.10[i]=cv.glm(df, glm.fit, K=10)$delta[1]}
  51. #the cross validation errors
  52. print(cv.error.10)
  53. #average test misclassification rate
  54. mean(cv.error.10)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement