Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load required packages: foreign for read.dta(), boot for cv.glm().
library(foreign)
library(boot)

# Read the Stata data set and keep only the variables used in the analysis.
datafile <- read.dta("~/Downloads/peace.dta")
vars <- c(
  "wartype", "logdead", "wardur", "factnum", "factnum2", "trnsfcap",
  "develop", "exp", "decade", "treaty", "un2int", "pbs2s3"
)
df <- datafile[vars]

# Drop rows with any missing value so the modelling functions below
# all see the same complete cases.
df <- na.omit(df)
# NOTE(review): the original script called attach(df) here. It was removed
# because it is an anti-pattern and no downstream code relies on attached
# names — every model call passes an explicit `data =` argument.
# un2int: Was there a UN peace operation? 0 = no; 1 = yes.
# pbs2s3: strict peacebuilding (PB) success indicator.
##################
set.seed(12345)

# Randomly split the data into training and test sets (roughly 50/50).
# Use nrow(df) rather than the hard-coded row count 122 so the split
# stays valid if the filtered data set changes size. For n = 122 this
# produces exactly the original 61/61 split under the same seed.
n <- nrow(df)
index <- sample(seq_len(n), n, replace = FALSE)
n_train <- floor(n / 2)
train <- index[seq_len(n_train)]
test <- index[(n_train + 1):n]
train_set <- df[train, ]
test_set <- df[test, ]
# Fit a logistic regression for strict PB success on the training set,
# including a wardur x un2int interaction term.
log_reg <- glm(
  pbs2s3 ~ wartype + logdead + wardur + factnum + factnum2 + trnsfcap +
    develop + exp + decade + treaty + un2int + I(wardur * un2int),
  family = binomial, data = train_set
)
summary(log_reg)
# plot(log_reg, pch = 20)

# Predict success probabilities on the held-out test set and threshold
# at 0.5 to obtain class labels. The label count is derived from the
# test set instead of being hard-coded to 61.
predicted.prob <- predict(log_reg, test_set, type = "response")
n_test <- nrow(test_set)
predicted <- rep("Fail", n_test)
predicted[which(predicted.prob >= 0.5)] <- "Success"

# Recode the observed outcomes the same way for comparison. A new name
# is used here: the original overwrote the `test` row-index vector from
# the split with this character vector (a shadowing bug).
observed <- rep("Fail", n_test)
observed[which(test_set$pbs2s3 == 1)] <- "Success"

# Confusion matrix and overall accuracy (share of test-set outcomes
# predicted correctly).
table(predicted, observed)
mean(predicted == observed)
# K-fold cross-validation.
# The fitted model is identical on every repetition, so fit it once
# outside the loop (glm() consumes no random numbers, so hoisting it
# does not change the cv.glm() random folds relative to the original).
# cv.glm() re-partitions the data on each call, so the loop yields 10
# independent 10-fold CV error estimates.
glm.fit <- glm(
  pbs2s3 ~ wartype + logdead + wardur + factnum + factnum2 + trnsfcap +
    develop + exp + decade + treaty + un2int + I(wardur * un2int),
  family = binomial, data = df
)
cv.error.10 <- rep(0, 10)
for (i in seq_len(10)) {
  cv.error.10[i] <- cv.glm(df, glm.fit, K = 10)$delta[1]
}

# The individual cross-validation errors and their average.
print(cv.error.10)
mean(cv.error.10)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement