Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- setwd("insert working directory here")
- library(doBy)
- library(party)
- x<-read.csv("seckillingscity.csv")
- y<-read.csv("y.csv")
- str(x) #checking to see if all the data is in the correct format
- str(y)
- y$injured[is.na(y$injured)]<-0
- y$killed[is.na(y$killed)]<-0
- y$Province[566]<-"Sindh"
- residfitplot <- function (model, col="black") {
- f<-fitted(model)
- r<-residuals(model, type='pearson')
- plot(f, r, col=col, main='Residuals vs. Fitted')
- L1<-loess(r~f)
- fvec = seq(min(f),max(f),length.out=150)
- lines(fvec,predict(L1,fvec),col=2)
- }
- #scale-location plot for GLMMADMB object
- locscaleplot <- function(model,col="black") {
- f <- fitted(model)
- r <- sqrt(abs(residuals(model, type='pearson')))
- plot(f,r,col=col, main='Scale-Location Plot')
- L1 <- loess(r~f)
- fvec = seq(min(f),max(f),length.out=150)
- lines(fvec,predict(L1,fvec),col=2)
- }
- #summing the attack and killed data for each year
- sumyear1<-summaryBy(killed + injured ~ Year, data = subset(y, tag == "Shia killing"
- | tag == "Shia blast" | tag == "sectarian conflict" | tag == "sectarian conflict"),
- FUN = function(x) sum(x))
- attacks1<-summaryBy(Date ~ Year, data = subset(y, tag == "Shia killing" | tag == "Shia blast"
- | tag == "sectarian conflict" | tag == "sectarian conflict" & Province != "Afghanistan"),
- FUN = function(x) length(x))
- # @@ RGEDIT LANDMARK @@: combining attack and killed/injured data
- sumyearN<-cbind(sumyear1, attacks1[,2])
- colnames(sumyearN)<-c("year", "killed", "injured", "attacks")
- sumyearN[14,]<-cbind(2014, 222, 289, 54) #adding data from Pakistan fact sheet
- #looking at the means, median and the IQR
- summary(sumyearN)
- # @@ RGEDIT LANDMARK @@: subset data by province
- p<-summaryBy(killed + injured ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
- FUN = function(x) sum(x))
- pattacks<-summaryBy(Date ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
- FUN = function(x) length(x))
- province<-cbind(p, pattacks[,3])
- colnames(province)<-c("Year", "Province", "Killed", "Injured", "Attacks")
- # @@ RGEDIT LANDMARK @@: subset data by city
- c<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
- FUN = function(x) sum(x))
- city<-summaryBy(killed + injured ~ City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
- FUN = function(x) sum(x))
- city2<-city[order(city$killed), ]
- colnames(c)<-c("Year", "City", "Killed", "Injured")
- cattacks<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
- FUN = function(x) length(x))
- city<-cbind(c, cattacks[,3])
- colnames(city)<-c("Year", "City", "Killed", "Injured", "Attacks")
- # @@ RGEDIT LANDMARK @@: General anti-Shia violence trends
- #plotting the relationship between attacks and year
- pdf("Attacksperyear.pdf")
- plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
- pch = 16, cex = 1.5, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3", lwd = 3)
- dev.off()
- #cleary, not very normal. resid fit plot looks awful and scale-location plots shows that variance is not consistent, but increasing)
- #let's try a poisson regression, because this is count data and count data is generally not poisson distributed
- glm1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'poisson', link = 'log')
- glm1.1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom1', link = 'log')
- glm1.2<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom', link = 'log')
- ICtab(glm1, glm1.1, glm1.2, type = "AIC")
- #quasi poisson is best model
- #model selection via log likelihood ratio tests
- glm1.1<-glm(attacks ~ year, data = sumyearN, family = quasipoisson('log'))
- plot(glm1.1)
- #residual versus fitted plot is not bad, but scale location plot is not good
- #calculating overdispersion for the relationship
- phi.hat=sum((residuals(glm1.1, type = "pearson"))^2)/glm1.1$df.residual
- #phi.hat is 15.17. The model is very overdispersed.
- #Switching to negative binomial
- glm1.21<-glmmadmb(attacks ~ 1, data = sumyearN, family = 'nbinom', link = 'log')
- anova(glm1.2, glm1.21)
- #year is important predictor of attack
- #plotting model onto plot of attack versus year
- residfitplot(glm1.2) #looks okay
- locscaleplot(glm1.2) #looks okay
- disperse(glm1.2, data = sumyearN)
- #dispersion is 1.33, model is not over-dispersed.
- summary(glm1.2)
- #year is a significant predictor of attacks.
- svg("attacksperyear.svg")
- plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
- pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
- pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
- data=data.frame("attacks" = 0, "year" = pred)
- curve<-predict(glm1.2, data, type = "response")
- lines(pred, curve, lwd = 3, lty =2, col = "black")
- dev.off()
- #looks ok
- #siginficant relationship according to summary
- #making regression tree to examine threshold in attacks in data
- library(party)
- tree1<-ctree(attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
- png("attacktree.png")
- plot(tree1)
- dev.off()
- #shifts in attacks per year before and after 2007
- #exmaining relationship between killed and year
- plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3")
- #seems to be a positive relationship between year and number killed
- #Trying negative binomial GLMs
- glm2<-glm(killed ~ year, family = poisson(link ='log'), data = sumyearN)
- glm2.1<-glmmadmb(killed ~ year, family = 'nbinom', data = sumyearN)
- glm2.2<-glmmadmb(killed ~ year, family = "nbinom1", link = "log", data = sumyearN)
- ICtab(glm2, glm2.1, glm2.2, type = "AICc")
- #negative binomial is the best model
- #model selection
- glm2.21<-glmmadmb(killed ~ 1, family = "nbinom1", link = "log", data = sumyearN)
- anova(glm2.21, glm2.1)
- #year is an important predictor of the number of people killed
- residfitplot(glm2.2) #looks okay
- locscaleplot(glm2.2) #looks okay
- disperse(glm2.2, data = sumyearN)
- #overdispersed 7.88
- #switching to negative binomial
- glm2.11<-glmmadmb(killed ~ 1, family = "nbinom", link = "log", data = sumyearN)
- anova(glm2.1, glm2.11)
- #year is an important predictor
- residfitplot(glm2.1) #looks okay
- locscaleplot(glm2.1) #looks okay
- disperse(glm2.1, data = sumyearN)
- #underdispersed, 0.70
- #plotting model onto data
- svg("killedperyear.svg")
- plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", type = 'l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd =3)
- pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
- data=data.frame("killed" = 0, "year" = pred)
- curve<-predict(glm2.1, data, type = "response")
- lines(pred, curve, lwd = 3, lty =2, col = "black")
- dev.off()
- #Making a regression tree to examine thresholds in data
- tree2<-ctree(killed ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
- png("killedtree.png")
- plot(tree1)
- dev.off()
- #shift before and after 2007
- #killed per attack
- #Plotting killed per attack for each year
- plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", type ='l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
- lm2<-glm(killed/attacks ~ year, data = sumyearN)
- #looking at residual versus fitted and scale-location plots.
- #Residual fitted is ok. Scale location plot is not great
- plot(lm2)
- #using the dredge function in MuMIn package to determine the best model
- library(MuMIn)
- dredge(lm2)
- #lookis like null model is best
- #since the plot looks like a non-linear relationship switching to regression trees
- tree3<-ctree(killed/attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
- png("killedattack.png")
- plot(tree3)
- dev.off()
- #again 2007
- svg("killedattacksperyear.svg")
- plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3")
- pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
- data=data.frame("killed" = 0, "year" = pred)
- curve<-predict(glm2.1, data, type = "response")
- lines(pred, curve, lwd = 1, col = "black")
- dev.off()
- # @@ RGEDIT LANDMARK @@: Province Analysis
- #making a plot of attacks by province
- png("AttacksProvince.png")
- plot(Attacks ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
- lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
- lines(Attacks ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
- lines(Attacks ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
- lines(Attacks ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
- lines(Attacks ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
- lines(Attacks ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
- lines(Attacks ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
- legend(2004, 150, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
- dev.off()
- #Analyzing Province specific attack data
- glmp1<-glmmadmb(Attacks ~ Year, data = province, family = 'poisson', link = 'log')
- glmp1.1<-glmmadmb(Attacks ~ Province, data = province, family = 'nbinom1', link = 'log')
- glmp1.2<-glmmadmb(Attacks ~ Year, data = subset(province, Province != "Afghanistan"|Province != "Islamabad"), family = 'nbinom', link = 'log')
- AIC(glmp1, glmp1.1, glmp1.2)
- #negative binomial is the best model
- #models also run with Province and Year, but can;t run them together because of lack of replicates
- #log likelihood ratio tests
- glmp1.3<-update(glmp1.2, .~. - Province)
- anova(glmp1.2, glmp1.3)
- #Province is significant
- summary(glmp1.2)
- #All provinces are different from each other in number of attacks. Sindh and Baluchistan lead attacks
- residfitplot(glmp1.2)
- locscaleplot(glmp1.2)
- #conditional inference tree
- ptree1<-ctree(Attacks ~ Year*Province, data = province)
- predict(ptree1)
- #killed per province
- png("Killedprovince.png")
- plot(Killed ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = " Total Number of People Killed", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,550))
- lines(killed ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
- lines(Killed ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
- lines(Killed ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
- lines(Killed ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
- lines(Killed ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
- lines(Killed ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
- lines(Killed ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
- legend(2002, 550, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
- dev.off()
- #
- glmkp1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'poisson', link = 'log')
- glmkp1.1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom1', link = 'log')
- glmkp1.2<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
- AIC(glmkp1, glmkp1.1, glmkp1.2)
- #quasi binomial is the best
- #log likelihood ratio tests
- glmkp1.3<-update(glmkp1.1, .~. - Province:Year)
- anova(glmkp1.1, glmkp1.3)
- #Interaction is not significant
- glmkp1.4<-update(glmkp1.3, .~. - Province)
- anova(glmkp1.3, glmkp1.4)
- #Province is significant
- glmkp1.5<-update(glmkp1.3, .~. - Year)
- anova(glmkp1.3, glmkp1.5)
- #Year is significant
- residfitplot(glmkp1.3)
- locscaleplot(glmkp1.3)
- #plots look good
- summary(glmkp1.1)
- #Gilgit Baltistan is different from rest
- #Increase in killed over time
- #killed per attack - province
- kpap<-province$Killed/province$Attacks
- province2<-cbind(province, kpap)
- kpac<-city$Killed/city$Attacks
- city2<-cbind(city, kpac)
- png("killedattackprovince.png")
- plot(kpap ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,60))
- lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
- lines(kpap ~ Year, data = subset(province2, Province == "Sindh"), col = '#97BDAF', lwd = 3)
- lines(kpap ~ Year, data = subset(province2, Province == "Punjab"), col = '#C453A4', lwd = 3)
- lines(kpap ~ Year, data = subset(province2, Province == "Baluchistan"), col = '#C46643', lwd =3)
- lines(kpap ~ Year, data = subset(province2, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
- lines(kpap ~ Year, data = subset(province2, Province == "FATA"), col = '#54433D', lwd = 3)
- lines(kpap ~ Year, data = subset(province2, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
- legend(2001, 58, c("Total","Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("Black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
- dev.off()
- boxplot(kpap ~ Province, data = subset(province2, Province != "Afghanistan" & Province != "Sndh"), las = 2)
- #running models
- kpap1<-lm(log(kpap + 1) ~ Year*Province, data = province2)
- dredge(kpap1)
- #Province only is the best model
- plot(kpap1)
- #not so bad
- summary(kpap1)
- #conditional inference trees
- kpaptree1<-ctree(kpap ~ Year*Province, data = province2)
- # @@ RGEDIT LANDMARK @@: City Analysis
- #making a plot of attacks by most common cities in dataset
- #Karachi, Quetta, Gilgit, Peshawar, Dera Ismail Khan, Hangu,
- city[order(city$Year, -city$Attacks, -city$Killed),]
- png("AttacksCity.png")
- plot(Attacks ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
- lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
- lines(Attacks ~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
- lines(Attacks ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
- lines(Attacks ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
- lines(Attacks ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
- legend(2002, 150, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("Black","#97BDAF", "#C453A4", "#C46643","#94C053"), pch = 19, cex = 1.25)
- dev.off()
- #model
- #unable to run Year*City model because of lack of replicates. City only model run
- glmc1<-glmmadmb(Attacks ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'poisson', link = 'log')
- glmc1.1<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
- glmc1.2<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
- AIC(glmc1, glmc1.1, glmc1.2)
- #negative binomial model is the best
- #log likelihood ratio tests
- glmc1.3<-update(glmc1.2, .~. - City*Year)
- anova(glmc1.2, glmc1.3)
- #City is significant
- residfitplot(glmc1.2)
- summary(glmc1.2)
- #Attack rates differ per year per city
- residfitplot(glmc1.2)
- locscaleplot(glmc1.2)
- #plots look good
- #conditional inference trees
- ptree1<-ctree(Attacks ~ Year*City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
- predict(ptree1)
- #killed per city
- png("killedcity.png")
- plot(Killed ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,600))
- lines(killed ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
- lines(Killed~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
- lines(Killed ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
- lines(Killed ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
- lines(Killed ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
- legend(2001, 600, c("Total", "Karachi","Quetta", "Peshwara", "Dera Ismail Khan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D"), pch = 19, cex = 1.25)
- dev.off()
- #GLMs
- glmkc1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'poisson', link = 'log')
- glmkc1.1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
- glmkc1.2<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
- AIC(glmkc1, glmkc1.1, glmkc1.2)
- #quasi binomial is the best
- #log likelihood ratio tests
- glmkc1.3<-update(glmkc1.1, .~. - City:Year)
- anova(glmkc1.1, glmkc1.3)
- #Interaction is not significant
- glmkc1.4<-update(glmkc1.3, .~. - City)
- anova(glmkc1.3, glmkc1.4)
- #City is significantly different
- glmkc1.5<-update(glmkc1.3, .~. - Year)
- anova(glmkc1.3, glmkc1.5)
- #year is significant
- residfitplot(glmkc1.3)
- summary(glmkc1.3)
- #Increasing attacks per year. Quetta is marginally significantly different from all other cities
- tree2<-ctree(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan", control = ctree_control(teststat = "quad", testtype = "MonteCarlo")
- #Killed don't differ per year per city
- residfitplot(glmkp1.1)
- locscaleplot(glmkp1.1)
- #interaction is not significant
- png("killedattackcity.png")
- plot(kpac ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,60))
- lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
- lines(kpac~ Year, data = subset(city2, City == "Karachi"), col = '#97BDAF', lwd = 3)
- lines(kpac ~ Year, data = subset(city2, City == "Quetta"), col = '#C453A4', lwd = 3)
- lines(kpac ~ Year, data = subset(city2, City == "Peshawar"), col = '#C46643', lwd =3)
- lines(kpac ~ Year, data = subset(city2, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
- legend(2001, 60, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
- dev.off()
- #models
- kpac1<-lm(log(kpac+1) ~ Year*City, data = subset(city2, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
- dredge(kpac1)
- #Null model is best, followed by year
- plot(kpac1)
- #plots look fine
- #conditional inference trees
- library(party)
- tree1<-ctree(Attacks ~ Year + Province, data = province, control = ctree_control(teststat = "quad", testtype = "MonteCarlo"))
- plot(tree1)
- tree2<-ctree(Attacks ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
- #across these cities violence picks up after 2011
- tree3<-ctree(Killed ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
- plot(tree3)
- tree4<-ctree(Killed ~ Year + Province, data = province, control = ctree_control(teststat = "quad"))
- plot(tree4)
- #same as attacks, pre 2011 Baluchistan/FATA/Kurram Region highest, followed by Sindh/Punjab and then Gilgit Baltistan/Islamabad. After 2011, number of Shias killed across all provinces/regions rise.
- #Examining province specific trees
- tree4<-ctree(Attacks ~ Province, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "Khyber Pakhtunkhwa"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic")))
- boxplot(Attacks ~ Province, data = province)
- plot(tree4)
- tree4.1<-ctree(Killed ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic"))
- x11()
- plot(tree4.1)
- tree4.2<-ctree(Killed/Year ~ Province, data = province, control = ctree_control(minsplit = 3))
- plot(tree4.2)
- #Nothing is important. No specific differences between provinces
- #Examining cities
- x11()
- tree5<-ctree(Attacks ~ City + Year, data = city, control = ctree_control(teststat = "quad", testtype = "Univariate"))
- plot(tree5)
- summary(tree5)
- #Attacks is not changing by city,
- tree6<-ctree(Killed ~ City, data = city)
- x11()
- #Killed not changing every year by city
- #same for killed/attacks
- #Running glms for city and province data
- glm4<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'poisson')
- glm4.1<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan" ), family = 'nbinom1')
- glm4.2<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
- ICtab(glm4, glm4.1, glm4.2)
- #negative binomial is the best model
- glm4.3<-glmmadmb(Attacks ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
- anova(glm4.2, glm4.3)
- #no interaction
- glm4.31<-glmmadmb(Attacks ~ Province, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
- anova(glm4.3, glm4.31) #year is important
- glm4.32<-glmmadmb(Attacks ~ Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
- anova(glm4.3, glm4.32) #province is important
- residfitplot(glm4.3) #looks ok
- locscaleplot(glm4.3) #looks ok
- disperse(glm4.3, province) #model is under-dispersed 0.813
- summary(glm4.3)
- #running tree model with data
- tree2<-ctree(Attacks ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =1, mtry = 1))
- plot(tree2)
- nodes(tree2,1)
- #model suggests that when looking at province-wide data
- library(partykit)
- z<-subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan")
- tree2<-tree2<-ctree(Attacks ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =10))
- # @@ RGEDIT LANDMARK @@: Comparing number of ppl killed in terrorism vs anti-Shia violence
- gen<-read.csv("generalviolence.csv")
- #removing 2015 and 2016 data
- #plotting Shia killed and general deaths
- ge2<-subset(gen, Year <=2014)
- png("GeneralvsShia.png")
- plot(killed ~ year, data = subset(sumyearN, year > 2002), xlab = "Year", ylab = "Number of People Killed", ylim = c(0,3200), bty = "l", cex.axis = 1.25, cex.lab = 1.5, type = 'n')
- lines(killed ~ year, data = subset(sumyearN, year > 2002), col = "#c14a34", lwd= 2)
- lines(Civilians ~ Year, data = ge2, col = "#6c44c0", lwd= 2)
- legend(2003, 3000, c("Terrorism", "Shia Killing"), col = c("#6c44c0","#c14a34"), lty = c(1,1), lwd = c(3,3), cex = 1.25)
- dev.off()
- zz<-(subset(sumyearN, year >= 2003)$killed/ge2$Civilians)*100
- #plotting percent deaths by Shias
- plot(killed ~ year, data = subset(sumyearN, year >= 2003), type = "n", xlab = "Year", ylab = "Percent of Shia deaths in Total", ylim = c(0,100), bty = "l", cex.axis = 1.25, cex.lab = 1.5)
- #running model
- k1<-glmmadmb(Civilians ~ Year, data = ge2, family = "poisson", link = "log")
- k2<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom", link = "log")
- k3<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom1", link = "log")
- AIC(k1, k2, k3)
- #negative binomial is the best model
- summary(k2)
- #Year is significant
- lines(zz ~ Year, data = ge2, col = "#71b54b", lwd = 2)
- sum(zz)/length(ge2$Year)
- #conditional inference tree
- tree5<-ctree(zz ~ Year, data = ge2, control = ctree_control(testtype = "Univariate"))
- plot(tree5)
Add Comment
Please, Sign In to add comment