SHARE
TWEET

Untitled

a guest Feb 25th, 2017 24 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. setwd("insert working directory here")
  2. library(doBy)
  3. library(party)
  4. x<-read.csv("seckillingscity.csv")
  5. y<-read.csv("y.csv")
  6. str(x) #checking to see if all the data is in the correct format
  7. str(y)
  8. y$injured[is.na(y$injured)]<-0
  9. y$killed[is.na(y$killed)]<-0
  10. y$Province[566]<-"Sindh"
  11.  
  12.  
  13. residfitplot <- function (model, col="black") {
  14.   f<-fitted(model)
  15.   r<-residuals(model, type='pearson')
  16.   plot(f, r, col=col, main='Residuals vs. Fitted')
  17.   L1<-loess(r~f)
  18.   fvec = seq(min(f),max(f),length.out=150)
  19.   lines(fvec,predict(L1,fvec),col=2)
  20. }
  21.  
  22. #scale-location plot for GLMMADMB object
  23. locscaleplot <- function(model,col="black") {
  24.   f <- fitted(model)
  25.   r <- sqrt(abs(residuals(model, type='pearson')))  
  26.   plot(f,r,col=col, main='Scale-Location Plot')
  27.   L1 <- loess(r~f)
  28.   fvec = seq(min(f),max(f),length.out=150)
  29.   lines(fvec,predict(L1,fvec),col=2)
  30. }
  31. #summing the attack and killed data for each year
  32. sumyear1<-summaryBy(killed + injured ~ Year, data = subset(y, tag == "Shia killing"
  33. | tag == "Shia blast" | tag == "sectarian conflict" | tag == "sectarian conflict"),
  34. FUN = function(x) sum(x))
  35. attacks1<-summaryBy(Date ~ Year, data = subset(y, tag == "Shia killing" | tag == "Shia blast"
  36. | tag == "sectarian conflict" | tag == "sectarian conflict" & Province != "Afghanistan"),
  37. FUN = function(x) length(x))
  38.  
  39. # @@ RGEDIT LANDMARK @@: combining attack and killed/injured data
  40.  
  41. sumyearN<-cbind(sumyear1, attacks1[,2])
  42. colnames(sumyearN)<-c("year", "killed", "injured", "attacks")
  43.  
  44. sumyearN[14,]<-cbind(2014, 222, 289, 54) #adding data from Pakistan fact sheet
  45.  
  46. #looking at the means, median and the IQR
  47. summary(sumyearN)
  48.  
  49. # @@ RGEDIT LANDMARK @@: subset data by province
  50. p<-summaryBy(killed + injured ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
  51. FUN = function(x) sum(x))
  52. pattacks<-summaryBy(Date ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
  53. FUN = function(x) length(x))
  54. province<-cbind(p, pattacks[,3])
  55. colnames(province)<-c("Year", "Province", "Killed", "Injured", "Attacks")
  56.  
  57. # @@ RGEDIT LANDMARK @@: subset data by city
  58. c<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
  59. FUN = function(x) sum(x))
  60. city<-summaryBy(killed + injured ~ City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
  61. FUN = function(x) sum(x))
  62. city2<-city[order(city$killed), ]
  63. colnames(c)<-c("Year", "City", "Killed", "Injured")
  64. cattacks<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
  65. FUN = function(x) length(x))
  66. city<-cbind(c, cattacks[,3])
  67. colnames(city)<-c("Year", "City", "Killed", "Injured", "Attacks")
  68.  
  69. # @@ RGEDIT LANDMARK @@: General anti-Shia violence trends
  70. #plotting the relationship between attacks and year
  71. pdf("Attacksperyear.pdf")
  72. plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
  73. pch = 16, cex = 1.5, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3", lwd = 3)
  74. dev.off()
  75. #cleary, not very normal. resid fit plot looks awful and scale-location plots shows that variance is not consistent, but increasing)
  76. #let's try a poisson regression, because this is count data and count data is generally not poisson distributed
  77. glm1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'poisson', link = 'log')
  78. glm1.1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom1', link = 'log')
  79. glm1.2<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom', link = 'log')
  80. ICtab(glm1, glm1.1, glm1.2, type = "AIC")
  81. #quasi poisson is best model
  82. #model selection via log likelihood ratio tests
  83. glm1.1<-glm(attacks ~ year, data = sumyearN, family = quasipoisson('log'))
  84. plot(glm1.1)
  85. #residual versus fitted plot is not bad, but scale location plot is not good
  86. #calculating overdispersion for the relationship
  87.  
  88. phi.hat=sum((residuals(glm1.1, type = "pearson"))^2)/glm1.1$df.residual
  89. #phi.hat is 15.17. The model is very overdispersed.
  90. #Switching to negative binomial
  91. glm1.21<-glmmadmb(attacks ~ 1, data = sumyearN, family = 'nbinom', link = 'log')
  92. anova(glm1.2, glm1.21)
  93. #year is important predictor of attack
  94. #plotting model onto plot of attack versus year
  95.  
  96. residfitplot(glm1.2) #looks okay
  97. locscaleplot(glm1.2) #looks okay
  98. disperse(glm1.2, data = sumyearN)
  99. #dispersion is 1.33, model is not over-dispersed.
  100. summary(glm1.2)
  101.  
  102. #year is a significant predictor of attacks.
  103. svg("attacksperyear.svg")
  104. plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
  105. pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
  106. pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
  107. data=data.frame("attacks" = 0, "year" = pred)
  108. curve<-predict(glm1.2, data, type = "response")
  109. lines(pred, curve, lwd = 3, lty =2,  col = "black")
  110. dev.off()
  111. #looks ok
  112. #siginficant relationship according to summary
  113. #making regression tree to examine threshold in attacks in data
  114. library(party)
  115. tree1<-ctree(attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
  116. png("attacktree.png")
  117. plot(tree1)
  118. dev.off()
  119. #shifts in attacks per year before and after 2007
  120. #exmaining relationship between killed and year
  121. plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3")
  122. #seems to be a positive relationship between year and number killed
  123. #Trying negative binomial GLMs
  124. glm2<-glm(killed ~ year, family = poisson(link ='log'), data = sumyearN)
  125. glm2.1<-glmmadmb(killed ~ year, family = 'nbinom', data = sumyearN)
  126. glm2.2<-glmmadmb(killed ~ year, family = "nbinom1", link = "log",  data = sumyearN)
  127. ICtab(glm2, glm2.1, glm2.2, type = "AICc")
  128. #negative binomial is the best model
  129. #model selection
  130. glm2.21<-glmmadmb(killed ~ 1, family = "nbinom1", link = "log",  data = sumyearN)
  131. anova(glm2.21, glm2.1)
  132. #year is an important predictor of the number of people killed
  133. residfitplot(glm2.2) #looks okay
  134. locscaleplot(glm2.2) #looks okay
  135. disperse(glm2.2, data = sumyearN)
  136. #overdispersed 7.88
  137. #switching to negative binomial
  138. glm2.11<-glmmadmb(killed ~ 1, family = "nbinom", link = "log",  data = sumyearN)
  139. anova(glm2.1, glm2.11)
  140. #year is an important predictor
  141. residfitplot(glm2.1) #looks okay
  142. locscaleplot(glm2.1) #looks okay
  143. disperse(glm2.1, data = sumyearN)
  144. #underdispersed, 0.70
  145. #plotting model onto data
  146. svg("killedperyear.svg")
  147. plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths",  type = 'l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd =3)
  148. pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
  149. data=data.frame("killed" = 0, "year" = pred)
  150. curve<-predict(glm2.1, data, type = "response")
  151. lines(pred, curve, lwd = 3, lty =2, col = "black")
  152. dev.off()
  153. #Making a regression tree to examine thresholds in data
  154. tree2<-ctree(killed ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
  155. png("killedtree.png")
  156. plot(tree1)
  157. dev.off()
  158. #shift before and after 2007
  159. #killed per attack
  160. #Plotting killed per attack for each year
  161. plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", type ='l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
  162. lm2<-glm(killed/attacks ~ year, data = sumyearN)
  163. #looking at residual versus fitted and scale-location plots.
  164. #Residual fitted is ok. Scale location plot is not great
  165. plot(lm2)
  166. #using the dredge function in MuMIn package to determine the best model
  167. library(MuMIn)
  168. dredge(lm2)
  169. #lookis like null model is best
  170. #since the plot looks like a non-linear relationship switching to regression trees
  171. tree3<-ctree(killed/attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
  172. png("killedattack.png")
  173. plot(tree3)
  174. dev.off()
  175. #again 2007
  176. svg("killedattacksperyear.svg")
  177. plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3")
  178. pred<-seq(from = min(sumyearN$year), to=max(sumyearN$year), length = 14)
  179. data=data.frame("killed" = 0, "year" = pred)
  180. curve<-predict(glm2.1, data, type = "response")
  181. lines(pred, curve, lwd = 1, col = "black")
  182. dev.off()
  183.  
  184. # @@ RGEDIT LANDMARK @@: Province Analysis
  185. #making a plot of attacks by province
  186. png("AttacksProvince.png")
  187. plot(Attacks ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
  188. lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
  189. lines(Attacks ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
  190. lines(Attacks ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
  191. lines(Attacks ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
  192. lines(Attacks ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
  193. lines(Attacks ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
  194. lines(Attacks ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
  195. legend(2004, 150, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
  196. dev.off()
  197.  
  198. #Analyzing Province specific attack data
  199. glmp1<-glmmadmb(Attacks ~ Year, data = province, family = 'poisson', link = 'log')
  200. glmp1.1<-glmmadmb(Attacks ~ Province, data = province, family = 'nbinom1', link = 'log')
  201. glmp1.2<-glmmadmb(Attacks ~ Year, data = subset(province, Province != "Afghanistan"|Province != "Islamabad"), family = 'nbinom', link = 'log')
  202. AIC(glmp1, glmp1.1, glmp1.2)
  203. #negative binomial is the best model
  204. #models also run with Province and Year, but can;t run them together because of lack of replicates
  205. #log likelihood ratio tests
  206. glmp1.3<-update(glmp1.2, .~. - Province)
  207. anova(glmp1.2, glmp1.3)
  208. #Province is significant
  209. summary(glmp1.2)
  210. #All provinces are different from each other in number of attacks. Sindh and Baluchistan lead attacks
  211. residfitplot(glmp1.2)
  212. locscaleplot(glmp1.2)
  213.  
  214. #conditional inference tree
  215. ptree1<-ctree(Attacks ~ Year*Province, data = province)
  216. predict(ptree1)
  217.  
  218. #killed per province
  219. png("Killedprovince.png")
  220. plot(Killed ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = " Total Number of People Killed", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,550))
  221. lines(killed ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
  222. lines(Killed ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
  223. lines(Killed ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
  224. lines(Killed ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
  225. lines(Killed ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
  226. lines(Killed ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
  227. lines(Killed ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
  228. legend(2002, 550, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
  229. dev.off()
  230.  
  231. #
  232. glmkp1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'poisson', link = 'log')
  233. glmkp1.1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom1', link = 'log')
  234. glmkp1.2<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
  235. AIC(glmkp1, glmkp1.1, glmkp1.2)
  236. #quasi binomial is the best
  237. #log likelihood ratio tests
  238. glmkp1.3<-update(glmkp1.1, .~. - Province:Year)
  239. anova(glmkp1.1, glmkp1.3)
  240. #Interaction is not significant
  241. glmkp1.4<-update(glmkp1.3, .~. - Province)
  242. anova(glmkp1.3, glmkp1.4)
  243. #Province is significant
  244. glmkp1.5<-update(glmkp1.3, .~. - Year)
  245. anova(glmkp1.3, glmkp1.5)
  246. #Year is significant
  247. residfitplot(glmkp1.3)
  248. locscaleplot(glmkp1.3)
  249. #plots look good
  250. summary(glmkp1.1)
  251. #Gilgit Baltistan is different from rest
  252. #Increase in killed over time
  253.  
  254. #killed per attack - province
  255. kpap<-province$Killed/province$Attacks
  256. province2<-cbind(province, kpap)
  257. kpac<-city$Killed/city$Attacks
  258. city2<-cbind(city, kpac)
  259.  
  260. png("killedattackprovince.png")
  261. plot(kpap ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,60))
  262. lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
  263. lines(kpap ~ Year, data = subset(province2, Province == "Sindh"), col = '#97BDAF', lwd = 3)
  264. lines(kpap ~ Year, data = subset(province2, Province == "Punjab"), col = '#C453A4', lwd = 3)
  265. lines(kpap ~ Year, data = subset(province2, Province == "Baluchistan"), col = '#C46643', lwd =3)
  266. lines(kpap ~ Year, data = subset(province2, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
  267. lines(kpap ~ Year, data = subset(province2, Province == "FATA"), col = '#54433D', lwd = 3)
  268. lines(kpap ~ Year, data = subset(province2, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
  269. legend(2001, 58, c("Total","Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("Black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
  270. dev.off()
  271. boxplot(kpap ~ Province, data = subset(province2, Province != "Afghanistan" & Province != "Sndh"), las = 2)
  272. #running models
  273. kpap1<-lm(log(kpap + 1) ~ Year*Province, data = province2)
  274. dredge(kpap1)
  275. #Province only is the best model
  276. plot(kpap1)
  277. #not so bad
  278. summary(kpap1)
  279. #conditional inference trees
  280. kpaptree1<-ctree(kpap ~ Year*Province, data = province2)
  281.  
  282. # @@ RGEDIT LANDMARK @@: City Analysis
  283. #making a plot of attacks by most common cities in dataset
  284. #Karachi, Quetta, Gilgit, Peshawar, Dera Ismail Khan, Hangu,
  285. city[order(city$Year, -city$Attacks, -city$Killed),]
  286. png("AttacksCity.png")
  287. plot(Attacks ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
  288. lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
  289. lines(Attacks ~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
  290. lines(Attacks ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
  291. lines(Attacks ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
  292. lines(Attacks ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
  293. legend(2002, 150, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("Black","#97BDAF", "#C453A4", "#C46643","#94C053"), pch = 19, cex = 1.25)
  294. dev.off()
  295.  
  296. #model
  297. #unable to run Year*City model because of lack of replicates. City only model run
  298. glmc1<-glmmadmb(Attacks ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'poisson', link = 'log')
  299. glmc1.1<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
  300. glmc1.2<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
  301. AIC(glmc1, glmc1.1, glmc1.2)
  302. #negative binomial model is the best
  303. #log likelihood ratio tests
  304. glmc1.3<-update(glmc1.2, .~. - City*Year)
  305. anova(glmc1.2, glmc1.3)
  306. #City is significant
  307. residfitplot(glmc1.2)
  308. summary(glmc1.2)
  309. #Attack rates differ per year per city
  310. residfitplot(glmc1.2)
  311. locscaleplot(glmc1.2)
  312. #plots look good
  313.  
  314. #conditional inference trees
  315. ptree1<-ctree(Attacks ~ Year*City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
  316. predict(ptree1)
  317.  
  318.  
  319. #killed per city
  320. png("killedcity.png")
  321. plot(Killed ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,600))
  322. lines(killed ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
  323. lines(Killed~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
  324. lines(Killed ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
  325. lines(Killed ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
  326. lines(Killed ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
  327. legend(2001, 600, c("Total", "Karachi","Quetta", "Peshwara", "Dera Ismail Khan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D"), pch = 19, cex = 1.25)
  328. dev.off()
  329. #GLMs
  330. glmkc1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"),  family = 'poisson', link = 'log')
  331. glmkc1.1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
  332. glmkc1.2<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
  333. AIC(glmkc1, glmkc1.1, glmkc1.2)
  334. #quasi binomial is the best
  335. #log likelihood ratio tests
  336. glmkc1.3<-update(glmkc1.1, .~. - City:Year)
  337. anova(glmkc1.1, glmkc1.3)
  338. #Interaction is not significant
  339. glmkc1.4<-update(glmkc1.3, .~. - City)
  340. anova(glmkc1.3, glmkc1.4)
  341. #City is significantly different
  342. glmkc1.5<-update(glmkc1.3, .~. - Year)
  343. anova(glmkc1.3, glmkc1.5)
  344. #year is significant
  345. residfitplot(glmkc1.3)
  346. summary(glmkc1.3)
  347. #Increasing attacks per year. Quetta is marginally significantly different from all other cities
  348. tree2<-ctree(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan", control = ctree_control(teststat = "quad", testtype = "MonteCarlo")
  349. #Killed don't differ per year per city
  350. residfitplot(glmkp1.1)
  351. locscaleplot(glmkp1.1)
  352.  
  353. #interaction is not significant
  354. png("killedattackcity.png")
  355. plot(kpac ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,60))
  356. lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
  357. lines(kpac~ Year, data = subset(city2, City == "Karachi"), col = '#97BDAF', lwd = 3)
  358. lines(kpac ~ Year, data = subset(city2, City == "Quetta"), col = '#C453A4', lwd = 3)
  359. lines(kpac ~ Year, data = subset(city2, City == "Peshawar"), col = '#C46643', lwd =3)
  360. lines(kpac ~ Year, data = subset(city2, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
  361. legend(2001, 60, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
  362. dev.off()
  363. #models
  364. kpac1<-lm(log(kpac+1) ~ Year*City, data = subset(city2, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
  365. dredge(kpac1)
  366. #Null model is best, followed by year
  367. plot(kpac1)
  368. #plots look fine
  369. #conditional inference trees
  370. library(party)
  371. tree1<-ctree(Attacks ~ Year + Province, data = province, control = ctree_control(teststat = "quad", testtype = "MonteCarlo"))
  372. plot(tree1)
  373. tree2<-ctree(Attacks ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
  374. #across these cities violence picks up after 2011
  375. tree3<-ctree(Killed ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
  376. plot(tree3)
  377. tree4<-ctree(Killed ~ Year + Province, data = province, control = ctree_control(teststat = "quad"))
  378. plot(tree4)
  379. #same as attacks, pre 2011 Baluchistan/FATA/Kurram Region highest, followed by Sindh/Punjab and then Gilgit Baltistan/Islamabad. After 2011, number of Shias killed across all provinces/regions rise.
  380.  
  381. #Examining province specific trees
  382. tree4<-ctree(Attacks ~ Province, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "Khyber Pakhtunkhwa"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic")))
  383. boxplot(Attacks ~ Province, data = province)
  384. plot(tree4)
  385. tree4.1<-ctree(Killed ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic"))
  386. x11()
  387. plot(tree4.1)
  388. tree4.2<-ctree(Killed/Year ~ Province, data = province, control = ctree_control(minsplit = 3))
  389. plot(tree4.2)
  390. #Nothing is important. No specific differences between provinces
  391. #Examining cities
  392. x11()
  393. tree5<-ctree(Attacks ~ City + Year, data = city, control = ctree_control(teststat = "quad", testtype = "Univariate"))
  394. plot(tree5)
  395. summary(tree5)
  396. #Attacks is not changing by city,
  397. tree6<-ctree(Killed ~  City, data = city)
  398. x11()
  399. #Killed not changing every year by city
  400. #same for killed/attacks
  401.  
  402. #Running glms for city and province data
  403. glm4<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'poisson')
  404. glm4.1<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan" ), family = 'nbinom1')
  405. glm4.2<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
  406. ICtab(glm4, glm4.1, glm4.2)
  407. #negative binomial is the best model
  408. glm4.3<-glmmadmb(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
  409. anova(glm4.2, glm4.3)
  410. #no interaction
  411. glm4.31<-glmmadmb(Attacks ~ Province,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
  412. anova(glm4.3, glm4.31) #year is important
  413. glm4.32<-glmmadmb(Attacks ~ Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
  414. anova(glm4.3, glm4.32) #province is important
  415. residfitplot(glm4.3) #looks ok
  416. locscaleplot(glm4.3) #looks ok
  417. disperse(glm4.3, province) #model is under-dispersed 0.813
  418. summary(glm4.3)
  419. #running tree model with data
  420. tree2<-ctree(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =1, mtry = 1))
  421. plot(tree2)
  422. nodes(tree2,1)
  423. #model suggests that when looking at province-wide data
  424. library(partykit)
  425. z<-subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan")
  426. tree2<-tree2<-ctree(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =10))
  427.  
  428. # @@ RGEDIT LANDMARK @@: Comparing number of ppl killed in terrorism vs anti-Shia violence
  429. gen<-read.csv("generalviolence.csv")
  430. #removing 2015 and 2016 data
  431. #plotting Shia killed and general deaths
  432. ge2<-subset(gen, Year <=2014)
  433. png("GeneralvsShia.png")
  434. plot(killed ~ year, data = subset(sumyearN, year > 2002), xlab = "Year", ylab = "Number of People Killed", ylim = c(0,3200), bty = "l", cex.axis = 1.25, cex.lab = 1.5, type = 'n')
  435. lines(killed ~ year, data = subset(sumyearN, year > 2002), col = "#c14a34", lwd= 2)
  436. lines(Civilians ~ Year, data = ge2, col = "#6c44c0", lwd= 2)
  437. legend(2003, 3000, c("Terrorism", "Shia Killing"), col = c("#6c44c0","#c14a34"), lty = c(1,1), lwd = c(3,3), cex = 1.25)
  438. dev.off()
  439. zz<-(subset(sumyearN, year >= 2003)$killed/ge2$Civilians)*100
  440. #plotting percent deaths by Shias
  441. plot(killed ~ year, data = subset(sumyearN, year >= 2003), type = "n", xlab = "Year", ylab = "Percent of Shia deaths in Total", ylim = c(0,100), bty = "l", cex.axis = 1.25, cex.lab = 1.5)
  442.  
  443. #running model
  444. k1<-glmmadmb(Civilians ~ Year, data = ge2, family = "poisson", link = "log")
  445. k2<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom", link = "log")
  446. k3<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom1", link = "log")
  447. AIC(k1, k2, k3)
  448. #negative binomial is the best model
  449. summary(k2)
  450. #Year is significant
  451. lines(zz ~ Year, data = ge2, col = "#71b54b", lwd = 2)
  452. sum(zz)/length(ge2$Year)
  453. #conditional inference tree
  454. tree5<-ctree(zz ~ Year, data = ge2, control = ctree_control(testtype = "Univariate"))
  455. plot(tree5)
RAW Paste Data
Pastebin PRO Autumn Special!
Get 40% OFF on Pastebin PRO accounts!
Top