• API
• FAQ
• Tools
• Trends
• Archive
SHARE
TWEET

# Untitled

a guest Feb 25th, 2017 24 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. setwd("insert working directory here")
2. library(doBy)
3. library(party)
6. str(x) #checking to see if all the data is in the correct format
7. str(y)
8. y\$injured[is.na(y\$injured)]<-0
9. y\$killed[is.na(y\$killed)]<-0
10. y\$Province[566]<-"Sindh"
11.
12.
13. residfitplot <- function (model, col="black") {
14.   f<-fitted(model)
15.   r<-residuals(model, type='pearson')
16.   plot(f, r, col=col, main='Residuals vs. Fitted')
17.   L1<-loess(r~f)
18.   fvec = seq(min(f),max(f),length.out=150)
19.   lines(fvec,predict(L1,fvec),col=2)
20. }
21.
22. #scale-location plot for GLMMADMB object
23. locscaleplot <- function(model,col="black") {
24.   f <- fitted(model)
25.   r <- sqrt(abs(residuals(model, type='pearson')))
26.   plot(f,r,col=col, main='Scale-Location Plot')
27.   L1 <- loess(r~f)
28.   fvec = seq(min(f),max(f),length.out=150)
29.   lines(fvec,predict(L1,fvec),col=2)
30. }
31. #summing the attack and killed data for each year
32. sumyear1<-summaryBy(killed + injured ~ Year, data = subset(y, tag == "Shia killing"
33. | tag == "Shia blast" | tag == "sectarian conflict" | tag == "sectarian conflict"),
34. FUN = function(x) sum(x))
35. attacks1<-summaryBy(Date ~ Year, data = subset(y, tag == "Shia killing" | tag == "Shia blast"
36. | tag == "sectarian conflict" | tag == "sectarian conflict" & Province != "Afghanistan"),
37. FUN = function(x) length(x))
38.
39. # @@ RGEDIT LANDMARK @@: combining attack and killed/injured data
40.
41. sumyearN<-cbind(sumyear1, attacks1[,2])
42. colnames(sumyearN)<-c("year", "killed", "injured", "attacks")
43.
44. sumyearN[14,]<-cbind(2014, 222, 289, 54) #adding data from Pakistan fact sheet
45.
46. #looking at the means, median and the IQR
47. summary(sumyearN)
48.
49. # @@ RGEDIT LANDMARK @@: subset data by province
50. p<-summaryBy(killed + injured ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
51. FUN = function(x) sum(x))
52. pattacks<-summaryBy(Date ~ Year + Province, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
53. FUN = function(x) length(x))
54. province<-cbind(p, pattacks[,3])
55. colnames(province)<-c("Year", "Province", "Killed", "Injured", "Attacks")
56.
57. # @@ RGEDIT LANDMARK @@: subset data by city
58. c<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
59. FUN = function(x) sum(x))
60. city<-summaryBy(killed + injured ~ City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
61. FUN = function(x) sum(x))
62. city2<-city[order(city\$killed), ]
63. colnames(c)<-c("Year", "City", "Killed", "Injured")
64. cattacks<-summaryBy(killed + injured ~ Year + City, data = subset(y, tag == "Shia killing" | tag == "Shia blast" | tag == "sectarian conflict"),
65. FUN = function(x) length(x))
66. city<-cbind(c, cattacks[,3])
67. colnames(city)<-c("Year", "City", "Killed", "Injured", "Attacks")
68.
69. # @@ RGEDIT LANDMARK @@: General anti-Shia violence trends
70. #plotting the relationship between attacks and year
71. pdf("Attacksperyear.pdf")
72. plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
73. pch = 16, cex = 1.5, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3", lwd = 3)
74. dev.off()
75. #cleary, not very normal. resid fit plot looks awful and scale-location plots shows that variance is not consistent, but increasing)
76. #let's try a poisson regression, because this is count data and count data is generally not poisson distributed
77. glm1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'poisson', link = 'log')
78. glm1.1<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom1', link = 'log')
79. glm1.2<-glmmadmb(attacks ~ year, data = sumyearN, family = 'nbinom', link = 'log')
80. ICtab(glm1, glm1.1, glm1.2, type = "AIC")
81. #quasi poisson is best model
82. #model selection via log likelihood ratio tests
83. glm1.1<-glm(attacks ~ year, data = sumyearN, family = quasipoisson('log'))
84. plot(glm1.1)
85. #residual versus fitted plot is not bad, but scale location plot is not good
86. #calculating overdispersion for the relationship
87.
88. phi.hat=sum((residuals(glm1.1, type = "pearson"))^2)/glm1.1\$df.residual
89. #phi.hat is 15.17. The model is very overdispersed.
90. #Switching to negative binomial
91. glm1.21<-glmmadmb(attacks ~ 1, data = sumyearN, family = 'nbinom', link = 'log')
92. anova(glm1.2, glm1.21)
93. #year is important predictor of attack
94. #plotting model onto plot of attack versus year
95.
96. residfitplot(glm1.2) #looks okay
97. locscaleplot(glm1.2) #looks okay
98. disperse(glm1.2, data = sumyearN)
99. #dispersion is 1.33, model is not over-dispersed.
100. summary(glm1.2)
101.
102. #year is a significant predictor of attacks.
103. svg("attacksperyear.svg")
104. plot(attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of attacks", type = 'l',
105. pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
106. pred<-seq(from = min(sumyearN\$year), to=max(sumyearN\$year), length = 14)
107. data=data.frame("attacks" = 0, "year" = pred)
108. curve<-predict(glm1.2, data, type = "response")
109. lines(pred, curve, lwd = 3, lty =2,  col = "black")
110. dev.off()
111. #looks ok
112. #siginficant relationship according to summary
113. #making regression tree to examine threshold in attacks in data
114. library(party)
115. tree1<-ctree(attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
116. png("attacktree.png")
117. plot(tree1)
118. dev.off()
119. #shifts in attacks per year before and after 2007
120. #exmaining relationship between killed and year
121. plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.5, cex.axis = 1.25, col = "#6E8CD3")
122. #seems to be a positive relationship between year and number killed
123. #Trying negative binomial GLMs
124. glm2<-glm(killed ~ year, family = poisson(link ='log'), data = sumyearN)
125. glm2.1<-glmmadmb(killed ~ year, family = 'nbinom', data = sumyearN)
126. glm2.2<-glmmadmb(killed ~ year, family = "nbinom1", link = "log",  data = sumyearN)
127. ICtab(glm2, glm2.1, glm2.2, type = "AICc")
128. #negative binomial is the best model
129. #model selection
130. glm2.21<-glmmadmb(killed ~ 1, family = "nbinom1", link = "log",  data = sumyearN)
131. anova(glm2.21, glm2.1)
132. #year is an important predictor of the number of people killed
133. residfitplot(glm2.2) #looks okay
134. locscaleplot(glm2.2) #looks okay
135. disperse(glm2.2, data = sumyearN)
136. #overdispersed 7.88
137. #switching to negative binomial
138. glm2.11<-glmmadmb(killed ~ 1, family = "nbinom", link = "log",  data = sumyearN)
139. anova(glm2.1, glm2.11)
140. #year is an important predictor
141. residfitplot(glm2.1) #looks okay
142. locscaleplot(glm2.1) #looks okay
143. disperse(glm2.1, data = sumyearN)
144. #underdispersed, 0.70
145. #plotting model onto data
146. svg("killedperyear.svg")
147. plot(killed ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths",  type = 'l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd =3)
148. pred<-seq(from = min(sumyearN\$year), to=max(sumyearN\$year), length = 14)
149. data=data.frame("killed" = 0, "year" = pred)
150. curve<-predict(glm2.1, data, type = "response")
151. lines(pred, curve, lwd = 3, lty =2, col = "black")
152. dev.off()
153. #Making a regression tree to examine thresholds in data
154. tree2<-ctree(killed ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
155. png("killedtree.png")
156. plot(tree1)
157. dev.off()
158. #shift before and after 2007
159. #killed per attack
160. #Plotting killed per attack for each year
161. plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", type ='l', pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3", lwd = 3)
162. lm2<-glm(killed/attacks ~ year, data = sumyearN)
163. #looking at residual versus fitted and scale-location plots.
164. #Residual fitted is ok. Scale location plot is not great
165. plot(lm2)
166. #using the dredge function in MuMIn package to determine the best model
167. library(MuMIn)
168. dredge(lm2)
169. #lookis like null model is best
170. #since the plot looks like a non-linear relationship switching to regression trees
171. tree3<-ctree(killed/attacks ~ year, data = sumyearN, control = ctree_control(minsplit = 3))
172. png("killedattack.png")
173. plot(tree3)
174. dev.off()
175. #again 2007
176. svg("killedattacksperyear.svg")
177. plot(killed/attacks ~ year, data = sumyearN, xlab = "Year", ylab = "Number of deaths", pch = 16, cex = 2, cex.lab = 1.25, cex.axis = 1, col = "#6E8CD3")
178. pred<-seq(from = min(sumyearN\$year), to=max(sumyearN\$year), length = 14)
179. data=data.frame("killed" = 0, "year" = pred)
180. curve<-predict(glm2.1, data, type = "response")
181. lines(pred, curve, lwd = 1, col = "black")
182. dev.off()
183.
184. # @@ RGEDIT LANDMARK @@: Province Analysis
185. #making a plot of attacks by province
186. png("AttacksProvince.png")
187. plot(Attacks ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
188. lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
189. lines(Attacks ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
190. lines(Attacks ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
191. lines(Attacks ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
192. lines(Attacks ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
193. lines(Attacks ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
194. lines(Attacks ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
195. legend(2004, 150, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
196. dev.off()
197.
198. #Analyzing Province specific attack data
199. glmp1<-glmmadmb(Attacks ~ Year, data = province, family = 'poisson', link = 'log')
200. glmp1.1<-glmmadmb(Attacks ~ Province, data = province, family = 'nbinom1', link = 'log')
201. glmp1.2<-glmmadmb(Attacks ~ Year, data = subset(province, Province != "Afghanistan"|Province != "Islamabad"), family = 'nbinom', link = 'log')
202. AIC(glmp1, glmp1.1, glmp1.2)
203. #negative binomial is the best model
204. #models also run with Province and Year, but can;t run them together because of lack of replicates
205. #log likelihood ratio tests
206. glmp1.3<-update(glmp1.2, .~. - Province)
207. anova(glmp1.2, glmp1.3)
208. #Province is significant
209. summary(glmp1.2)
210. #All provinces are different from each other in number of attacks. Sindh and Baluchistan lead attacks
211. residfitplot(glmp1.2)
212. locscaleplot(glmp1.2)
213.
214. #conditional inference tree
215. ptree1<-ctree(Attacks ~ Year*Province, data = province)
216. predict(ptree1)
217.
218. #killed per province
219. png("Killedprovince.png")
220. plot(Killed ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = " Total Number of People Killed", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,550))
221. lines(killed ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
222. lines(Killed ~ Year, data = subset(province, Province == "Sindh"), col = '#97BDAF', lwd = 3)
223. lines(Killed ~ Year, data = subset(province, Province == "Punjab"), col = '#C453A4', lwd = 3)
224. lines(Killed ~ Year, data = subset(province, Province == "Baluchistan"), col = '#C46643', lwd =3)
225. lines(Killed ~ Year, data = subset(province, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
226. lines(Killed ~ Year, data = subset(province, Province == "FATA"), col = '#54433D', lwd = 3)
227. lines(Killed ~ Year, data = subset(province, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
228. legend(2002, 550, c("Total", "Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
229. dev.off()
230.
231. #
232. glmkp1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'poisson', link = 'log')
233. glmkp1.1<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom1', link = 'log')
234. glmkp1.2<-glmmadmb(Killed ~ Province*Year, data = subset(province, Province == "Sindh"| Province == "Punjab"| Province == "Baluchistan"| Province == "Khyber Pakhtunkhwa"| Province == "FATA"| Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
235. AIC(glmkp1, glmkp1.1, glmkp1.2)
236. #quasi binomial is the best
237. #log likelihood ratio tests
238. glmkp1.3<-update(glmkp1.1, .~. - Province:Year)
239. anova(glmkp1.1, glmkp1.3)
240. #Interaction is not significant
241. glmkp1.4<-update(glmkp1.3, .~. - Province)
242. anova(glmkp1.3, glmkp1.4)
243. #Province is significant
244. glmkp1.5<-update(glmkp1.3, .~. - Year)
245. anova(glmkp1.3, glmkp1.5)
246. #Year is significant
247. residfitplot(glmkp1.3)
248. locscaleplot(glmkp1.3)
249. #plots look good
250. summary(glmkp1.1)
251. #Gilgit Baltistan is different from rest
252. #Increase in killed over time
253.
254. #killed per attack - province
255. kpap<-province\$Killed/province\$Attacks
256. province2<-cbind(province, kpap)
257. kpac<-city\$Killed/city\$Attacks
258. city2<-cbind(city, kpac)
259.
260. png("killedattackprovince.png")
261. plot(kpap ~ Year, data = province, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,60))
262. lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
263. lines(kpap ~ Year, data = subset(province2, Province == "Sindh"), col = '#97BDAF', lwd = 3)
264. lines(kpap ~ Year, data = subset(province2, Province == "Punjab"), col = '#C453A4', lwd = 3)
265. lines(kpap ~ Year, data = subset(province2, Province == "Baluchistan"), col = '#C46643', lwd =3)
266. lines(kpap ~ Year, data = subset(province2, Province == "Khyber Pakhtunkhwa"), col = '#94C053', lwd =3)
267. lines(kpap ~ Year, data = subset(province2, Province == "FATA"), col = '#54433D', lwd = 3)
268. lines(kpap ~ Year, data = subset(province2, Province == "Gilgit Baltistan"), col = '#7A75BC', lwd = 3)
269. legend(2001, 58, c("Total","Sindh","Punjab", "Baluchistan", "KP", "FATA", "Gilgit Baltistan"), col = c("Black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
270. dev.off()
271. boxplot(kpap ~ Province, data = subset(province2, Province != "Afghanistan" & Province != "Sndh"), las = 2)
272. #running models
273. kpap1<-lm(log(kpap + 1) ~ Year*Province, data = province2)
274. dredge(kpap1)
275. #Province only is the best model
276. plot(kpap1)
278. summary(kpap1)
279. #conditional inference trees
280. kpaptree1<-ctree(kpap ~ Year*Province, data = province2)
281.
282. # @@ RGEDIT LANDMARK @@: City Analysis
283. #making a plot of attacks by most common cities in dataset
284. #Karachi, Quetta, Gilgit, Peshawar, Dera Ismail Khan, Hangu,
285. city[order(city\$Year, -city\$Attacks, -city\$Killed),]
286. png("AttacksCity.png")
287. plot(Attacks ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Number of Attacks", cex.lab = 1.5, cex.axis = 1.25, bty = "l", ylim = c(0,150))
288. lines(attacks ~ year, data = subset(sumyearN, year<= 2013), col = "black", lwd = 3)
289. lines(Attacks ~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
290. lines(Attacks ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
291. lines(Attacks ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
292. lines(Attacks ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
293. legend(2002, 150, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("Black","#97BDAF", "#C453A4", "#C46643","#94C053"), pch = 19, cex = 1.25)
294. dev.off()
295.
296. #model
297. #unable to run Year*City model because of lack of replicates. City only model run
298. glmc1<-glmmadmb(Attacks ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'poisson', link = 'log')
299. glmc1.1<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
300. glmc1.2<-glmmadmb(Attacks ~ City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
301. AIC(glmc1, glmc1.1, glmc1.2)
302. #negative binomial model is the best
303. #log likelihood ratio tests
304. glmc1.3<-update(glmc1.2, .~. - City*Year)
305. anova(glmc1.2, glmc1.3)
306. #City is significant
307. residfitplot(glmc1.2)
308. summary(glmc1.2)
309. #Attack rates differ per year per city
310. residfitplot(glmc1.2)
311. locscaleplot(glmc1.2)
312. #plots look good
313.
314. #conditional inference trees
315. ptree1<-ctree(Attacks ~ Year*City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
316. predict(ptree1)
317.
318.
319. #killed per city
320. png("killedcity.png")
321. plot(Killed ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,600))
322. lines(killed ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
323. lines(Killed~ Year, data = subset(city, City == "Karachi"), col = '#97BDAF', lwd = 3)
324. lines(Killed ~ Year, data = subset(city, City == "Quetta"), col = '#C453A4', lwd = 3)
325. lines(Killed ~ Year, data = subset(city, City == "Peshawar"), col = '#C46643', lwd =3)
326. lines(Killed ~ Year, data = subset(city, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
327. legend(2001, 600, c("Total", "Karachi","Quetta", "Peshwara", "Dera Ismail Khan"), col = c("black", "#97BDAF", "#C453A4", "#C46643","#94C053","#54433D"), pch = 19, cex = 1.25)
328. dev.off()
329. #GLMs
330. glmkc1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"),  family = 'poisson', link = 'log')
331. glmkc1.1<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom1', link = 'log')
332. glmkc1.2<-glmmadmb(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), family = 'nbinom', link = 'log')
333. AIC(glmkc1, glmkc1.1, glmkc1.2)
334. #quasi binomial is the best
335. #log likelihood ratio tests
336. glmkc1.3<-update(glmkc1.1, .~. - City:Year)
337. anova(glmkc1.1, glmkc1.3)
338. #Interaction is not significant
339. glmkc1.4<-update(glmkc1.3, .~. - City)
340. anova(glmkc1.3, glmkc1.4)
341. #City is significantly different
342. glmkc1.5<-update(glmkc1.3, .~. - Year)
343. anova(glmkc1.3, glmkc1.5)
344. #year is significant
345. residfitplot(glmkc1.3)
346. summary(glmkc1.3)
347. #Increasing attacks per year. Quetta is marginally significantly different from all other cities
348. tree2<-ctree(Killed ~ City*Year, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan", control = ctree_control(teststat = "quad", testtype = "MonteCarlo")
349. #Killed don't differ per year per city
350. residfitplot(glmkp1.1)
351. locscaleplot(glmkp1.1)
352.
353. #interaction is not significant
354. png("killedattackcity.png")
355. plot(kpac ~ Year, data = city, type = 'n', axes = TRUE, xlab = "Year", ylab = "Total Number of People Killed per Attack", cex.lab = 1.5, cex.axis =1.25, bty = "l", ylim = c(0,60))
356. lines(killed/attacks ~ year, data = subset(sumyearN, year <= 2013), col = "black", lwd = 3)
357. lines(kpac~ Year, data = subset(city2, City == "Karachi"), col = '#97BDAF', lwd = 3)
358. lines(kpac ~ Year, data = subset(city2, City == "Quetta"), col = '#C453A4', lwd = 3)
359. lines(kpac ~ Year, data = subset(city2, City == "Peshawar"), col = '#C46643', lwd =3)
360. lines(kpac ~ Year, data = subset(city2, City == "Dera Ismail Khan"), col = '#94C053', lwd =3)
361. legend(2001, 60, c("Total","Karachi","Quetta", "Peshawar", "Dera Ismail Khan"), col = c("black","#97BDAF", "#C453A4", "#C46643","#94C053","#54433D","#7A75BC"), pch = 19, cex = 1.25)
362. dev.off()
363. #models
364. kpac1<-lm(log(kpac+1) ~ Year*City, data = subset(city2, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"))
365. dredge(kpac1)
366. #Null model is best, followed by year
367. plot(kpac1)
368. #plots look fine
369. #conditional inference trees
370. library(party)
371. tree1<-ctree(Attacks ~ Year + Province, data = province, control = ctree_control(teststat = "quad", testtype = "MonteCarlo"))
372. plot(tree1)
373. tree2<-ctree(Attacks ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
374. #across these cities violence picks up after 2011
375. tree3<-ctree(Killed ~ Year + City, data = subset(city, City == "Karachi"|City == "Quetta"|City == "Peshawar"|City == "Dera Ismail Khan"), control = ctree_control(teststat = "quad"))
376. plot(tree3)
377. tree4<-ctree(Killed ~ Year + Province, data = province, control = ctree_control(teststat = "quad"))
378. plot(tree4)
379. #same as attacks, pre 2011 Baluchistan/FATA/Kurram Region highest, followed by Sindh/Punjab and then Gilgit Baltistan/Islamabad. After 2011, number of Shias killed across all provinces/regions rise.
380.
381. #Examining province specific trees
382. tree4<-ctree(Attacks ~ Province, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "Khyber Pakhtunkhwa"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic")))
383. boxplot(Attacks ~ Province, data = province)
384. plot(tree4)
385. tree4.1<-ctree(Killed ~ Province + Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit = 10, teststat = "quad", testtype = "Teststatistic"))
386. x11()
387. plot(tree4.1)
388. tree4.2<-ctree(Killed/Year ~ Province, data = province, control = ctree_control(minsplit = 3))
389. plot(tree4.2)
390. #Nothing is important. No specific differences between provinces
391. #Examining cities
392. x11()
393. tree5<-ctree(Attacks ~ City + Year, data = city, control = ctree_control(teststat = "quad", testtype = "Univariate"))
394. plot(tree5)
395. summary(tree5)
396. #Attacks is not changing by city,
397. tree6<-ctree(Killed ~  City, data = city)
398. x11()
399. #Killed not changing every year by city
400. #same for killed/attacks
401.
402. #Running glms for city and province data
403. glm4<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'poisson')
404. glm4.1<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan" ), family = 'nbinom1')
405. glm4.2<-glmmadmb(Attacks ~ Province*Year, data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
406. ICtab(glm4, glm4.1, glm4.2)
407. #negative binomial is the best model
408. glm4.3<-glmmadmb(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), family = 'nbinom', link = 'log')
409. anova(glm4.2, glm4.3)
410. #no interaction
411. glm4.31<-glmmadmb(Attacks ~ Province,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
412. anova(glm4.3, glm4.31) #year is important
413. glm4.32<-glmmadmb(Attacks ~ Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"), family = 'nbinom', link = 'log')
414. anova(glm4.3, glm4.32) #province is important
415. residfitplot(glm4.3) #looks ok
416. locscaleplot(glm4.3) #looks ok
417. disperse(glm4.3, province) #model is under-dispersed 0.813
418. summary(glm4.3)
419. #running tree model with data
420. tree2<-ctree(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =1, mtry = 1))
421. plot(tree2)
422. nodes(tree2,1)
423. #model suggests that when looking at province-wide data
424. library(partykit)
425. z<-subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan")
426. tree2<-tree2<-ctree(Attacks ~ Province + Year,  data = subset(province, Province == "Sindh"|Province == "Punjab"|Province == "KP"|Province == "Baluchistan"|Province == "Gilgit Baltistan"), control = ctree_control(minsplit =10))
427.
428. # @@ RGEDIT LANDMARK @@: Comparing number of ppl killed in terrorism vs anti-Shia violence
430. #removing 2015 and 2016 data
431. #plotting Shia killed and general deaths
432. ge2<-subset(gen, Year <=2014)
433. png("GeneralvsShia.png")
434. plot(killed ~ year, data = subset(sumyearN, year > 2002), xlab = "Year", ylab = "Number of People Killed", ylim = c(0,3200), bty = "l", cex.axis = 1.25, cex.lab = 1.5, type = 'n')
435. lines(killed ~ year, data = subset(sumyearN, year > 2002), col = "#c14a34", lwd= 2)
436. lines(Civilians ~ Year, data = ge2, col = "#6c44c0", lwd= 2)
437. legend(2003, 3000, c("Terrorism", "Shia Killing"), col = c("#6c44c0","#c14a34"), lty = c(1,1), lwd = c(3,3), cex = 1.25)
438. dev.off()
439. zz<-(subset(sumyearN, year >= 2003)\$killed/ge2\$Civilians)*100
440. #plotting percent deaths by Shias
441. plot(killed ~ year, data = subset(sumyearN, year >= 2003), type = "n", xlab = "Year", ylab = "Percent of Shia deaths in Total", ylim = c(0,100), bty = "l", cex.axis = 1.25, cex.lab = 1.5)
442.
443. #running model
444. k1<-glmmadmb(Civilians ~ Year, data = ge2, family = "poisson", link = "log")
445. k2<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom", link = "log")
446. k3<-glmmadmb(Civilians ~ Year, data = ge2, family = "nbinom1", link = "log")
447. AIC(k1, k2, k3)
448. #negative binomial is the best model
449. summary(k2)
450. #Year is significant
451. lines(zz ~ Year, data = ge2, col = "#71b54b", lwd = 2)
452. sum(zz)/length(ge2\$Year)
453. #conditional inference tree
454. tree5<-ctree(zz ~ Year, data = ge2, control = ctree_control(testtype = "Univariate"))
455. plot(tree5)
RAW Paste Data
Top