Untitled

library(sp)
library(spatstat)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(Rfast)
library(fields)
load("SHAMP_AEGISS_DATA.RData")
objects()

casesidx <- over(SpatialPoints(cbind(x, y), proj4string = CRS(proj4string(shamp))),
                 geometry(shamp))
shamp$count <- sapply(1:146, function(x) {
  sum(casesidx == x)
})
crds <- coordinates(shamp)
dat <- as.data.frame(shamp)


casesxy<-cbind(x,y)
listofones<-rep(1,nrow(casesxy))
casesxy<-cbind(casesxy,listofones)
colnames(casesxy)<-c("x","y","label")

listofzeroes<-rep(0,nrow(controls))
controlsxy<-cbind(controls,listofzeroes)
colnames(controlsxy)<-c("x","y","label")

gasdata<-rbind(casesxy,controlsxy)
gasdata<-as.data.frame(gasdata)
gasdata$label<-as.factor(gasdata$label)

casesxy<-as.data.frame(casesxy)
controlsxy<-as.data.frame(controlsxy)
### Exploratory Analysis ###

head(dat)

result1 <- aov(Health~Environment*Income,data = dat)
print(summary(result1))

result2 <- aov(Health~Environment+Income,data = dat)
print(summary(result2))

anova(result1,result2) # Health is affected by Environment and Income.

#

result3 <- aov(Income~Education*Employment,data = dat)
print(summary(result3))

result4 <- aov(Income~Education+Employment,data = dat)
print(summary(result4))

anova(result3,result4) # Income is earned similarly through education in either low or high employment.

#

result5 <- aov(Health~Barriers,data = dat)
print(summary(result5)) # No significant relation between Health and Barriers.

result6 <- aov(Health~Crime,data = dat)
print(summary(result6)) # Significant relation between Health and Crime.

plot(dat$Crime,dat$Health) # shows clear correlation. Health measures premature death, high health values means more death.

## Conclude that Health is affected by Environment, Income, and Crime. We have Income affected by Employment and Education.

# Plot Distribution of Deprivation of Southhampton
qplot(IMD, data = dat, geom = "density", alpha = I(0.5), main = "Distribution of Deprivation",
       xlab = "IMD Score", ylab = "Density")

### South Hampton plot of cases and controls ###

simpwin <- simplify.owin(win, d = 200)
gasppp <- ppp(x = gasdata$x, y = gasdata$y,marks = gasdata$label,
               window = simpwin)
plot(gasppp, chars = c(1, 3), cols = c("red", "black"))

### Testing for spatial clustering for cases ###

patt1 <- ppp(x = casesxy$x, y = casesxy$y,
              window = simpwin)

k1 <- Kest(patt1)
plot(k1,pch = "+")

str(k1)

r1 <- k1$r
kest1 <- k1$iso

plot(r1, kest1, type = "l")
curve(pi * x^2, col = "red", lty = "dashed", add = TRUE)

# intensity
lambdahat1 <- patt1$n/area.owin(patt1$window)
lambdahat1 # 2.006403e-05


nsims <- 20
kfuns1 <- c() # initialise the object
for (i in 1:nsims) {
  simdata <- rpoispp(lambda = lambdahat1, win = simpwin)
  ksim <- Kest(simdata, r = r1)$iso
  kfuns1 <- cbind(kfuns1, ksim)
}

qts <- apply(kfuns1, 1, quantile, probs = c(0.025, 0.975))

plot(NULL, xlab = "r", ylab = "Estimated K function", xlim = range(r1),
     ylim = range(kfuns1), main="Cluster for cases")
polygon(c(r1, rev(r1)), c(qts[1, ], rev(qts[2, ])), col = "lightgrey",
        border = NA)
lines(r1, kest1)
curve(pi * x^2, col = "red", lty = "dashed", add = TRUE)

### Testing for spatial clustering for controls ###

patt2 <- ppp(x = controlsxy$x, y = controlsxy$y,
             window = simpwin)

k2 <- Kest(patt2)
plot(k2,pch = "+")

str(k2)

r2 <- k2$r
kest2 <- k2$iso

plot(r2, kest2, type = "l")
curve(pi * x^2, col = "red", lty = "dashed", add = TRUE)

# intensity
lambdahat2 <- patt2$n/area.owin(patt2$window)
lambdahat2 # 2.010428e-05


nsims <- 20
kfuns2 <- c() # initialise the object
for (i in 1:nsims) {
  simdata2 <- rpoispp(lambda = lambdahat2, win = simpwin)
  ksim2 <- Kest(simdata2, r = r1)$iso
  kfuns2 <- cbind(kfuns2, ksim2)
}

qts2 <- apply(kfuns2, 1, quantile, probs = c(0.025, 0.975))

plot(NULL, xlab = "r", ylab = "Estimated K function", xlim = range(r2),
     ylim = range(kfuns2), main="Cluster for controls")
polygon(c(r2, rev(r2)), c(qts2[1, ], rev(qts2[2, ])), col = "lightgrey",
        border = NA)
lines(r2, kest2)
curve(pi * x^2, col = "red", lty = "dashed", add = TRUE)

# Evidence of a cluster process since K-function lies above the tolerance envelope.
# reduced number of sims to 20 due to limited processing power

cases <- patt1
contr <- patt2

kcases <- Kest(cases)
rk <- kcases$r
kcontr <- Kest(contr, r = rk)

D <- kcases$iso - kcontr$iso
plot(rk, D, xlab = "r", ylab = "D(r)", type = "l")

#

Dsim <- c() # we'll store our result in this object
xk <- c(cases$x, contr$x)
yk <- c(cases$y, contr$y)
cc <- c(rep("case", cases$n), rep("control", contr$n))
for (i in 1:nsims) {
  cc <- sample(cc)
  simcases <- ppp(x = xk[cc == "case"], y = yk[cc == "case"],
                  window = simpwin)
  simcontr <- ppp(x = xk[cc == "control"], y = yk[cc == "control"],
                  window = simpwin)
  dsim <- Kest(simcases, r = rk, correction = "isotropic")$iso -
    Kest(simcontr, r = rk, correction = "isotropic")$iso
  Dsim <- cbind(Dsim, dsim)
}

qts3 <- apply(Dsim, 1, quantile, probs = c(0.025, 0.975))

# Plot

plot(NULL, xlab = "r", ylab = "Estimated K function", xlim = range(rk),
     ylim = range(Dsim))
polygon(c(rk, rev(rk)), c(qts3[1, ], rev(qts3[2, ])), col = "lightgrey",
        border = NA)
lines(rk, D)
abline(h = 0, col = "red", lty = "dashed")

### Estimating Intensity ###

den1 <- density(patt1, positive = TRUE)
plot(den1,main="Density of cases")

xvals1 <- den1$xcol # the x-values
yvals1 <- den1$yrow # the y-values
zvals1 <- t(den1$v) # the estimated intensity, transposed
image.plot(xvals1, yvals1, zvals1, asp = 1)

### Generalised Linear Model ###