Untitled

iter <- 19
for (d in c(3,4)){
  for (n in c(1,3,10)){
    for (k in c(.1, .3, .6)){
      library(RMySQL)
      library(DBI)
      library(partitions)
      library(igraph)
      library(pbapply)
      ########################################### Population and Database initialization ###################################
      load("listDf_Final.rda") #Loads into object "listDf" of all contacts and base transmission probabilities for each individual as a list of data frames
      #Database Connection
      #con <- dbConnect(MySQL(), user="g1081391", password="Wif1hasnomeaning", dbname="g1081391", host="mydb.ics.purdue.edu")
      #on.exit(dbDisconnect(con))

      con <- dbConnect(MySQL(), user="g1081391", password="Wif1hasnomeaning", dbname="g1081391", host="mydb.ics.purdue.edu")
      on.exit(dbDisconnect(con))

      ################################################# Queries ###############################################

      QEnd <- ") ORDER BY RAND() LIMIT " #ending query fragment to pull out only poisson # of contacts

      # Select a poisson number of random Person_ID who belong to same ____ as our infected indiv
      QWork <- "SELECT I.Person_ID AS Person_ID
      FROM Individuals I, Works_At W
      WHERE I.Person_ID = W.Person_ID AND W.Work_ID = (SELECT W.Work_ID
      FROM Works_At W
      WHERE W.Person_ID = "

      QCommunity <- "SELECT I.Person_ID AS Person_ID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Region_Num = (SELECT H.Region_Num
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID
      AND I.Person_ID = "

      QDensity <- "SELECT R.Pop_Density
      FROM Individuals I, Household H, Region R
      WHERE I.Household_ID = H.Household_ID AND H.Region_Num = R.Region_Num
      AND I.Person_ID = "

      QAvgDensity <- "SELECT AVG(Pop_Density)
      FROM Region;"

      QBus <- "SELECT DISTINCT I.Person_ID AS Person_ID
      FROM Individuals I, Household H, Bus_Routes B
      WHERE I.Household_ID = H.Household_ID AND H.Region_Num = B.Region_Num AND H.PubTrans = 'YES' AND B.Bus_ID IN (SELECT DISTINCT B.Bus_ID
      FROM Individuals I, Household H, Bus_Routes B
      WHERE I.Household_ID = H.Household_ID AND H.Region_Num = B.Region_Num AND H.PubTrans = 'YES' AND I.Person_ID = "

      QHousehold <- "SELECT I.Person_ID AS Person_ID
      FROM Individuals I
      WHERE I.Household_ID = (SELECT I.Household_ID
      FROM Individuals I
      WHERE I.Person_ID = "

      #Multiplier Queries (Attributes of Individuals)

      QWhite <- "SELECT I.Person_ID AS WhiteID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'White';"

      QAsian <- "SELECT I.Person_ID AS AsianID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'Asian';"

      QBlack <- "SELECT I.Person_ID AS BlackID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'Black';"

      QHispanic <- "SELECT I.Person_ID AS HispanicID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'Hispanic';"

      QMultirace <- "SELECT I.Person_ID AS MultiraceID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'Multirace';"

      QAmerIndian <- "SELECT I.Person_ID AS AmerIndianID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Race = 'Amer Indian';"

      QPov <- "SELECT I.Person_ID AS povID
      FROM Individuals I, Household H
      WHERE I.Household_ID = H.Household_ID AND H.Income < 10000;"

      Qinfant <- "SELECT I.Person_ID AS infantID
      FROM Individuals I
      WHERE FLOOR(DATEDIFF(CURDATE(),I.DOB) / 365.25) <= 4;"

      Qchild <- "SELECT I.Person_ID AS childID
      FROM Individuals I
      WHERE FLOOR(DATEDIFF(CURDATE(),I.DOB) / 365.25) BETWEEN 5 AND 18;"

      Qadult <- "SELECT I.Person_ID AS adultID
      FROM Individuals I
      WHERE FLOOR(DATEDIFF(CURDATE(),I.DOB) / 365.25) BETWEEN 19 AND 64;"

      Qsenior <- "SELECT I.Person_ID AS seniorID
      FROM Individuals I
      WHERE FLOOR(DATEDIFF(CURDATE(),I.DOB) / 365.25) > 64;;"

      #Decide who to infect! and how many!
      Qinf_IDs <- "SELECT I.Person_ID AS ID
      FROM Individuals I
      ORDER BY RAND()
      LIMIT "
      ########################################### Parameter intialization ###############################
      popSize <- 140632
      pop <- data.frame(ID = c(1:popSize)) #individual IDs as a column
      duration <- 100 #number of days for simulation
      meanContactsDay <- 13.4 #https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2270306/
      meanDensity <- dbGetQuery(con, QAvgDensity) #mean density of all regions

      ##############################################User inputs##############################################
      diseaseType <- d #Enter type of disease 1= Influenza, 2 = Smallpox, 3 = Measles, 4 = Ebola
      #Initial infected individuals ID by query
      initialNumber <- n #1, 3, or 10
      kPct <- k#k nodes percentage to identify 0.1, 0.3, 0.6
      ####################################################################################################################

      #variable declarations+ initialization:
      tempdf <- dbGetQuery(con, paste0(Qinf_IDs,initialNumber))
      inf_IDs <- tempdf$ID
      totalPeople <- length(pop$ID) #total number of people
      numInfected <- rep(length(inf_IDs),1)
      numRecovered <- rep(0,1)
      numDead <- rep(0,1)
      susceptible <- rep((totalPeople - length(inf_IDs)),1)
      nextInf_IDs <- vector()
      cumDead_ID <- vector()
      cumRecov_ID <- vector()
      totalEdges <- data.frame(ID1 = inf_IDs, ID2 = NA) #initialize for all contacts in sim

      #Disease Rate
      avgMult <- mean(c(1,1,2.4,6)) #taken from plot of disease contagiousness
      contagious <- 1/avgMult
      ifelse(diseaseType == 2, contagious <- 2.4/avgMult,
             ifelse(diseaseType == 3, contagious <- 6/avgMult, contagious <- 1/avgMult)) #contagious multiplier for disease type

      #Normalized to White multipliers for each race to infectious disease (CDC)
      raceMean <- mean(c(3.0,8.1,10.9,8.2,4.1,100))
      raceWeight <- c(8.1/raceMean, 10.9/raceMean, 8.2/raceMean, 4.1/raceMean,4.1/raceMean) #asian, black, hispanic, amerindian, multirace

      povWeight <- 1.5 #No exact multiplier can be found, but two biosecurity papers suggest strong correlations between poverty and influenza

      #Age weight: 0-4, 5-18, 19-64, > 65
      ageWeight <- c(0.0005775, 0.000575, 0.0003175, 0.000855)#transmission weight based on age and seasonal influenza
      #Data taken from summing the columns of the transition probability matrix from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2725959/
      deathRate <- c(0.00005, 0.00003, 0.001,0.027) #for each age group: 0-4, 5-18, 19-64, > 65 for influenza
      RecoveryRate <- 1/6 #rate = 1/ average time spent in infected class

      #Produces logical vectors of Individual IDs of different demographics
      whiteID <- dbGetQuery(con, QWhite)
      asianID <- dbGetQuery(con, QAsian)
      blackID <- dbGetQuery(con, QBlack)
      hispanicID <- dbGetQuery(con, QHispanic)
      multiraceID <- dbGetQuery(con, QMultirace)
      amerindianID <- dbGetQuery(con, QAmerIndian)
      povID <- dbGetQuery(con, QPov)
      infantID <- dbGetQuery(con, Qinfant)
      childID <- dbGetQuery(con, Qchild)
      adultID <- dbGetQuery(con, Qadult)
      seniorID <- dbGetQuery(con, Qsenior)

      #Population characteristics into pop dataframe
      pop$Race[pop$ID %in% whiteID$WhiteID] <- "White"
      pop$Race[pop$ID %in% asianID$AsianID] <- "Asian"
      pop$Race[pop$ID %in% blackID$BlackID] <- "Black"
      pop$Race[pop$ID %in% hispanicID$HispanicID] <- "Hispanic"
      pop$Race[pop$ID %in% multiraceID$MultiraceID] <- "Multirace"
      pop$Race[pop$ID %in% amerindianID$AmerIndianID] <- "American Indian"
      pop$Race[pop$ID %in% asianID$AsianID] <- "Asian"
      age <- dbGetQuery(con, "SELECT FLOOR(DATEDIFF(CURDATE(),I.DOB) / 365.25) AS Age FROM Individuals I;")
      pop$Age <- age$Age
      pop$Income[pop$ID %in% povID] <- "Poverty"
      pop$Income[!(pop$ID %in% povID)] <- "No Poverty"
      #pop$Region <- dbGetQuery(con, "SELECT H.Region_Num FROM Household H, Individuals I WHERE I.Household_ID = H.Household_ID;") FIX THIS REGION ORDERING
      pop$health_status <- 0 #everyone healthy
      pop$health_status[pop$ID %in% inf_IDs] <- 1 #initial infected in health_status
      pop$inf_count <- 0 #0 people initially infected
      pop$k_node <- 0 #everyone not k_node
      pop$Purdue <- "No"
      purdue <- dbGetQuery(con, "SELECT I.Person_ID AS Person_ID FROM Individuals I, Works_At W WHERE I.Person_ID = W.Person_ID AND W.Work_ID = 1000")
      pop$Purdue[purdue$Person_ID] <- "Yes"
      region <- dbGetQuery(con, "SELECT I.Person_ID AS ID, R.Region_Num AS Region FROM Region R, Individuals I, Household H WHERE I.Household_ID = H.Household_ID AND H.Region_Num = R.Region_Num;")
      pop$Region[region$ID] <- region$Region[pop$ID %in% region$ID]
      schools <- dbGetQuery(con, "SELECT I.Person_ID AS ID FROM Individuals I, Works_At W WHERE I.Person_ID = W.Person_ID AND W.Work_ID IN (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);")
      pop$School <- "No"
      pop$School[schools$ID] <- "Yes"
      bus <- dbGetQuery(con, "SELECT I.Person_ID AS ID FROM Individuals I, Household H WHERE I.Household_ID = H.Household_ID AND H.PubTrans = 'YES';")
      pop$Bus <- "No"
      pop$Bus[bus$ID] <- "Yes"
      ################################################ Functions ###########################################
      #Disease factor function
      f <- function(df, contagious){
        within(df, Weight <- Weight * (contagious)) ## CHANGE DISEASE MULTIPLIER HERE
      }

      #randomize order of contacts
      randomize <- function(df)
      {
        df <- df[sample(nrow(df)),]
      }

      #number of contacts shortened to 50
      shorten <- function(df)
      {
        df <- df[c(1:50),]
      }

      #Determines number of contacts per person per day, and split up into work, bus community
      contacts <- function(meanContactsDay){
        nContacts <- rpois(1,meanContactsDay)
        if(nContacts >50 ){nContacts = 50}
        else if (nContacts == 0){nContacts = 1}
        return(nContacts)
      }

      #Returns queried edges by concatenating strings
      getEdges <- function(inf_ID, Query, QueryEnd, numContacts){
        SQL <- paste0(Query, inf_ID, QueryEnd, numContacts, ";")
        SQL <- gsub("[\n]", " ", SQL)
        edges <- dbGetQuery(con, SQL) #concatenates inf_ID with query
        return(edges)
      }

      #Gets multiplier for region density for community contacts
      getDenMultiplier <- function(inf_ID, QDensity, meanDensity){
        SQL <- paste0(QDensity, inf_ID, ";")
        SQL <- gsub("[\n]", " ", SQL)
        density <- dbGetQuery(con, SQL) #concatenates inf_ID with query
        multiplier <- density/meanDensity
        return(as.numeric(multiplier))
      }

      raceMult <- function(all_edges, asianID, blackID, hispanicID, amerindianID, multiraceID, raceWeight){
        all_edges$Weight[all_edges$Person_ID %in% asianID$AsianID] <-  all_edges$Weight[all_edges$Person_ID %in% asianID$AsianID] * raceWeight[1]
        all_edges$Weight[all_edges$Person_ID %in% blackID$BlackID] <-  all_edges$Weight[all_edges$Person_ID %in% blackID$BlackID] * raceWeight[2]
        all_edges$Weight[all_edges$Person_ID %in% hispanicID$HispanicID] <-  all_edges$Weight[all_edges$Person_ID %in% hispanicID$HispanicID] * raceWeight[3]
        all_edges$Weight[all_edges$Person_ID %in% amerindianID$AmerIndianID] <-  all_edges$Weight[all_edges$Person_ID %in% amerindianID$AmerIndianID] * raceWeight[4]
        all_edges$Weight[all_edges$Person_ID %in% multiraceID$MultiraceID] <-  all_edges$Weight[all_edges$Person_ID %in% multiraceID$MultiraceID] * raceWeight[5]
        return(all_edges)
      }

      ageMult <- function(all_edges, infantID, childID, adultID, seniorID, ageWeight){
        all_edges$Weight[all_edges$Person_ID %in% infantID$infantID] <- all_edges$Weight[all_edges$Person_ID %in% infantID$infantID] * ageWeight[1]
        all_edges$Weight[all_edges$Person_ID %in% childID$childID] <- all_edges$Weight[all_edges$Person_ID %in% childID$childID] * ageWeight[2]
        all_edges$Weight[all_edges$Person_ID %in% adultID$adultID] <- all_edges$Weight[all_edges$Person_ID %in% adultID$adultID] * ageWeight[3]
        all_edges$Weight[all_edges$Person_ID %in% seniorID$seniorID] <- all_edges$Weight[all_edges$Person_ID %in% seniorID$seniorID] * ageWeight[4]
        return(all_edges)
      }


      Weight2Prob <- function(all_edges){
        all_edges$Weight <- 1 - exp(-all_edges$Weight)
        return(all_edges)
      }

      gets_sick <- function(all_edges){
        randNo <- runif(length(all_edges$Weight),0,1)
        sickID <- all_edges$Person_ID[which(randNo < all_edges$Weight)]
        return(sickID)
      }

      dead <- function(inf_IDs, diseaseType, infantID, childID, adultID, seniorID, deathRate){
        diseaseWeight <- 1 #base
        if (diseaseType == 2){diseaseWeight <- 150 #smallpox
        }else if (diseaseType == 3){diseaseWeight <- 3 #measles
        }else if (diseaseType == 4){diseaseWeight <- 500} #Ebola
        inf <- data.frame(Person_ID = inf_IDs, Weight = diseaseWeight)
        temp <- ageMult(inf, infantID, childID, adultID, seniorID, deathRate)
        inf$Weight <- temp$Weight #put updated weight onto inf_people
        randNo <- runif(length(inf_IDs),0,1)
        dead_ID <- inf$Person_ID[which(randNo < inf$Weight)]
        return(dead_ID)
      }

      recovered <- function(inf_IDs, diseaseType, RecoveryRate, dead_ID){
        if (diseaseType == 2){RecoveryRate <- RecoveryRate*(6/20)} #smallpox #See project report citations
        else if (diseaseType == 3){RecoveryRate <- RecoveryRate*(6/8)} #Measels
        else if (diseaseType == 4){RecoveryRate <- RecoveryRate*(6/5)} #Ebola ? data
        inf <- data.frame(Person_ID = inf_IDs, Weight = RecoveryRate)
        randNo <- runif(length(inf_IDs),0,1)
        recov_ID <- inf$Person_ID[which(randNo < inf$Weight)]
        recov_ID <- recov_ID[!(recov_ID %in% dead_ID)]
        return(recov_ID)
      }
      #
      ###########################################Disease spread model#########################################
      #Loaded edge list of POSSIBLE contacts per person precalculated in EdgelistRetrieve.R


      listDf2 <- pblapply(listDf, f, contagious) #factor in disease type multiplier
      listDf2 <- pblapply(listDf2, Weight2Prob) #Converts weight to probability with 1-exp(-time)
      samp <- list()
      #List of sample indicies for each number of contacts
      for (num in c(1:50)){
        samp[[num]] <- sample(50, num)
      }

      for (day in 2:duration){ #time step loop, starting on day 2
        cat("Current Day:", day, "\n")
        for (inf_ID in inf_IDs){ #loop through all infected individuals on day "day"

          #Acquire contacted individuals + weight values
          numContacts <- contacts(meanContactsDay) #function to determine poisson random variable total contacts
          contactsDay <- listDf2[[inf_ID]][samp[[numContacts]],]  #use corresponding random sample from the 50 contacts of this person
          #compiles all edges for entire simulation (for network viz)
          #tempID <- data.frame(ID1 = rep(inf_ID, nrow(contactsDay)), ID2 = contactsDay$Person_ID)
          #totalEdges <- merge(totalEdges, tempID, all = T) TAKES TOO LONG

          #Factor in race and disease
          sickID <- gets_sick(contactsDay) #returns IDs of all people who newly get sick :

          #gets_sick function (in: data frame of contacted individID and weights, infection rate | out: individID of infected)
          pop$inf_count[inf_ID] <- pop$inf_count[inf_ID] + length(sickID) #Updates # people this individual infected
          nextInf_IDs <- c(nextInf_IDs, sickID) #compiles those who will be sick tomorrow
        }
        #######End of Day i! Who recovered? Who died? ###############
        nextInf_IDs <- unique(nextInf_IDs) #gets rid of repeated contacts during that day
        nextInf_IDs <- union(nextInf_IDs, inf_IDs) #include those who were infected at start of day with nextInf_IDs
        if (length(inf_IDs) != 0){ #only see who is dead/recovered if ppl are infected
          dead_ID <- dead(inf_IDs, diseaseType, infantID, childID, adultID, seniorID, deathRate)
          cumDead_ID <- union(cumDead_ID, dead_ID)
          recov_ID <- recovered(inf_IDs, diseaseType, RecoveryRate, cumDead_ID)
          cumRecov_ID <- union(cumRecov_ID, recov_ID)  #gets cumulative unique IDs of recovered people (for igraph)
        }
        inf_IDs <- nextInf_IDs[!(nextInf_IDs %in% c(cumRecov_ID, cumDead_ID))] #remove those who recovered or died from infected list, forms group of infected for next day
        #Update health status of pop data frame
        pop$health_status[pop$ID %in% inf_IDs] <- 1
        pop$health_status[pop$ID %in% recov_ID] <- 2
        pop$health_status[pop$ID %in% dead_ID] <- 3
        #Get the number of individuals infected, recovered, dead, susceptible for this day into vector index
        numInfected[day] <- length(inf_IDs)
        numRecovered[day] <- length(cumRecov_ID)
        numDead[day] <- length(cumDead_ID)
        susceptible[day] <- totalPeople - numInfected[day] - numRecovered[day] - numDead[day]
        #New live-updating infected group for next day
        nextInf_IDs <- inf_IDs
        cat(numInfected[day], "Infected as of Day", day, "\n")
      }

      #If no one infected on last day of simulation, extend data until end of 100 days.
      if(numInfected[length(numInfected)] == 0){numInfected[length(numInfected):100] <- 0
      numRecovered[length(numRecovered):100] <- numRecovered[length(numRecovered)]
      numDead[length(numDead):100] <- numDead[length(numDead)]
      susceptible[length(susceptible):100] <- susceptible[length(susceptible)]}

      plot(0,0,xlim = c(0,duration + 1),ylim = c(0,totalPeople), type = "n", xlab = "Day", ylab = "Number of People", main = "SIR Model of West Lafayette and Lafayette Population over Time")
      legend(80,120000, c("Infected", "Recovered", "Dead", "Susceptible"), col = c("red", "green", "black", "blue"), lwd = 1)
      lines(1:duration, numInfected, col = "red")
      lines(1:duration, numRecovered, col = "green")
      lines(1:duration, numDead, col = "black")
      lines(1:duration, susceptible, col = "blue")
      #nextInf_Ids: real-time updating ID vector of currently sick
      #inf_Ids: ID vector of sick at start of day (have chance of recover/die at end of day)
      #sickID: ID vector of those who get sick from one inf_ID iteration
      #inf_Id : infected individual lcv

      #K-Node determination
      k_number <- floor(kPct * max(numInfected))
      k_nodes <- head(pop[order(-pop$inf_count),],n = k_number) #Sort in descending order by scored individuals (inf_count)
      #Number of rows to use based on user input of k-node pct
      pop$k_node[pop$ID %in% k_nodes$ID] <- 1 #Assign K-node status in pop

      infDays <- sum(numInfected) #Use in cost calculation for policy
      #pop is now our dataset for machine learning with pop$k_node as the prediction variable
      #
      # #Percentage of SIRD from total population vectors for SIRD plot (input to animate function)
      # pctSusc <- (susceptible/totalPeople) * 100
      # pctInf <- (infected/totalPeople)*100
      # pctRecov <- (recovered/totalPeople)*100
      # pctDead <- (dead/totalPeople)*100
      #
      # #igraph visualization ONLY IF WE HAVE TIME
      # netEdges$Weight <- NULL #unweighted network (probably too large of network to use weights)
      # simplify(netEdges) #remove redundant edges
      # g <- graph_from_edgelist(netEdges, directed=F, vertices=nodes)
      # #color of nodes
      # V(g)$color <- ifelse(V(g)$ID1 %in% inf_Ids | V(g)$ID2 %in% inf_Ids, "red",
      #                      ifelse(V(g)$ID1 %in% cumRecov_ID | V(g)$ID2 %in% cumRecov_ID, "green", "black"))
      #
      #Disconnect from DB

      save(pop,numInfected, numRecovered, numDead, susceptible, infDays, file = sprintf("Case%d.rda", iter))
      all_cons <- dbListConnections(MySQL())
      for (con in all_cons)
        dbDisconnect(con)
      rm(list=setdiff(ls(), c("iter", "d", "n", "k"))) #clears workspace
      iter <- iter + 1
    }
  }
}