Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(reshape) #for unpacking df
- library(sqldf) #for executing query for JOIN
- library(RMYSQL) #for connecting to MySQL
- #Data import
- Data <- read.csv("generatedPop.csv", header = T)
- schoolData <- read.csv('schools.csv', header = T, stringsAsFactors=FALSE)
- uniform_sample <-function(n,j,k) sample(j:k,n,replace=T)
- #Unwrap our Household Population based on number of people
- df1 <- untable(Data[,c(1,8)], num=Data[,5])
- IndivID <- rownames(df1)
- df1 <- cbind(IndivID, df1)
- rownames(df1) <- 1:nrow(df1)
- #Unwrap Household Population based on number of children
- df2 <- untable(Data[,c(1,8)], num = Data[,4])
- IndivID <- rownames(df2)
- df2 <- cbind(IndivID, df2)
- rownames(df2) <- 1:nrow(df2)
- #Label all in df2 as "Child"
- df2$Child <- "Child" #label as child
- #Assign Age
- Age <- uniform_sample(nrow(df2),4,18)
- df2 <- cbind(df2, Age)
- School <- rep(0,nrow(df2))
- temp <- vector('character')
- for (x in c(1:nrow(df2))){
- ifelse(df2$Age[x] < 11, School[x] <- sample(x = schoolData[df2$Region[x],c(1,2)], size = 1, prob = c(0.5, 0.5)),
- ifelse(df2$Age[x] < 15, School[x] <- schoolData$Middle.Schools[df2$Region[x]],
- School[x] <- schoolData$High.Schools[df2$Region[x]]))
- }
- df2 <- cbind(df2, School = unlist(School)) #add School into df2
- df2$Region <- NULL #Avoid repeat with df1
- #Join all population and children together via IndivID
- df3 <- sqldf("SELECT IndivID, Age, Child, School
- FROM df1
- LEFT JOIN df2 USING(IndivID)")
- #Assign ages to general population based on exponential distribution (cited)
- df3$Age[is.na(df3$Age)] <- floor(rexp(length(df3$Age[df3$Child != "Child"]), rate = 0.05))+18
- df3$Age[df3$Age > 100] = 100
- df3 <- cbind(ID = c(1:nrow(df3)),df3)
- #Floor IndivID to Household ID
- df3$IndivID <- floor(as.numeric(df3$IndivID))
- colnames(df3)[colnames(df3)=="IndivID"] <- "HouseID"
- individualPop <- df3
- #Health Status/Workplace TBD
- individualPop$Health_Status <- "Healthy"
- individualPop$Workplace <- "TBD"
- #Write to csv file
- write.csv(individualPop, file = "individualPop.csv",row.names=FALSE)
- #Connect to MySQL
- # con <- dbConnect(MySQL(),
- # user="g1081391", password="W1fihasnomeaning",
- # dbname="g1081391", host="mydb.ics.purdue.edu")
- # dbWriteTable(con, "Individuals", individualPop, append = TRUE)
- # #Exiting
- # on.exit(dbDisconnect(MySQL(),
- # user="root", password="MSWang100",
- # dbname="prelab", host="localhost"))
- # all_cons <- dbListConnections(MySQL())
- # for (con in all_cons)
- # dbDisconnect(con)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement