Advertisement
Guest User

Untitled

a guest
Mar 25th, 2018
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. library(reshape) #for unpacking df
  2. library(sqldf) #for executing query for JOIN
  3. library(RMYSQL) #for connecting to MySQL
  4.  
  5. #Data import
  6. Data <- read.csv("generatedPop.csv", header = T)
  7. schoolData <- read.csv('schools.csv', header = T, stringsAsFactors=FALSE)
  8.  
  9. uniform_sample <-function(n,j,k) sample(j:k,n,replace=T)
  10. #Unwrap our Household Population based on number of people
  11. df1 <- untable(Data[,c(1,8)], num=Data[,5])
  12. IndivID <- rownames(df1)
  13. df1 <- cbind(IndivID, df1)
  14. rownames(df1) <- 1:nrow(df1)
  15.  
  16. #Unwrap Household Population based on number of children
  17. df2 <- untable(Data[,c(1,8)], num = Data[,4])
  18. IndivID <- rownames(df2)
  19. df2 <- cbind(IndivID, df2)
  20. rownames(df2) <- 1:nrow(df2)
  21. #Label all in df2 as "Child"
  22. df2$Child <- "Child" #label as child
  23.  
  24. #Assign Age
  25. Age <- uniform_sample(nrow(df2),4,18)
  26. df2 <- cbind(df2, Age)
  27. School <- rep(0,nrow(df2))
  28. temp <- vector('character')
  29. for (x in c(1:nrow(df2))){
  30. ifelse(df2$Age[x] < 11, School[x] <- sample(x = schoolData[df2$Region[x],c(1,2)], size = 1, prob = c(0.5, 0.5)),
  31. ifelse(df2$Age[x] < 15, School[x] <- schoolData$Middle.Schools[df2$Region[x]],
  32. School[x] <- schoolData$High.Schools[df2$Region[x]]))
  33. }
  34. df2 <- cbind(df2, School = unlist(School)) #add School into df2
  35.  
  36. df2$Region <- NULL #Avoid repeat with df1
  37.  
  38. #Join all population and children together via IndivID
  39. df3 <- sqldf("SELECT IndivID, Age, Child, School
  40. FROM df1
  41. LEFT JOIN df2 USING(IndivID)")
  42.  
  43. #Assign ages to general population based on exponential distribution (cited)
  44. df3$Age[is.na(df3$Age)] <- floor(rexp(length(df3$Age[df3$Child != "Child"]), rate = 0.05))+18
  45. df3$Age[df3$Age > 100] = 100
  46. df3 <- cbind(ID = c(1:nrow(df3)),df3)
  47.  
  48. #Floor IndivID to Household ID
  49. df3$IndivID <- floor(as.numeric(df3$IndivID))
  50. colnames(df3)[colnames(df3)=="IndivID"] <- "HouseID"
  51. individualPop <- df3
  52.  
  53. #Health Status/Workplace TBD
  54. individualPop$Health_Status <- "Healthy"
  55. individualPop$Workplace <- "TBD"
  56. #Write to csv file
  57. write.csv(individualPop, file = "individualPop.csv",row.names=FALSE)
  58.  
  59. #Connect to MySQL
  60. # con <- dbConnect(MySQL(),
  61. # user="g1081391", password="W1fihasnomeaning",
  62. # dbname="g1081391", host="mydb.ics.purdue.edu")
  63. # dbWriteTable(con, "Individuals", individualPop, append = TRUE)
  64. # #Exiting
  65. # on.exit(dbDisconnect(MySQL(),
  66. # user="root", password="MSWang100",
  67. # dbname="prelab", host="localhost"))
  68. # all_cons <- dbListConnections(MySQL())
  69. # for (con in all_cons)
  70. # dbDisconnect(con)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement