Advertisement
Guest User

Untitled

a guest
May 16th, 2019
317
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.75 KB | None | 0 0
  1. setwd("/Users/liufangyi/Documents/sql/CP4")
  2.  
  3. list.files()
  4.  
  5. install.packages('RPostgreSQL')
  6. install.packages('dplyr')
  7. install.packages('tidyr')
  8. library('RPostgreSQL')
  9. library('dplyr')
  10. library('tidyr')
  11. drv <- dbDriver('PostgreSQL')
  12.  
  13. con <- dbConnect(drv, dbname = 'web_4',
  14. host = 's19db.apan5310.com', port = 50202,
  15. user = 'postgres', password = '64ddswmi')
  16.  
  17. df1 <- read.csv(file = 'DIM_ONLINE_PANELIST - DIM_ONLINE_PANELIST.csv',stringsAsFactors=FALSE)
  18. df2 <- read.csv(file = 'FACT_ONLINE_BEHAVIOR - FACT_ONLINE_BEHAVIOR.csv',stringsAsFactors=FALSE)
  19. df3 <- read.csv(file = 'DIM_ONLINE_DICTIONARY_CATEGORY - DIM_ONLINE_DICTIONARY_CATEGORY.csv',stringsAsFactors=FALSE)
  20. df4 <- read.csv(file = 'CONNEXIONS(update).csv',stringsAsFactors=FALSE)
  21. df4 <- df4[1:53,]
  22. df5 <- read.csv(file = 'employment.csv',stringsAsFactors=FALSE)
  23. df1 <- df1 [1:100000,]
  24. sub <- read.csv(file = 'sub.csv',stringsAsFactors=FALSE)
  25.  
  26. #employment
  27. employment <- df5
  28. dbWriteTable(con,name = 'employment', value = employment,
  29. row.names = FALSE, append = TRUE)
  30. #race
  31. df6 <- df4 %>% select(race_id,ethnicity) %>% distinct()
  32. df6 <- bind_cols('employee_id' = sample(101:106,nrow(df6), replace = TRUE),df6)
  33. dbWriteTable(con,name = 'race', value = df6,
  34. row.names = FALSE, append = TRUE)
  35.  
  36. #education
  37. df6 <- df4 %>% select(edu_id,edu_level) %>% distinct()
  38. df6 <- bind_cols('employee_id' = sample(102:107,nrow(df6)),df6)
  39. dbWriteTable(con,name = 'education', value = df6,
  40. row.names = FALSE, append = TRUE)
  41.  
  42. #house_income
  43. df6 <- df4 %>% select(hi_id,hi_level) %>% distinct()
  44. df6 <- bind_cols('employee_id' = sample(104:111,nrow(df6)),df6)
  45. dbWriteTable(con,name = 'housing_income', value = df6,
  46. row.names = FALSE, append = TRUE)
  47.  
  48. #occpucation
  49. df6 <- df4 %>% select(oc_id,occupation) %>% distinct()
  50. df6 <- bind_cols('employee_id' = sample(108:114,nrow(df6)),df6)
  51. dbWriteTable(con,name = 'occupation', value = df6,
  52. row.names = FALSE, append = TRUE)
  53.  
  54. #age
  55. df6 <-df4 %>% select(age_id,age_range) %>% distinct()
  56. df6 <- bind_cols('employee_id' = sample(102:108,nrow(df6)),df6)
  57. dbWriteTable(con,name = 'age', value = df6,
  58. row.names = FALSE, append = TRUE)
  59.  
  60. #urbanicity
  61. df6 <- df4 %>% select(urb_id,urb_level) %>% distinct()
  62. df6 <- bind_cols('employee_id' = sample(107:110,nrow(df6)),df6)
  63. dbWriteTable(con,name = 'urbanicity', value = df6,
  64. row.names = FALSE, append = TRUE)
  65.  
  66. #technodoption#####
  67. df6 <- df4 %>% select(tech_id,tech_level) %>% distinct()
  68. df6 <- bind_cols('employee_id' = sample(109:114,nrow(df6)),df6)
  69. df6 <- df6[-c(4),]
  70. dbWriteTable(con,name = 'technodoption_class', value = df6,
  71. row.names = FALSE, append = TRUE)
  72.  
  73. # tenure
  74. df6 <- df4 %>% select(tenure_id,tenure_status) %>% distinct()
  75. df6 <- bind_cols('employee_id' = sample(103:108,nrow(df6)),df6)
  76. dbWriteTable(con,name = 'tenure', value = df6,
  77. row.names = FALSE, append = TRUE)
  78.  
  79. #companion
  80. df6 <- df4 %>% select(comp_id,comp_status) %>% distinct()
  81. df6 <- bind_cols('employee_id' = sample(101:104,nrow(df6)),df6)
  82. dbWriteTable(con,name = 'companion', value = df6,
  83. row.names = FALSE, append = TRUE)
  84.  
  85. #action_time
  86. df6 <- df2 %>% select('action_time' = 'ACTION_TIMESTAMP', 'action_date_id' = 'ACTION_DATE_ID',
  87. 'action_time_id' = 'ACTION_TIME_ID') %>% distinct()
  88. dbWriteTable(con,name = 'action_time', value = df6,
  89. row.names = FALSE, append = TRUE)
  90.  
  91.  
  92. #Cat_ID, category, subcategory
  93. sub <- read.csv("sub.csv")
  94. sub <- sub[1:86,]
  95. df10 <- df3 %>% select('category_id' = 'CATEGORY_ID', 'subcategory_name' = 'SUBCATEGORY_NAME') %>% distinct()
  96. df10$subcategory_name <- factor(df10$subcategory_name)
  97. sub$subcategory_name <- factor(sub$subcategory_name)
  98. levels(sub$subcategory_name) = levels(df10$subcategory_name)
  99. df20 <- df10 %>% inner_join(y = sub, by = 'subcategory_name')
  100. Cat_IDs <- df20 %>% select(category_id, subcategory_id) %>% distinct()
  101. Category <- df3 %>% select('category_id' = 'CATEGORY_ID', 'category_name' = 'CATEGORY_NAME') %>% distinct()
  102. subcategories <- sub
  103.  
  104. dbWriteTable(con,name = 'categories', value = Category,
  105. row.names = FALSE, append = TRUE)
  106. dbWriteTable(con,name = 'subcategories', value = subcategories,
  107. row.names = FALSE, append = TRUE)
  108.  
  109. dbWriteTable(con,name = 'category_ids', value = Cat_IDs,
  110. row.names = FALSE, append = TRUE)
  111.  
  112.  
  113.  
  114.  
  115. #segmentation
  116. df6 <- df4 %>% select('connections_code' = 'CODE', 'segment_nickname' = 'SEGMENT_NICKNAME', urb_id, hi_id, age_id, comp_id,
  117. tenure_id, edu_id, race_id, tech_id, oc_id) %>% distinct()
  118. dbWriteTable(con,name = 'segmentation', value = df6,
  119. row.names = FALSE, append = TRUE)
  120. #subdomain
  121. df6 <- df2 %>% select('domain_name' = 'DOMAIN_NAME', 'subdomain_name' = 'SUBDOMAIN_NAME') %>% distinct(domain_name,.keep_all = TRUE)
  122. dbWriteTable(con,name = 'subdomain', value = df6,
  123. row.names = FALSE, append = TRUE)
  124.  
  125. #online_behavior
  126. df6 <- df2 %>% select('id' = 'ID', 'meter_member_hk' = 'METER_MEMBER_HK', 'category_id' = 'CATEGORY_ID', 'subcategory_id' = 'SUBCATEGORY_ID',
  127. 'domain_name' = 'DOMAIN_NAME', 'referring_domain' = 'REFERRING_DOMAIN', 'duration_in_sec' = 'DURATION_IN_SECONDS',
  128. 'action_time' = 'ACTION_TIMESTAMP') %>% distinct(meter_member_hk,.keep_all = TRUE)
  129. df6 <- na.omit(df6)
  130. dbWriteTable(con,name = 'online_behavior', value = df6,
  131. row.names = FALSE, append = TRUE)
  132.  
  133. #user_info
  134. df6 <- df1 %>% select('meter_member_hk' = 'METER_MEMBER_HK', 'dma'='DMA', 'connections_code' = 'CONNEXIONS_CODE') %>% distinct(meter_member_hk,.keep_all = TRUE)
  135. df6 <- df6 %>% dplyr::filter(!(connections_code==''))
  136. df6 <- unique(df6)
  137. df6 <- subset(df6,connections_code <=53)
  138. dbWriteTable(con,name = 'user_info', value = df6,
  139. row.names = FALSE, append = TRUE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement