Advertisement
Guest User

Untitled

a guest
Mar 16th, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.80 KB | None | 0 0
  # GymBeam data preparation script.
  #
  # Reads the customer, customer-address, sales-order, sales-order-item and
  # product CSV exports, renames clashing columns so every table keeps distinct
  # names, merges everything into one data frame (`finall_df`) and writes it
  # back to disk as GymBeam.csv.

  library(dplyr)

  # Directory that holds the input CSV files and receives the output file.
  data_dir <- "/Users/macbook/Desktop/GymBeam"

  # Rename one column of `df` from `old` to `new`.
  # When `old` is not present the data frame is returned unchanged.
  rename_column <- function(df, old, new) {
    names(df)[names(df) == old] <- new
    df
  }

  # Customers ----
  customer <- read.csv(
    file.path(data_dir, "customer-entity.csv"),
    header = TRUE,
    na.strings = c("", "NA")
  )
  customer <- rename_column(customer, "entity_id", "customer_id")
  customer <- rename_column(customer, "created_at", "customer_created_at")
  customer <- rename_column(customer, "updated_at", "customer_updated_at")

  # Customer addresses ----
  customer_address <- read.csv(file.path(data_dir, "customer-address-entity.csv"))
  customer_address <- rename_column(
    customer_address, "entity_id", "customer_address_id"
  )
  customer_address <- rename_column(
    customer_address, "parent_id", "customer_address_customer_id"
  )
  customer_address <- rename_column(
    customer_address, "created_at", "cusaddr_created_at"
  )
  # `updated_at` gets its own name; reusing "cusaddr_created_at" here would
  # leave two columns with the same name in the table.
  customer_address <- rename_column(
    customer_address, "updated_at", "cusaddr_updated_at"
  )

  # Merge customer data frames ----
  customer_df <- merge(
    customer, customer_address,
    by.x = "customer_id", by.y = "customer_address_customer_id"
  )

  # Sales orders and order items ----
  data_sales_order <- read.csv(file.path(data_dir, "train-sales-order.csv"))
  data_sales_order <- rename_column(data_sales_order, "entity_id", "order_id")
  data_sales_order <- rename_column(
    data_sales_order, "created_at", "order_created_at"
  )
  # Distinct name for `updated_at` (was a duplicate of "order_created_at").
  data_sales_order <- rename_column(
    data_sales_order, "updated_at", "order_updated_at"
  )

  data_sales_order_item <- read.csv(
    file.path(data_dir, "train-sales-order-item.csv")
  )
  data_sales_order_item <- rename_column(
    data_sales_order_item, "created_at", "order_item_created_at"
  )
  # Distinct name for `updated_at` (was a duplicate of "order_item_created_at").
  data_sales_order_item <- rename_column(
    data_sales_order_item, "updated_at", "order_item_updated_at"
  )
  data_sales_order_item <- rename_column(
    data_sales_order_item, "product_id", "product_id_ordered"
  )

  # The join relies on the item table already having an `order_id` column.
  sales_df <- merge(data_sales_order, data_sales_order_item, by = "order_id")

  # Merge order data with product information ----
  products <- read.csv(
    file.path(data_dir, "product-list.csv"),
    header = TRUE,
    na.strings = c("", "NA")
  )
  products <- rename_column(products, "category_id", "product_category_id")
  products <- rename_column(products, "parent_id", "parent_prod_id")
  ps_df <- merge(
    sales_df, products,
    by.x = "product_id_ordered", by.y = "product_id"
  )

  # Final merge: customer data with product sales data ----
  finall_df <- merge(
    customer_df, ps_df,
    by.x = "email", by.y = "customer_email"
  )

  # Drop store_id.x / store_id.y because they carry the same information as
  # the store name. store_id contains missing values, whereas store_name has
  # no empty values.
  drops <- c("store_id.x", "store_id.y")
  finall_df <- finall_df[, !(names(finall_df) %in% drops), drop = FALSE]
  head(finall_df)

  # Exploratory checks. The original referenced `product_type.x` /
  # `product_type.y` as bare objects, which are never defined; presumably the
  # merge-suffixed columns of `finall_df` were meant -- TODO confirm.
  unique(finall_df$product_type.x)
  unique(finall_df$product_type.y)

  # Number of merged rows per customer e-mail.
  finall_df %>%
    group_by(email) %>%
    summarise(Freq = n())

  # NOTE(review): the original grouped by `order`, which is not a column created
  # anywhere in this script (it would resolve to base::order and fail);
  # `order_id` is assumed to be the intended key -- confirm.
  zgrupovane <- group_by(finall_df, email, order_id)$email

  # NOTE(review): the original had a bare `status` here, an undefined object
  # that stops a sourced run before the file is written; presumably the
  # `status` column was being inspected -- confirm.
  finall_df$status

  write.csv(finall_df, file = file.path(data_dir, "GymBeam.csv"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement