Advertisement
Guest User

Untitled

a guest
Mar 16th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.26 KB | None | 0 0
  # Load the customer entity table; empty strings and "NA" are read as missing.
  customer <- read.csv(
    file = "/Users/adamtomasov/Desktop/data/out.c_hackathon.customer_entity.csv",
    header = TRUE,
    na.strings = c("", "NA")
  )
  # Give the generic id/timestamp columns customer-specific names.
  names(customer)[names(customer) == "entity_id"] <- "customer_id"
  names(customer)[names(customer) == "created_at"] <- "customer_created_at"
  names(customer)[names(customer) == "updated_at"] <- "customer_updated_at"
  5.  
  # Load the customer address table and give its generic columns
  # address-specific names.
  customer_address <- read.csv(
    "/Users/adamtomasov/Desktop/data/out.c_hackathon.customer_address_entity.csv"
  )
  colnames(customer_address)[colnames(customer_address) == "entity_id"] <-
    "customer_address_id"
  # parent_id is used below as the join key against customer_id.
  colnames(customer_address)[colnames(customer_address) == "parent_id"] <-
    "customer_address_customer_id"
  colnames(customer_address)[colnames(customer_address) == "created_at"] <-
    "cusaddr_created_at"
  # Fix: updated_at used to be renamed to "cusaddr_created_at" as well, which
  # left the data frame with two columns sharing one name.
  colnames(customer_address)[colnames(customer_address) == "updated_at"] <-
    "cusaddr_updated_at"
  11.  
  # Join each customer to its address rows (inner join: merge() keeps only
  # keys present in both tables by default).
  customer_df <- merge(
    x = customer,
    y = customer_address,
    by.x = "customer_id",
    by.y = "customer_address_customer_id"
  )
  14.  
  # Load the sales order and sales order item tables, give their generic
  # columns table-specific names, and join them on order_id.
  data_sales_order <- read.csv(
    "/Users/adamtomasov/Desktop/data/out.c_hackathon.train_sales_order.csv"
  )
  colnames(data_sales_order)[colnames(data_sales_order) == "entity_id"] <-
    "order_id"
  colnames(data_sales_order)[colnames(data_sales_order) == "created_at"] <-
    "order_created_at"
  # Fix: updated_at used to be renamed to "order_created_at", duplicating the
  # column name assigned just above.
  colnames(data_sales_order)[colnames(data_sales_order) == "updated_at"] <-
    "order_updated_at"

  data_sales_order_item <- read.csv(
    "/Users/adamtomasov/Desktop/data/out.c_hackathon.train_sales_order_item.csv"
  )
  colnames(data_sales_order_item)[
    colnames(data_sales_order_item) == "created_at"
  ] <- "order_item_created_at"
  # Fix: updated_at used to be renamed to "order_item_created_at", duplicating
  # the column name assigned just above.
  colnames(data_sales_order_item)[
    colnames(data_sales_order_item) == "updated_at"
  ] <- "order_item_updated_at"
  # Renamed so the join with the product list can use distinct key names.
  colnames(data_sales_order_item)[
    colnames(data_sales_order_item) == "product_id"
  ] <- "product_id_ordered"

  # NOTE(review): assumes the item table already has an order_id column;
  # confirm against the CSV.
  sales_df <- merge(data_sales_order, data_sales_order_item, by = "order_id")
  25.  
  26.  
  # Attach product information to every ordered item.
  products <- read.csv(
    file = "/Users/adamtomasov/Desktop/data/out.c_hackathon.product_list.csv",
    header = TRUE,
    na.strings = c("", "NA")
  )
  names(products)[names(products) == "category_id"] <- "product_category_id"
  names(products)[names(products) == "parent_id"] <- "parent_prod_id"
  ps_df <- merge(
    x = sales_df,
    y = products,
    by.x = "product_id_ordered",
    by.y = "product_id"
  )
  32.  
  # Final join: customer/address data with product sales data, matched on the
  # customer's e-mail address.
  finall_df <- merge(
    x = customer_df,
    y = ps_df,
    by.x = "email",
    by.y = "customer_email"
  )
  35.  
  36.  
  37.  
  # Drop store_id.x and store_id.y because they carry the same information as
  # the store name. store_id contains missing values whereas store_name has
  # no empty values.
  drops <- c("store_id.x", "store_id.y")
  # drop = FALSE keeps the result a data frame even if one column remains.
  finall_df <- finall_df[, !(names(finall_df) %in% drops), drop = FALSE]
  head(finall_df)
  # Fix: product_type.x / product_type.y are columns of finall_df, not
  # standalone objects, so they must be read from the data frame.
  # [[ ]] is used for exact name matching; it yields NULL if a column is absent.
  unique(finall_df[["product_type.x"]])
  unique(finall_df[["product_type.y"]])
  45.  
  library(dplyr)
  # Number of rows per customer e-mail.
  finall_df %>%
    group_by(email) %>%
    summarise(Freq = n())
  # NOTE(review): `order` is not a column created anywhere in this script;
  # if finall_df has no such column, group_by() will pick up base::order and
  # fail. Possibly `order_id` was intended. Confirm before changing.
  zgrupovane <- group_by(finall_df, email, order)$email

  # Fix: `status` on its own is an undefined object; inspect the column of
  # finall_df instead (NULL if the column does not exist).
  finall_df[["status"]]
  51.  
  # Create the new attribute price_in_EUR and remove base_subtotal and the
  # currency code columns.
  # price = base subtotal * base-to-global rate (presumably the global
  # currency is EUR; confirm against the data).
  change_cur <- finall_df$base_subtotal * finall_df$base_to_global_rate
  finall_df$price_in_EUR <- change_cur
  # Assigning NULL removes the column from the data frame.
  finall_df[["base_subtotal"]] <- NULL
  finall_df[["base_currency_code"]] <- NULL
  finall_df[["global_currency_code"]] <- NULL
  58.  
  # Replace missing gender values with 0.
  finall_df$gender <- replace(
    x = finall_df$gender,
    list = is.na(finall_df$gender),
    values = 0
  )

  # Export the merged data set (row names are written as the first column).
  write.csv(x = finall_df, file = "/Users/macbook/Desktop/GymBeam/GymBeam.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement