Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Customer data ----
# Load the customer table; empty strings and the literal "NA" are both read
# as missing values.
customer <- read.csv(
  "/Users/macbook/Desktop/GymBeam/customer-entity.csv",
  header = TRUE,
  na.strings = c("", "NA")
)
# Give the generic key/timestamp columns table-specific names.
colnames(customer)[colnames(customer) == "entity_id"] <- "customer_id"
colnames(customer)[colnames(customer) == "created_at"] <- "customer_created_at"
colnames(customer)[colnames(customer) == "updated_at"] <- "customer_updated_at"

# Load the customer address table and rename its generic columns likewise.
customer_address <- read.csv(
  "/Users/macbook//Desktop/GymBeam/customer-address-entity.csv"
)
colnames(customer_address)[colnames(customer_address) == "entity_id"] <-
  "customer_address_id"
colnames(customer_address)[colnames(customer_address) == "parent_id"] <-
  "customer_address_customer_id"
colnames(customer_address)[colnames(customer_address) == "created_at"] <-
  "cusaddr_created_at"
# Fixed: this column was previously renamed to "cusaddr_created_at" as well,
# which left two columns with the same name.
colnames(customer_address)[colnames(customer_address) == "updated_at"] <-
  "cusaddr_updated_at"

# Merge customer data frames: each address row is matched to its customer via
# the address's customer reference. merge() keeps only matching rows by
# default (all = FALSE).
customer_df <- merge(
  customer,
  customer_address,
  by.x = "customer_id",
  by.y = "customer_address_customer_id"
)
# Sales data ----
# Load the sales orders and give the generic columns order-specific names.
data_sales_order <- read.csv(
  "/Users/macbook//Desktop/GymBeam/train-sales-order.csv"
)
colnames(data_sales_order)[colnames(data_sales_order) == "entity_id"] <-
  "order_id"
colnames(data_sales_order)[colnames(data_sales_order) == "created_at"] <-
  "order_created_at"
# Fixed: this column was previously renamed to "order_created_at" as well,
# which left two columns with the same name.
colnames(data_sales_order)[colnames(data_sales_order) == "updated_at"] <-
  "order_updated_at"

# Load the order items and rename their generic columns.
data_sales_order_item <- read.csv(
  "/Users/macbook//Desktop/GymBeam/train-sales-order-item.csv"
)
colnames(data_sales_order_item)[
  colnames(data_sales_order_item) == "created_at"
] <- "order_item_created_at"
# Fixed: this column was previously renamed to "order_item_created_at" as
# well, duplicating the name of the creation timestamp column.
colnames(data_sales_order_item)[
  colnames(data_sales_order_item) == "updated_at"
] <- "order_item_updated_at"
colnames(data_sales_order_item)[
  colnames(data_sales_order_item) == "product_id"
] <- "product_id_ordered"

# Merge orders with their items on order_id.
# NOTE(review): the item table is not given an order_id column here, so it is
# assumed to already contain one in the CSV -- confirm.
sales_df <- merge(data_sales_order, data_sales_order_item, by = "order_id")
# Product data ----
# Merge data about orders with product information.
# Empty strings and the literal "NA" are both read as missing values.
products <- read.csv(
  "/Users/macbook//Desktop/GymBeam/product-list.csv",
  header = TRUE,  # spelled out: `T` is an ordinary variable and can be reassigned
  na.strings = c("", "NA")
)
colnames(products)[colnames(products) == "category_id"] <- "product_category_id"
colnames(products)[colnames(products) == "parent_id"] <- "parent_prod_id"

# Match each ordered item to its product record. merge() keeps only matching
# rows by default (all = FALSE).
ps_df <- merge(
  sales_df,
  products,
  by.x = "product_id_ordered",
  by.y = "product_id"
)
# Final merge: combine the customer data with the product sales data by
# matching the customer's `email` against the order's `customer_email`.
finall_df <- merge(
  customer_df,
  ps_df,
  by.x = "email",
  by.y = "customer_email"
)

# Drop store_id.x and store_id.y because they carry the same information as
# the store name. store_id contains missing values, whereas store_name has no
# empty values.
drops <- c("store_id.x", "store_id.y")
finall_df <- finall_df[, is.na(match(names(finall_df), drops))]
# Exploration and export ----
# dplyr provides %>%, group_by(), summarise() and n() used below.
library(dplyr)

# Preview the first rows of the merged data.
head(finall_df)

# Distinct values of the two product_type columns. The columns live inside
# finall_df, so they must be extracted from it; referring to them as bare
# names fails with "object not found".
unique(finall_df[["product_type.x"]])
unique(finall_df[["product_type.y"]])

# Number of merged rows per customer e-mail.
finall_df %>%
  group_by(email) %>%
  summarise(Freq = n())

# NOTE(review): no column named `order` is created anywhere in this script;
# unless one comes from the input CSVs this call fails. `order_id` may have
# been intended -- confirm. Also note that only the `email` column is
# extracted, so the grouping does not change the resulting vector.
zgrupovane <- group_by(finall_df, email, order)$email

# Inspect the status column (previously referenced as a bare name, which
# fails with "object not found").
# NOTE(review): presumably the order status -- confirm the column name after
# the merges.
finall_df[["status"]]

# Write the merged data to disk. write.csv() also writes the row names as an
# unnamed first column by default.
write.csv(finall_df, file = "/Users/macbook/Desktop/GymBeam/GymBeam.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement