Advertisement
Guest User

Untitled

a guest
Sep 4th, 2019
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.50 KB | None | 0 0
  # Clean the Amazon e-commerce sample CSV, split its multi-valued columns into
  # separate columns, and write data-quality reports to dq_num.csv / dq_cat.csv.

  library(dplyr)
  library(stringr) # str_remove() and str_split_fixed() live here, not in dplyr
  library(tidyr)
  library(splitstackshape)
  library(dataQualityR)

  # ---- Load ----
  file <- read.csv("amazon_co-ecommerce_sample.csv", header = TRUE)

  # Treat empty strings as missing values.
  file[file == ""] <- NA

  # Number of missing values per column (computed before columns are dropped).
  count_na <- data.frame(
    na_count = vapply(file, function(x) sum(is.na(x)), integer(1))
  )

  # Keep only the first 16 columns.
  file <- file[seq_len(16)]

  # Column classes; auto-printed only when run interactively at top level.
  lapply(file, class)

  # ---- Type conversion ----
  # Every column goes through as.character() before as.numeric(): read.csv()
  # returned factors by default before R 4.0, and as.numeric() on a factor
  # yields the internal level codes rather than the values.
  new_file <- file %>%
    mutate(
      product_name = as.character(product_name),
      price = as.numeric(str_remove(as.character(price), "£")),
      # Keep only the leading count, dropping whatever text follows it.
      number_available_in_stock = as.numeric(
        gsub("([0-9]+).*$", "\\1", as.character(number_available_in_stock))
      ),
      # Strip thousands separators, if any, so they do not turn into NA.
      number_of_reviews = as.numeric(
        gsub(",", "", as.character(number_of_reviews), fixed = TRUE)
      ),
      number_of_answered_questions = as.numeric(
        as.character(number_of_answered_questions)
      ),
      average_review_rating = as.numeric(
        gsub(" out of 5 stars", "", as.character(average_review_rating))
      ),
      description = as.character(description),
      customers_who_bought_this_item_also_bought = as.character(
        customers_who_bought_this_item_also_bought
      )
    )

  # Row identifier used as the join key below; seq_len() is safe for zero rows.
  new_file$ID <- seq_len(nrow(new_file))

  # ---- Split category column ----
  # Break the " > "-separated category path into exactly five columns; missing
  # levels come back as "" and are converted to NA.
  a <- data.frame(
    str_split_fixed(
      as.character(new_file$amazon_category_and_sub_category), " > ", 5
    )
  )
  a[a == ""] <- NA
  colnames(a) <- c(
    "sub_category1", "sub_category2", "sub_category3",
    "sub_category4", "sub_category5"
  )
  a$ID <- seq_len(nrow(a))

  new_file <- new_file %>%
    left_join(a, by = "ID")

  # ---- Split customers_who_bought_this_item_also_bought column ----
  b <- cSplit(new_file, "customers_who_bought_this_item_also_bought", " | ")

  # ---- Data-quality report ----
  checkDataQuality(
    data = b,
    out.file.num = "dq_num.csv",
    out.file.cat = "dq_cat.csv"
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement