Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.81 KB | None | 0 0
  1. library(dplyr)
  2. library(tidyr)
  3. library(arules)
  4. library(arulesViz)
  5. library(grid)
  6. library(methods)
  7. library(ggplot2)
  8.  
  # Build one basket per campaign from `brand_google_tags`, a long table with
  # one row per (campaign, tag) pair:
  #   campaign_id | tag
  # Any table with the generic shape
  #   basket_id | item
  # can be used the same way.
  google_baskets <- brand_google_tags %>%
    group_by(campaign_id) %>%
    # unique(): a tag repeated within one campaign would put a duplicated item
    # into that basket, which the list -> transactions coercion below either
    # rejects or warns about, depending on the arules version.
    summarise(basket = list(unique(tag)))

  # Coerce the list column (one vector of tags per campaign) into an arules
  # "transactions" object, the input format expected by apriori().
  transactions <- as(google_baskets$basket, "transactions")
  20.  
  # Analyse the baskets: count how many transactions contain each item and
  # keep only the items that clear the minimum support threshold.
  item_frequencies <- itemFrequency(transactions, type = "absolute")
  support <- 0.02

  # The frequencies are absolute counts, so the relative support threshold is
  # converted into a count before comparing.
  min_count <- support * length(transactions)
  freq_items <- sort(item_frequencies, decreasing = FALSE)
  freq_items <- freq_items[freq_items > min_count]
  26.  
  27.  
  # Mine frequent itemsets with the apriori algorithm.
  # `support` is the single source of truth for the minimum relative support;
  # it is passed to apriori() instead of repeating the literal so the mining
  # threshold cannot drift away from the value reported elsewhere.
  support <- 0.02
  itemsets <- apriori(
    transactions,
    parameter = list(
      target = "frequent itemsets",
      minlen = 3,        # only itemsets with at least three items
      support = support  # confidence is omitted: it only applies to rules
    )
  )

  # Sort by support (highest first) and show the top ten itemsets.
  itemsets <- sort(itemsets, by = "support")
  inspect(head(itemsets, n = 10))
  length(itemsets)


  # Keep only the maximal itemsets, i.e. those with no frequent superset.
  # Note: despite its name, `is_max` holds the itemsets, not a logical mask.
  is_max <- itemsets[is.maximal(itemsets)]
  inspect(head(sort(is_max, by = "support")))
  length(is_max)
  41.  
  # Mine association rules (apriori's default target) from the transactions.
  # The minimum support comes from the shared `support` variable so the rules
  # use the same threshold as the frequent-itemset analysis.
  rules <- apriori(
    transactions,
    parameter = list(
      minlen = 3,         # rules with at least three items (LHS + RHS)
      support = support,
      confidence = 0.8
    )
  )
  inspect(head(rules, n = 10))
  quality(head(rules))

  # Rank the rules by lift, strongest association first.
  rules <- sort(rules, by = "lift")
  inspect(head(rules, n = 10))

  # Visual overview: default plot, then a grouped matrix.
  plot(rules)
  plot(rules, method = "grouped matrix")

  # Full listing and total number of rules found.
  inspect(rules)
  length(rules)
  54.  
  # Export the mined rules as CSV by flattening the rules object into a plain
  # data frame first.
  # (Alternative kept for reference: write(rules, file='2019-07-19-rules.csv'))
  rules_len4_df <- as(object = rules, Class = "data.frame")
  write.csv(
    x = rules_len4_df,
    file = "3itemset_2019-07-19-rules.csv"
  )
  58.  
  59.  
  # Horizontal bar chart of the frequent itemsets, ordered by support.
  # The left margin is widened so the long itemset labels fit; the previous
  # graphics settings are saved and restored once the chart is drawn.
  sets_order_supp <- DATAFRAME(sort(itemsets, by = "support", decreasing = FALSE))

  if (nrow(sets_order_supp) > 0) {
    old_par <- par(mar = c(5, 18, 2, 2) + 0.1)
    barplot(
      sets_order_supp$support,
      names.arg = as.character(sets_order_supp$items),
      # Every itemset has support >= the mining threshold, so an axis capped
      # at that threshold would make all bars overflow the plot. Scale the
      # axis to the largest observed support instead.
      xlim = c(0, max(sets_order_supp$support)),
      horiz = TRUE,
      las = 2,
      cex.names = 0.2,
      main = "Frequent Itemsets"
    )
    mtext(paste("support:", support), padj = .8)
    par(old_par)
  } else {
    message("No frequent itemsets to plot.")
  }

  plot(rules)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement