Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(dplyr)
- library(tidyr)
- library(arules)
- library(arulesViz)
- library(grid)
- library(methods)
- library(ggplot2)
# Build one basket per campaign from `brand_google_tags`, a long table with
# one row per (campaign_id, tag) pair:
#   campaign_id | tag
# Any table shaped like
#   basket_id | item
# can be used the same way.
google_baskets <- brand_google_tags %>%
  group_by(campaign_id) %>%
  # unique(): a transaction is a set of items, so a tag repeated within one
  # campaign must be collapsed before coercion to "transactions".
  summarise(basket = list(unique(tag)))

# Name each basket after its campaign so the id travels with the transaction
# instead of being dropped by the coercion.
transactions <- as(
  setNames(google_baskets$basket, google_baskets$campaign_id),
  "transactions"
)
# Analyzing the baskets ----
# Absolute occurrence count of every item across all transactions.
item_frequencies <- itemFrequency(transactions, type = "absolute")

# Minimum relative support (share of transactions) used as the cut-off below.
support <- 0.02

# The frequencies are absolute counts, so the relative threshold is scaled by
# the number of transactions before comparing.
min_count <- support * length(transactions)

# Items above the threshold, ordered from least to most frequent.
freq_items <- sort(item_frequencies, decreasing = FALSE)
freq_items <- freq_items[freq_items > min_count]
# Mine frequent itemsets with the apriori algorithm ----
# Keeps itemsets of at least 3 items whose support is at least `support`.
# No confidence is passed: it is a rule measure and is not used when the
# target is frequent itemsets.
support <- 0.02
itemsets <- apriori(
  transactions,
  parameter = list(
    target = "frequent itemsets",
    minlen = 3,
    support = support
  )
)

# Order by support, show the ten best-supported itemsets and the total count.
itemsets <- sort(itemsets, by = "support")
inspect(head(itemsets, n = 10))
length(itemsets)
# Maximal itemsets ----
# Keep only the itemsets flagged by is.maximal(), i.e. those that are not a
# proper subset of another itemset in `itemsets`.
maximal_flags <- is.maximal(itemsets)
is_max <- itemsets[maximal_flags]

# Show the best-supported maximal itemsets (head() default count), then how
# many maximal itemsets there are in total.
top_maximal <- head(sort(is_max, by = "support"))
inspect(top_maximal)
length(is_max)
# Mine association rules ----
# apriori() is run again on the transactions (the rules are not derived from
# `itemsets`). Rules need at least 3 items, support of at least `support`
# (the same threshold as the itemset analysis) and confidence of at least 0.8.
rules <- apriori(
  transactions,
  parameter = list(minlen = 3, support = support, confidence = 0.8)
)

# First look at the rules in mining order, with their quality measures.
inspect(head(rules, n = 10))
quality(head(rules))

# Rank by lift and show the ten strongest rules.
rules <- sort(rules, by = "lift")
inspect(head(rules, n = 10))

# Default rule plot, then the grouped-matrix view.
plot(rules)
plot(rules, method = "grouped matrix")

# Full listing and total number of rules.
inspect(rules)
length(rules)
# Export the mined rules to CSV via a plain data frame.
# The arules writer is kept here for reference:
# write(rules, file='2019-07-19-rules.csv')
# NOTE(review): the "len4" in the variable name does not match the
# "3itemset" file name; confirm which rule length is intended.
rules_len4_df <- methods::as(rules, "data.frame")
write.csv(x = rules_len4_df, file = "3itemset_2019-07-19-rules.csv")
# Horizontal bar chart of itemset support ----
# A wide left margin leaves room for the long itemset labels; the previous
# graphics settings are restored once the chart has been drawn.
old_par <- par(mar = c(5, 18, 2, 2) + 0.1)

# Itemsets as a data frame, ordered from lowest to highest support.
sets_order_supp <- DATAFRAME(
  sort(itemsets, by = "support", decreasing = FALSE)
)

barplot(
  sets_order_supp$support,
  names.arg = as.character(sets_order_supp$items),
  # The itemsets were mined with a minimum support equal to the threshold, so
  # the axis has to run up to the largest observed support; stopping at the
  # threshold itself makes every bar reach or overflow the plot region.
  xlim = c(0, max(sets_order_supp$support, support)),
  horiz = TRUE,
  las = 2,
  cex.names = 0.2,
  main = "Frequent Itemsets"
)
mtext(paste("support:", support), padj = 0.8)
par(old_par)

# Redraw the default rule plot after the bar chart.
plot(rules)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement