# Untitled

library(dplyr)
library(tidyr)
library(arules)
library(arulesViz)
library(grid)
library(methods)
library(ggplot2)

# `brand_google_tags` is expected to be a long-format table with one row per
# (campaign, tag) pair:
#   campaign_id | tag
# The same pipeline works for any basket/item data of the form:
#   basket_id   | item

# Collapse the long (campaign_id, tag) table into one basket per campaign.
# Tags are de-duplicated within each campaign: a transaction is a set of
# items, and coercing a list that contains repeated items to "transactions"
# either fails or emits a warning, depending on the arules version.
google_baskets <- brand_google_tags %>%
  group_by(campaign_id) %>%
  summarise(basket = list(unique(tag)))

# One transaction per campaign, in the row order of `google_baskets`.
transactions <- as(google_baskets$basket, "transactions")

# Analyzing the baskets ----
# Absolute item frequencies: for each item, the number of transactions that
# contain it.
item_frequencies <- itemFrequency(transactions, type = "absolute")

# Minimum support, as a fraction of all transactions.
support <- 0.02

# Convert the relative threshold into a transaction count so it can be
# compared with the absolute frequencies above.
min_count <- support * length(transactions)

# Items above the threshold, ordered from least to most frequent.
freq_items <- sort(item_frequencies, decreasing = FALSE)
freq_items <- freq_items[freq_items > min_count]


# Run the apriori algorithm ----
# Mine frequent itemsets containing at least 3 items. The threshold is passed
# via `support` rather than repeated as a literal, so the mining step and any
# later use of `support` cannot drift apart. No confidence is given:
# confidence is a property of rules and does not apply to frequent itemsets.
support <- 0.02
itemsets <- apriori(
  transactions,
  parameter = list(
    target = "frequent itemsets",
    minlen = 3,
    support = support
  )
)

# Sort by support (highest first) and show the top 10 plus the total count.
itemsets <- sort(itemsets, by = "support")
inspect(head(itemsets, n = 10))
length(itemsets)


# Maximal itemsets ----
# Keep only the itemsets flagged by is.maximal(), then show the
# best-supported ones and how many there are in total.
maximal_mask <- is.maximal(itemsets)
is_max <- itemsets[maximal_mask]

is_max_by_support <- sort(is_max, by = "support")
inspect(head(is_max_by_support))
length(is_max)

# Association rules ----
# Mine rules directly from the transactions: at least 3 items per rule,
# minimum support 0.02, minimum confidence 0.8.
rule_params <- list(minlen = 3, support = 0.02, confidence = 0.8)
rules <- apriori(transactions, parameter = rule_params)

# First look at the rules in mining order, and at their quality measures.
first_rules <- head(rules, n = 10)
inspect(first_rules)
quality(head(rules))

# Re-order by lift so the strongest associations come first.
rules <- sort(rules, by = "lift")
top_by_lift <- head(rules, n = 10)
inspect(top_by_lift)

# Default plot and grouped-matrix view, followed by the full listing and
# the number of rules.
plot(rules)
plot(rules, method = "grouped matrix")
inspect(rules)
length(rules)

# Export the rules ----
# Alternative exporter, kept for reference:
# write(rules, file = "2019-07-19-rules.csv")
rules_csv_path <- "3itemset_2019-07-19-rules.csv"

# Coerce the rules to a data frame and write it out as CSV.
rules_len4_df <- as(rules, "data.frame")
write.csv(rules_len4_df, file = rules_csv_path)


# Support bar chart ----
# Frequent itemsets as a data frame, ordered by ascending support so the
# best-supported itemset ends up at the top of the horizontal bar chart.
sets_order_supp <- DATAFRAME(
  sort(itemsets, by = "support", decreasing = FALSE)
)

# Widen the left margin for the long itemset labels, remembering the previous
# graphics settings so they can be restored once the chart is drawn.
old_par <- par(mar = c(5, 18, 2, 2) + 0.1)

# Skip the chart when nothing was mined; max() of an empty vector is -Inf.
if (nrow(sets_order_supp) > 0) {
  # The itemsets were mined with a minimum support of 0.02, so an axis capped
  # at 0.02 would cut off every bar. Scale the axis to the largest observed
  # support instead.
  support_axis_max <- max(sets_order_supp$support)
  barplot(
    sets_order_supp$support,
    names.arg = as.character(sets_order_supp$items),
    xlim = c(0, support_axis_max),
    horiz = TRUE,
    las = 2,
    cex.names = 0.2,
    main = "Frequent Itemsets"
  )
  mtext(paste("support:", support), padj = 0.8)
}

# Restore the margins so later plots are not drawn with the widened layout.
par(old_par)

plot(rules)