Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(arules)
- ftrmedio <- merged.geotweetsmadcat4sq.dfs[,c(4,6)] %>% as.data.frame() # Select columns 4 and 6; they are read below as cat_medio and user_id_str (the original note said cat_bajo — TODO confirm which category level is intended)
- ftr.geotweetsmad4sq.medio <- as(split(ftrmedio[,"cat_medio"],ftrmedio[,"user_id_str"]),"transactions") # ftr: frequent travel route. Group cat_medio values by user_id_str and coerce to transactions; duplicated items within a transaction are dropped (see the warning below)
- ## Warning in asMethod(object): removing duplicated items in transactions
- LIST(ftr.geotweetsmad4sq.medio) %>% head(n=4) # List the first 4 transactions; the temporal component is not taken into account
- ## $`7007`
- ## [1] "Art Museum" "College Academic Building"
- ## [3] "Hotel" "Office"
- ## [5] "Park" "Plaza"
- ## [7] "Scenic Lookout"
- ##
- ## $`635803`
- ## [1] "Bank" "Bar" "Plaza"
- ##
- ## $`731833`
- ## [1] "Art Gallery" "Building" "Chinese Restaurant"
- ## [4] "Convention" "Coworking Space" "Farmers Market"
- ## [7] "Gym / Fitness Center" "Hotel" "Multiplex"
- ## [10] "Music Venue" "Park" "Restaurant"
- ## [13] "Spanish Restaurant" "Tapas Restaurant" "Tech Startup"
- ## [16] "Turkish Restaurant"
- ##
- ## $`732673`
- ## [1] "Airport" "Bank" "Hotel" "Restaurant"
# Print the summary of the transactions object (size, most frequent items, transaction length distribution)
- summary(ftr.geotweetsmad4sq.medio)
- ## transactions as itemMatrix in sparse format with
- ## 12382 rows (elements/itemsets/transactions) and
- ## 413 columns (items) and a density of 0.008856487
- ##
- ## most frequent items:
- ## Train Station Plaza Park Bank Hotel
- ## 3759 3746 2417 1846 1615
- ## (Other)
- ## 31907
- ##
- ## element (itemset/transaction) length distribution:
- ## sizes
- ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- ## 384 5369 2550 1432 821 530 368 239 136 118 102 78 60 44 25
- ## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
- ## 26 16 11 7 10 13 5 6 2 4 1 3 3 2 2
- ## 31 32 33 34 35 36 46 47 49 75
- ## 1 3 2 1 1 3 1 1 1 1
- ##
- ## Min. 1st Qu. Median Mean 3rd Qu. Max.
- ## 1.000 2.000 3.000 3.658 4.000 75.000
- ##
- ## includes extended item information - examples:
- ## labels
- ## 1 Accessories Store
- ## 2 Adult Education Center
- ## 3 Advertising Agency
- ##
- ## includes extended transaction information - examples:
- ## transactionID
- ## 1 7007
- ## 2 635803
- ## 3 731833
# Mine association rules with apriori: minimum support 0.001, minimum confidence 0.60, target "rules"
- reglas.ftr.mad4sq.medio <- apriori(ftr.geotweetsmad4sq.medio, parameter = list(supp=0.001, conf=0.60,target="rules"))
- ## Apriori
- ##
- ## Parameter specification:
- ## confidence minval smax arem aval originalSupport maxtime support minlen
- ## 0.6 0.1 1 none FALSE TRUE 5 0.001 1
- ## maxlen target ext
- ## 10 rules FALSE
- ##
- ## Algorithmic control:
- ## filter tree heap memopt load sort verbose
- ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
- ##
- ## Absolute minimum support count: 12
- ##
- ## set item appearances ...[0 item(s)] done [0.00s].
- ## set transactions ...[413 item(s), 12382 transaction(s)] done [0.02s].
- ## sorting and recoding items ... [218 item(s)] done [0.00s].
- ## creating transaction tree ... done [0.01s].
- ## checking subsets of size 1 2 3 4 5 6 done [0.02s].
- ## writing ... [2287 rule(s)] done [0.00s].
- ## creating S4 object ... done [0.01s].
# Print the summary of the mined rule set (rule lengths and quality measures)
- summary(reglas.ftr.mad4sq.medio)
- ## set of 2287 rules
- ##
- ## rule length distribution (lhs + rhs):sizes
- ## 2 3 4 5 6
- ## 1 514 1320 434 18
- ##
- ## Min. 1st Qu. Median Mean 3rd Qu. Max.
- ## 2.00 4.00 4.00 3.98 4.00 6.00
- ##
- ## summary of quality measures:
- ## support confidence lift
- ## Min. :0.001050 Min. :0.6000 Min. : 1.976
- ## 1st Qu.:0.001131 1st Qu.:0.6500 1st Qu.: 2.267
- ## Median :0.001292 Median :0.7143 Median : 2.610
- ## Mean :0.001515 Mean :0.7322 Mean : 3.237
- ## 3rd Qu.:0.001615 3rd Qu.:0.8000 3rd Qu.: 3.202
- ## Max. :0.010176 Max. :1.0000 Max. :16.416
- ##
- ## mining info:
- ## data ntransactions support confidence
- ## ftr.geotweetsmad4sq.medio 12382 0.001 0.6
# Remove redundant rules.
# Rules are ranked by lift (highest first, the arules sort default). A rule is
# flagged as redundant when some higher-ranked rule's item set is a subset of
# its own item set, so only the more general, higher-lift rule is kept.
rules.sorted <- sort(reglas.ftr.mad4sq.medio, by = "lift")
# Ask for a dense logical matrix explicitly: the lower.tri() masking below
# needs a base matrix, while is.subset() may return a sparse one by default.
subset.matrix <- is.subset(rules.sorted, rules.sorted, sparse = FALSE)
# Keep only the strict upper triangle (row rule ranked before column rule);
# self-comparisons and comparisons against lower-ranked rules are masked out.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA
# A column with at least one remaining TRUE belongs to a redundant rule.
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
rules.pruned.medio <- rules.sorted[!redundant]
inspect(rules.pruned.medio)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement