Advertisement
Guest User

Untitled

a guest
Jul 25th, 2017
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.60 KB | None | 0 0
  1. library(arules)
  2. ftrmedio <- as.data.frame(merged.geotweetsmadcat4sq.dfs[, c("cat_medio", "user_id_str")]) # Keep only cat_medio and user_id_str; selected by name (not by position 4 and 6) so a column reordering upstream cannot silently pick the wrong fields.
  3. ftr.geotweetsmad4sq.medio <- as(split(ftrmedio[["cat_medio"]], ftrmedio[["user_id_str"]]), "transactions") # ftr: frequent travel route. One transaction per user_id_str; the coercion drops duplicated items within a transaction (hence the warning below).
  4. ## Warning in asMethod(object): removing duplicated items in transactions
  5. head(LIST(ftr.geotweetsmad4sq.medio), n = 4) # Show the first four transactions as plain item lists, without taking the temporal component into account.
  6. ## $`7007`
  7. ## [1] "Art Museum" "College Academic Building"
  8. ## [3] "Hotel" "Office"
  9. ## [5] "Park" "Plaza"
  10. ## [7] "Scenic Lookout"
  11. ##
  12. ## $`635803`
  13. ## [1] "Bank" "Bar" "Plaza"
  14. ##
  15. ## $`731833`
  16. ## [1] "Art Gallery" "Building" "Chinese Restaurant"
  17. ## [4] "Convention" "Coworking Space" "Farmers Market"
  18. ## [7] "Gym / Fitness Center" "Hotel" "Multiplex"
  19. ## [10] "Music Venue" "Park" "Restaurant"
  20. ## [13] "Spanish Restaurant" "Tapas Restaurant" "Tech Startup"
  21. ## [16] "Turkish Restaurant"
  22. ##
  23. ## $`732673`
  24. ## [1] "Airport" "Bank" "Hotel" "Restaurant"
  25. summary(ftr.geotweetsmad4sq.medio) # Print an overview of the transactions object: dimensions and density, most frequent items, and the transaction length distribution.
  26. ## transactions as itemMatrix in sparse format with
  27. ## 12382 rows (elements/itemsets/transactions) and
  28. ## 413 columns (items) and a density of 0.008856487
  29. ##
  30. ## most frequent items:
  31. ## Train Station Plaza Park Bank Hotel
  32. ## 3759 3746 2417 1846 1615
  33. ## (Other)
  34. ## 31907
  35. ##
  36. ## element (itemset/transaction) length distribution:
  37. ## sizes
  38. ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
  39. ## 384 5369 2550 1432 821 530 368 239 136 118 102 78 60 44 25
  40. ## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
  41. ## 26 16 11 7 10 13 5 6 2 4 1 3 3 2 2
  42. ## 31 32 33 34 35 36 46 47 49 75
  43. ## 1 3 2 1 1 3 1 1 1 1
  44. ##
  45. ## Min. 1st Qu. Median Mean 3rd Qu. Max.
  46. ## 1.000 2.000 3.000 3.658 4.000 75.000
  47. ##
  48. ## includes extended item information - examples:
  49. ## labels
  50. ## 1 Accessories Store
  51. ## 2 Adult Education Center
  52. ## 3 Advertising Agency
  53. ##
  54. ## includes extended transaction information - examples:
  55. ## transactionID
  56. ## 1 7007
  57. ## 2 635803
  58. ## 3 731833
  59. reglas.ftr.mad4sq.medio <- apriori(data = ftr.geotweetsmad4sq.medio, parameter = list(support = 0.001, confidence = 0.60, target = "rules")) # Mine association rules with minimum support 0.001 (an absolute count of 12 over the 12382 transactions) and minimum confidence 0.6.
  60. ## Apriori
  61. ##
  62. ## Parameter specification:
  63. ## confidence minval smax arem aval originalSupport maxtime support minlen
  64. ## 0.6 0.1 1 none FALSE TRUE 5 0.001 1
  65. ## maxlen target ext
  66. ## 10 rules FALSE
  67. ##
  68. ## Algorithmic control:
  69. ## filter tree heap memopt load sort verbose
  70. ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
  71. ##
  72. ## Absolute minimum support count: 12
  73. ##
  74. ## set item appearances ...[0 item(s)] done [0.00s].
  75. ## set transactions ...[413 item(s), 12382 transaction(s)] done [0.02s].
  76. ## sorting and recoding items ... [218 item(s)] done [0.00s].
  77. ## creating transaction tree ... done [0.01s].
  78. ## checking subsets of size 1 2 3 4 5 6 done [0.02s].
  79. ## writing ... [2287 rule(s)] done [0.00s].
  80. ## creating S4 object ... done [0.01s].
  81. summary(reglas.ftr.mad4sq.medio) # Print an overview of the mined rule set: rule length distribution, support/confidence/lift quality measures, and the mining parameters.
  82. ## set of 2287 rules
  83. ##
  84. ## rule length distribution (lhs + rhs):sizes
  85. ## 2 3 4 5 6
  86. ## 1 514 1320 434 18
  87. ##
  88. ## Min. 1st Qu. Median Mean 3rd Qu. Max.
  89. ## 2.00 4.00 4.00 3.98 4.00 6.00
  90. ##
  91. ## summary of quality measures:
  92. ## support confidence lift
  93. ## Min. :0.001050 Min. :0.6000 Min. : 1.976
  94. ## 1st Qu.:0.001131 1st Qu.:0.6500 1st Qu.: 2.267
  95. ## Median :0.001292 Median :0.7143 Median : 2.610
  96. ## Mean :0.001515 Mean :0.7322 Mean : 3.237
  97. ## 3rd Qu.:0.001615 3rd Qu.:0.8000 3rd Qu.: 3.202
  98. ## Max. :0.010176 Max. :1.0000 Max. :16.416
  99. ##
  100. ## mining info:
  101. ## data ntransactions support confidence
  102. ## ftr.geotweetsmad4sq.medio 12382 0.001 0.6
  103. # Remove redundant rules: a rule is dropped when a rule with higher lift is a subset of it.
  104. rules.sorted <- sort(reglas.ftr.mad4sq.medio, by = "lift")
  105. # sparse = FALSE requests a dense logical matrix; the sparse matrix that is.subset() returns by default in current arules does not support the lower.tri()/NA masking below.
  106. subset.matrix <- is.subset(rules.sorted, rules.sorted, sparse = FALSE)
  107. # Blank out the diagonal and lower triangle so each rule is compared only against rules ranked above it (higher lift), never against itself.
  108. subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA
  109. # Column j counts the higher-ranked rules that are subsets of rule j; one or more marks rule j as redundant.
  110. redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
  111. 
  112. rules.pruned.medio <- rules.sorted[!redundant]
  113. inspect(rules.pruned.medio)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement