Advertisement
Guest User

Untitled

a guest
Jun 25th, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.01 KB | None | 0 0
  1. library(tidyverse)
  2. library(tidytext)
  3. library(tm)
  4. library(topicmodels)
  5.  
  6. glimpse(datsub)  # inspect the review data; the console output is pasted below
  7. Observations: 14,108
  8. Variables: 6
  9. $ Product.Name <chr> "iphone 4s", "iphone 4s", "iphone 4s", "iphone 4s", "iphone 4s", "iphone 4...
  10. $ Brand.Name <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""...
  11. $ Price <dbl> 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,...
  12. $ Rating <int> 5, 1, 4, 5, 5, 3, 5, 5, 5, 1, 5, 5, 1, 5, 2, 5, 5, 4, 5, 1, 4, 1, 1, 1, 4,...
  13. $ Reviews <chr> "new great price phone really quick great seller", "star product false adv...
  14. $ Review.Votes <int> 2, 1, 0, 1, 2, 2, 2, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,...
  15.  
  16. # Treat every review string as one document of a tm corpus.
  17. corpus <- Corpus(VectorSource(datsub$Reviews))
  18. # Document-term matrix built from that corpus; this is the LDA input.
  19. dtm <- DocumentTermMatrix(corpus)
  20. # Four-topic LDA fitted with the Gibbs method; the control seed is set to 1.
  21. ap_lda <- LDA(
  22.   x = dtm, k = 4, method = "Gibbs",
  23.   control = list(seed = 1)
  24. )
  25.  
  26. ap_topics <- tidy(ap_lda, matrix = "beta")  # beta for every topic/term pair
  27. # Five highest-beta terms per topic (top_n keeps ties, so possibly more).
  28. ap_top_terms <- ap_topics %>%
  29.   group_by(topic) %>%
  30.   top_n(5, beta) %>%
  31.   arrange(topic, -beta)
  32. # Order bars within each facet: plain reorder() ranks a term once across topics.
  33. ap_top_terms %>%
  34.   ggplot(aes(reorder_within(term, beta, topic), beta, fill = factor(topic))) +
  35.   geom_col(show.legend = FALSE) +
  36.   facet_wrap(~ topic, scales = "free") +
  37.   scale_x_reordered() +  # strips the suffix reorder_within() adds to labels
  38.   labs(x = "terms") + coord_flip()
  39.  
  40. # One row per token of each review, stored in a column named `term`.
  41. cleaned_reviews <- unnest_tokens(datsub, term, Reviews)
  42.  
  43. # Ten highest-beta terms of topic 1, strongest first (top_n keeps ties).
  44. top_terms1 <- ap_topics %>%
  45.   filter(topic == 1) %>%
  46.   top_n(10, beta) %>%
  47.   arrange(desc(beta))
  48.  
  49. # Wide table, one row per product: weight = (term count / product's total count
  50. # over the topic-1 terms) * beta. inner_join drops terms no review token matches.
  51. merged_dat1 <- inner_join(top_terms1, cleaned_reviews, by = "term") %>%
  52.   count(Product.Name, term, beta) %>%
  53.   group_by(Product.Name) %>%
  54.   mutate(weight = n / sum(n) * beta) %>%
  55.   ungroup() %>%
  56.   select(Product.Name, term, weight) %>%
  57.   # fill = 0: a term a product never mentions has weight 0, not NA
  58.   spread(term, weight, fill = 0)
  59.  
  60. glimpse(merged_dat1)  # column-wise summary of the wide weight table
  61. merged_dat1  # auto-print; the tibble output follows
  62. # A tibble: 4 x 11
  63. # Groups: Product.Name [4]
  64. Product.Name camera fast feature good love low nice picture price quality
  65. <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
  66. 1 BLU Studio 5.0 0.00990 0.000747 0.000970 0.208 0.000201 0.000686 0.00428 0.00205 0.0115 0.00632
  67. 2 iphone 4s 0.00121 0.000626 0.000247 0.327 0.000204 0.000554 0.00625 0.000549 0.00588 0.00303
  68. 3 Motorola Moto E 0.0129 0.000573 0.00128 0.227 0.0000610 0.000981 0.00376 0.00162 0.0139 0.00451
  69. 4 Samsung Galaxy II 0.00520 0.000432 0.00150 0.277 0.000182 0.000541 0.00651 0.00117 0.00881 0.00305
  70.  
  71. # dist() must see only the numeric weight columns: with Product.Name kept,
  72. # the frame is coerced to text and back (NA column, rounded values).
  73. # NOTE(review): merged_dat2 is not defined here; assumed shaped like merged_dat1.
  74. dist1 <- merged_dat1 %>% ungroup() %>% select(-Product.Name) %>%
  75.   dist(method = "euclidean") %>% as.matrix()
  76. dist2 <- merged_dat2 %>% ungroup() %>% select(-Product.Name) %>%
  77.   dist(method = "euclidean") %>% as.matrix()
  78. mds1 <- cmdscale(dist1, k = 1, eig = TRUE, x.ret = TRUE)
  79. mds2 <- cmdscale(dist2, k = 1, eig = TRUE, x.ret = TRUE)
  80. dim1 <- mds1$points  # coordinates along the single requested MDS axis
  81. dim2 <- mds2$points
  82.  
  83.  
  84. # Scatter of the two MDS coordinates, one point per row of merged_dat1.
  85. data.frame(dim1, dim2, product = merged_dat1$Product.Name) %>%
  86.   ggplot(aes(x = dim1, y = dim2, color = product)) +
  87.   geom_point() +
  88.   labs(x = "dim 1", y = "dim 2", color = "Brand") + theme_minimal()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement