Advertisement
Guest User

Untitled

a guest
Oct 26th, 2016
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 0.96 KB | None | 0 0
  # Helpfulness features for `reviews`, then a word-count vs. helpfulness
  # correlation on the usable subset.
  #
  # Reads:  reviews$helpful    - one element per review, indexed as [1] and [2]
  #                              (presumably helpful votes and total votes;
  #                              TODO confirm against the data loader)
  #         reviews$reviewText - review body text
  # Writes: reviews$helpful.perc, reviews$helpful.nan, reviews$helpful.bins,
  #         reviews.mod (subset of `reviews` plus a numWords column)

  # Ratio [1] / [2] for each review, or NA when the pair is unusable:
  # fewer than two values, a missing value, a zero denominator, or [1] > [2].
  # vapply() pins the result to one double per review, so an empty or ragged
  # input cannot silently change the column type the way sapply() can.
  reviews$helpful.perc <- vapply(
    reviews$helpful,
    function(votes) {
      if (length(votes) < 2L || anyNA(votes[1:2]) ||
            votes[2] == 0 || votes[1] > votes[2]) {
        NA_real_
      } else {
        votes[1] / votes[2]
      }
    },
    numeric(1)
  )

  # 1 = unusable ratio, 0 = usable. Derived from helpful.perc so the two
  # columns cannot disagree, and so the flag itself is never NA.
  reviews$helpful.nan <- as.numeric(is.na(reviews$helpful.perc))

  # Three equal-width bins over the observed range of helpful.perc.
  reviews$helpful.bins <- cut(reviews$helpful.perc,
                              breaks = 3,
                              include.lowest = TRUE,
                              labels = c("Lower", "Middle", "Upper"))

  # Subset reviews for EDA, text mining, and modeling.
  # The flag is never NA, so this cannot introduce all-NA phantom rows.
  reviews.mod <- reviews[reviews$helpful.nan == 0, , drop = FALSE]

  # Count number of words per review: one word per run of word characters.
  # gregexpr() reports "no match" as a single -1, so count positive match
  # positions rather than taking length() (which would report 1 for a text
  # containing no words at all).
  word.hits <- gregexpr("\\w+", as.character(reviews.mod$reviewText))
  reviews.mod$numWords <- vapply(word.hits,
                                 function(hits) sum(hits > 0L),
                                 integer(1))

  # Correlation of helpful percentage and depth of review.
  # Missing review texts yield an NA word count; drop those pairs instead of
  # letting a single NA turn the whole result into NA.
  cor(reviews.mod$helpful.perc, reviews.mod$numWords, use = "complete.obs")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement