Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Create flags, percentage score, and bins for reviews$helpful ----
# Each element of reviews$helpful is read as a pair: x[1] is compared against
# and divided by x[2]. Presumably x[1] = helpful votes and x[2] = total votes;
# verify against the data source.
# vapply() (rather than sapply()) guarantees a numeric vector even when
# `reviews` has zero rows.
helpful_votes <- vapply(
  reviews$helpful,
  function(x) as.numeric(x[1]),
  numeric(1)
)
total_votes <- vapply(
  reviews$helpful,
  function(x) as.numeric(x[2]),
  numeric(1)
)

# The score is undefined when the second count is zero (division by zero) or
# when the first count exceeds the second (inconsistent record).
helpful_invalid <- total_votes == 0 | helpful_votes > total_votes

# 1 = score not available, 0 = score usable (NA if the counts are missing).
reviews$helpful.nan <- as.numeric(helpful_invalid)

# Ratio x[1] / x[2]; NA wherever the score is flagged as not available.
reviews$helpful.perc <- ifelse(
  helpful_invalid,
  NA_real_,
  helpful_votes / total_votes
)

# `breaks = 3` splits the observed range of helpful.perc into three
# equal-width intervals; NA scores stay NA.
reviews$helpful.bins <- cut(
  reviews$helpful.perc,
  breaks = 3,
  include.lowest = TRUE,
  labels = c("Lower", "Middle", "Upper")
)

# Subset reviews for EDA, text mining, and modeling ----
# which() drops rows whose flag is NA; a bare logical index containing NA
# would inject all-NA rows into the subset.
reviews.mod <- reviews[which(reviews$helpful.nan == 0), ]

# Count number of words per review ----
# Match runs of word characters and count the real matches. gregexpr() returns
# -1 when nothing matches, so `sum(m > 0L)` yields 0 for empty text (and NA for
# NA text) instead of the spurious 1 that length() would report.
word_matches <- gregexpr("\\w+", reviews.mod$reviewText)
reviews.mod$numWords <- vapply(
  word_matches,
  function(m) sum(m > 0L),
  integer(1)
)

# Correlation of helpful percentage and depth of review ----
# Reviews with missing text have NA word counts; use only complete pairs.
cor(reviews.mod$helpful.perc, reviews.mod$numWords, use = "complete.obs")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement