Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Streaming word count with sparklyr, exposed as a reactive data source.
#
# Text files dropped into the "source/" directory are read as a stream,
# split into words, stripped of stop words, counted, and the words seen
# more than 100 times are kept, most frequent first.

library(sparklyr)
library(ggplot2) # not used in this section; presumably for plotting elsewhere
library(dplyr)

# Connect to a local Spark instance.
sc <- spark_connect(master = "local")

# Directory watched by the stream. Guarded so that re-running the script
# does not emit an "already exists" warning, while genuine creation
# failures are still reported by dir.create().
if (!dir.exists("source")) {
  dir.create("source")
}

reactiveCount <- stream_read_text(sc, "source/") %>%
  # Split each incoming "line" into a "tokens" array column.
  ft_tokenizer("line", "tokens") %>%
  # Drop stop words from the token arrays, writing the result to "words".
  ft_stop_words_remover("tokens", "words") %>%
  # explode() is passed through to Spark SQL: one output row per element.
  transmute(words = explode(words)) %>%
  # Discard empty strings left over from tokenization.
  filter(nchar(words) > 0) %>%
  # Occurrences per distinct word.
  group_by(words) %>%
  summarize(n = n()) %>%
  # Keep only frequent words, then sort. Filtering first keeps the ordering
  # as the final transformation and sorts fewer rows; the resulting rows
  # are the same as sorting first.
  filter(n > 100) %>%
  arrange(desc(n)) %>%
  # Wrap the stream so it can be consumed as a Shiny reactive.
  reactiveSpark()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement