Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Spherical k-means clustering of speeches on a tf-idf weighted
# document-feature matrix, fitted once for every k from 5 to 20.
#
# Objects left in the workspace:
#   dtm      - trimmed, tf-idf weighted document-feature matrix with
#              all-zero rows removed
#   clust_sk - list of skmeans fits, one per k, named "k5" ... "k20"
#   k_values - the cluster counts that were fitted

pacman::p_load(quanteda, magrittr, skmeans)

# NOTE(review): hard-coded, machine-specific working directory. Kept so that
# relative paths keep resolving exactly as before; prefer a project-relative
# path (e.g. via file.path()) when this script is next reorganised.
setwd("/Users/James/Documents/University/rfiles")

# load() restores saved R objects, so despite the ".R" extension this file is
# presumably an RData image that defines `dataraw_nc` - TODO confirm.
load("data_nc.R")
text <- as.character(dataraw_nc$speech)
rm(dataraw_nc) # the raw data is no longer needed once the text is extracted

# Strip right single quotation marks before the text is tokenised.
text <- gsub("’", "", text)

# Build the document-feature matrix: lower-cased, stemmed, with English
# stopwords plus "will" and "hon" removed, and numbers, punctuation,
# separators and symbols dropped.
dtm <- dfm(
  text,
  tolower = TRUE,
  stem = TRUE,
  remove = c(stopwords("english"), "will", "hon"),
  valuetype = "fixed",
  verbose = TRUE,
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_separators = TRUE,
  remove_symbols = TRUE
)
rm(text)

# Trim rare features (threshold 6), then apply tf-idf weighting.
# NOTE(review): `min_count` and `type = "tfidf"` appear to have been renamed in
# later quanteda releases (`min_termfreq`, `dfm_tfidf()`); left unchanged to
# match the quanteda version this script targets - confirm before upgrading.
dtm <- dtm %>%
  dfm_trim(min_count = 6, verbose = TRUE) %>%
  dfm_weight(type = "tfidf")

# Drop documents whose weights sum to zero (presumably because spherical
# k-means cannot normalise an all-zero row - verify against skmeans docs).
dtm <- dtm[rowSums(dtm) > 0, ]

# Fit one model per candidate number of clusters. The seed makes the random
# starting prototypes, and therefore the fits, reproducible between runs.
k_values <- 5:20
set.seed(42)
clust_sk <- lapply(k_values, function(k) {
  skmeans(dtm, k, method = "pclust", control = list(verbose = TRUE))
})
# Name each fit by its k so results can be looked up without index arithmetic.
names(clust_sk) <- paste0("k", k_values)
Add Comment
Please, Sign In to add comment