Guest User

Untitled

a guest
Nov 25th, 2017
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.68 KB | None | 0 0
  # Spherical k-means over tf-idf weighted speech texts ----
  #
  # Pipeline: load the raw speeches, build a stemmed document-feature matrix,
  # drop rare features, apply tf-idf weights, then fit one spherical k-means
  # partition for every k in `k_values`.
  #
  # Objects left in the workspace:
  #   dtm      - the trimmed, tf-idf weighted document-feature matrix
  #   clust_sk - list of skmeans fits, one per k, named "k5", "k6", ...

  pacman::p_load(quanteda, magrittr, skmeans)

  # Directory holding the input data. The path is resolved with file.path()
  # instead of setwd(), so the session's working directory is left untouched.
  data_dir <- "/Users/James/Documents/University/rfiles"

  # Values of k (number of clusters) to fit.
  k_values <- 5:20

  # load() returns the names of the objects it restored; fail fast if the
  # expected data frame is not among them.
  stopifnot(
    "dataraw_nc" %in% load(file.path(data_dir, "data_nc.R"))
  )

  text <- as.character(dataraw_nc$speech)
  rm(dataraw_nc)  # only the speech column is needed from here on

  # Strip typographic apostrophes so contractions stay single tokens.
  # The pattern is a literal character, hence fixed = TRUE.
  text <- gsub("’", "", text, fixed = TRUE)

  # NOTE(review): these calls follow the quanteda API the script was written
  # against. Later quanteda releases appear to have renamed `min_count` and
  # moved tf-idf weighting into `dfm_tfidf()` - confirm against the installed
  # version before upgrading.
  dtm <- dfm(
    text,
    tolower = TRUE,
    stem = TRUE,
    remove = c(stopwords("english"), "will", "hon"),
    valuetype = "fixed",
    verbose = TRUE,
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_separators = TRUE,
    remove_symbols = TRUE
  )

  rm(text)  # the raw character vector is no longer needed

  # Keep features occurring at least 6 times, then weight by tf-idf.
  dtm <- dtm %>%
    dfm_trim(min_count = 6, verbose = TRUE) %>%
    dfm_weight(type = "tfidf")

  # Spherical k-means works on unit-length rows, so documents whose weights
  # are all zero after trimming have to be dropped.
  dtm <- dtm[rowSums(dtm) > 0, ]

  # There must be more documents than clusters for the largest requested k.
  stopifnot(nrow(dtm) > max(k_values))

  # The fits start from random prototypes; fix the seed so runs are
  # reproducible.
  set.seed(2017)

  clust_sk <- lapply(k_values, function(k) {
    skmeans(dtm, k, method = "pclust", control = list(verbose = TRUE))
  })

  # Name each fit after its k so it can be retrieved as clust_sk[["k10"]];
  # positional indexing keeps working as before.
  names(clust_sk) <- paste0("k", k_values)
Add Comment
Please, Sign In to add comment