Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fit a structural topic model on a filtered document-term matrix and label
# the rows of the document-topic matrix (theta) with the original document ids.
#
# Expects `dtm` to already exist: a document-term matrix (documents in rows,
# terms in columns) in the slam format accepted by readCorpus(type = "slam").

# Documents with fewer than 10 word tokens (after cleaning) are discarded.
min.doc.tokens <- 10
dtm.lee <- dtm[rowSums(as.matrix(dtm)) >= min.doc.tokens, ]

# Terms occurring fewer than 100 times are discarded. Frequencies are counted
# on the already-filtered matrix, so tokens that only appear in discarded
# documents no longer keep a term alive in the vocabulary.
min.term.freq <- 100
dtm.lee <- dtm.lee[, colSums(as.matrix(dtm.lee)) >= min.term.freq]

# Removing terms can leave some documents with no tokens at all. Drop them
# here so the rows of dtm.lee line up one-to-one with the documents handed
# to stm below.
dtm.lee <- dtm.lee[rowSums(as.matrix(dtm.lee)) > 0, ]
doc.ids <- rownames(dtm.lee)

# Convert the pre-processed document-term matrix to the stm format.
corp.final <- readCorpus(dtm.lee, type = "slam")

# Remove infrequent words and renumber word indices.
# NOTE(review): the original script reported "Detected missing terms,
# renumbering" at this step. That message appears to be emitted when
# vocabulary entries occur in no document; counting term frequencies on the
# filtered matrix above should avoid it -- confirm on the real data.
corp.prep <- prepDocuments(
  corp.final$documents,
  corp.final$vocab,
  corp.final$meta
)

# prepDocuments may itself drop documents that became empty; docs.removed
# holds their positions in the input, so remove the same ids here.
if (length(corp.prep$docs.removed) > 0) {
  doc.ids <- doc.ids[-corp.prep$docs.removed]
}

### stm ###
tm_dtm <- stm(
  corp.prep$documents,
  corp.prep$vocab,
  K = 0,
  max.em.its = 500,
  data = corp.prep$meta,
  init.type = "Spectral",
  verbose = TRUE,
  seed = 100
)
theta <- tm_dtm$theta

# Keep the original document ids as row names of theta. Fail loudly instead
# of attaching ids to the wrong rows if the bookkeeping above is off.
if (length(doc.ids) != nrow(theta)) {
  stop(
    "Number of document ids (", length(doc.ids),
    ") does not match rows of theta (", nrow(theta), ").",
    call. = FALSE
  )
}
rownames(theta) <- doc.ids
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement