Untitled

# Discard short documents: only documents with more than 10 word tokens
# (after cleaning) are kept, i.e. documents with 10 or fewer are dropped.
# NOTE: as.matrix() builds a dense copy of the matrix; for a very large
# corpus a sparse row/column sum would be lighter on memory.
dtm.lee = dtm[rowSums(as.matrix(dtm)) > 10, ]

# Discard rare terms: only terms whose total count is above 100 are kept.
# The counts are taken from the filtered matrix (dtm.lee), not the original
# dtm, so tokens from the documents discarded above no longer count.
dtm.lee = dtm.lee[, colSums(as.matrix(dtm.lee)) > 100]

# Removing terms can leave some documents without any tokens. Drop them here
# so that rownames(dtm.lee) only lists documents that still have content.
dtm.lee = dtm.lee[rowSums(as.matrix(dtm.lee)) > 0, ]

# Convert the pre-processed document-term matrix to stm format.
corp.final = readCorpus(dtm.lee, type = "slam")

# Remove infrequent words and renumber the word indices. prepDocuments() may
# also drop documents; the positions it drops are reported in
# corp.prep$docs.removed.
# NOTE(review): corp.final$meta is presumably NULL, since readCorpus() is only
# documented to return documents and vocab -- confirm.
corp.prep = prepDocuments(
  corp.final$documents,
  corp.final$vocab,
  corp.final$meta
)


# *********************************************************************
# ****** This is where I get "Detected missing terms, renumbering" ****

### stm ###
# Fit the topic model. With init.type = "Spectral", K = 0 asks stm to select
# the number of topics itself. The seed is fixed for reproducibility.
tm_dtm = stm(corp.prep$documents, corp.prep$vocab, K = 0, max.em.its = 500,
             data = corp.prep$meta, init.type = "Spectral", verbose = TRUE,
             seed = 100)

theta = tm_dtm$theta

# Keep the original document ids as the row names of theta.
# theta has one row per document that actually reached the model, which can be
# fewer than nrow(dtm.lee), so the ids are filtered the same way first:
#   1. documents without any tokens (presumably skipped by readCorpus() --
#      confirm against the installed stm version);
#   2. documents dropped by prepDocuments(), listed by position in
#      corp.prep$docs.removed.
doc.ids = rownames(dtm.lee)
doc.ids = doc.ids[rowSums(as.matrix(dtm.lee)) > 0]
if (length(corp.prep$docs.removed) > 0) {
  doc.ids = doc.ids[-corp.prep$docs.removed]
}
# Fail loudly instead of attaching misaligned ids.
stopifnot(length(doc.ids) == nrow(theta))
rownames(theta) = doc.ids