Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a term-document matrix of character 1- to 3-grams from a folder of
# .txt files and export it as "typedPrefix.csv".
# Uses readtext (readtext), quanteda (corpus, tokens, tokens_ngrams, dfm) and
# tm (as.TermDocumentMatrix, weightTf); those packages must be attached.

# read documents
# NOTE(review): `path` is not defined in this script -- presumably a
# placeholder for the input directory; define it before running.
FILEDIR <- path
txts <- readtext(file.path(FILEDIR, "*.txt"))
my_corpus <- corpus(txts)

# start processing
# Replace every whitespace character with "_" before splitting the text into
# single characters. The backslash has to be doubled: "\s" is not a valid
# escape sequence in an R string literal and stops the script from parsing.
# NOTE(review): "_" is itself a punctuation character, so remove_punct = TRUE
# may strip these placeholders again -- confirm this is intended.
typedPrefix <- tokens(
  gsub("\\s", "_", my_corpus),
  what = "character",
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_symbols = TRUE
)

# Join adjacent characters into 1-, 2- and 3-grams with no separator.
# tokens_ngrams() is called explicitly because the `ngrams` / `concatenator`
# arguments of tokens() are not available in current quanteda releases.
typedPrefix <- tokens_ngrams(typedPrefix, n = 1:3, concatenator = "")

dfm2 <- dfm(typedPrefix)
# Transpose so that terms are rows and documents are columns, then convert to
# a tm TermDocumentMatrix with plain term-frequency weighting.
tdm2 <- as.TermDocumentMatrix(t(dfm2), weighting = weightTf)

# Convert to a dense matrix once and reuse it for display and export.
term_matrix <- as.matrix(tdm2)
term_matrix

# write output file
# write.csv2() uses ";" as field separator and "," as decimal mark.
write.csv2(term_matrix, file = "typedPrefix.csv")
Add Comment
Please, Sign In to add comment