Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build document-term matrices for a small training corpus and a validation
# corpus with the `tm` package. The validation matrix is tabulated against the
# training vocabulary, so both matrices expose the same term columns even
# though the validation documents mostly use different words.
library(tm)

# Training documents.
text <- c(
  "saying text is good",
  "saying text once and saying text twice is better",
  "saying text text text is best",
  "saying text once is still ok",
  "not saying it at all is bad",
  "because text is a good thing",
  "we all like text",
  "even though sometimes it is missing"
)

# Validation documents (string contents are data and are kept verbatim).
validationText <- c(
  "This has different words in it.",
  "But I still want to count",
  "the occurence of text",
  "for example"
)

TextCorpus <- Corpus(VectorSource(text))
ValiTextCorpus <- Corpus(VectorSource(validationText))

# Control options for the training matrix.
# `bounds$global` is the documented way to require a minimum document
# frequency: a term is kept only if it occurs in at least 5 documents.
# (An unrecognised option name such as `MinDocFrequency` is not acted on.)
Control <- list(
  stopwords = TRUE,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  bounds = list(global = c(5, Inf))
)

TextDTM <- DocumentTermMatrix(TextCorpus, control = Control)

# For the validation matrix, reuse the same preprocessing but:
#   * drop the document-frequency bound -- with only 4 validation documents a
#     lower bound of 5 would discard every term;
#   * pass the training terms as `dictionary`, so exactly those terms are
#     counted (with zero counts where a term does not occur).
ValiTextDTM <- DocumentTermMatrix(
  ValiTextCorpus,
  control = modifyList(
    Control,
    list(bounds = NULL, dictionary = Terms(TextDTM))
  )
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement