Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build a tm corpus from data$text and normalise it for text mining.
#
# Step order matters:
#   1. drop the upper-case marker tokens while they are still upper-case
#      (removeWords is case-sensitive),
#   2. lower-case, so capitalised stop words ("The", "And") match the
#      lower-case stop-word list,
#   3. remove stop words before punctuation, so contractions in the list
#      ("don't", "i'm") still match,
#   4. strip punctuation and numbers, stem, and only then collapse the
#      whitespace left behind by all the removals.

# head() keeps at most the first 3030 documents; unlike [1:3030] it does not
# pad the corpus with NA documents when data$text is shorter.
corpus <- Corpus(VectorSource(head(data$text, 3030)))

# Remove these specific tokens (presumably upper-case placeholders inserted
# upstream -- TODO confirm against the data source).
corpus <- tm_map(corpus, removeWords, c("NUMBER", "CITATION", "FORMULA"))

# Convert to lower case; tolower is a base function, so it needs
# content_transformer() to be usable as a tm transformation.
corpus <- tm_map(corpus, content_transformer(tolower))

# Remove English stop words.
corpus <- tm_map(corpus, removeWords, stopwords("english"))

# Remove punctuation.
corpus <- tm_map(corpus, removePunctuation)

# Remove numbers.
corpus <- tm_map(corpus, removeNumbers)

# Reduce words to their stems.
corpus <- tm_map(corpus, stemDocument)

# Collapse runs of whitespace; done last so gaps from earlier removals go too.
corpus <- tm_map(corpus, stripWhitespace)

# corpus <- tm_map(corpus, PlainTextDocument)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement