Advertisement
Guest User

Untitled

a guest
Dec 5th, 2019
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.75 KB | None | 0 0
  #' Normalise raw text before tokenisation
  #'
  #' Replaces apostrophes, newlines, underscores and hyphens with spaces,
  #' drops elements that are empty or whitespace-only, lowercases the text
  #' and, unless accents are to be kept, transliterates it to plain ASCII.
  #'
  #' Lowercasing is done here; the original author's note raised the option of
  #' doing it later in `itoken` instead.
  #'
  #' @param book Character vector of text (one element per line or document).
  #'   `NA` elements are not removed.
  #' @param accent `"NON"` (default) strips accents by transliterating to
  #'   ASCII; any other value leaves accented characters untouched.
  #' @return A character vector of cleaned text. It can be shorter than `book`
  #'   because blank elements are dropped.
  ReadText <- function(book, accent = "NON") {
    # Word-joining punctuation and line breaks become separators. Base `gsub`
    # is used so the function does not depend on stringr being attached.
    book <- gsub("['\n_-]", " ", book)

    # Drop elements that contain nothing but whitespace.
    book <- book[!grepl("^\\s*$", book)]
    book <- tolower(book)

    if (accent == "NON") {
      # Characters that //TRANSLIT may emit as stand-alone accent marks
      # (platform dependent, e.g. "'e" for an e-acute).
      marks <- "['`^~\"]"
      # Blank out EVERY mark already present in the text (`gsub`, not `sub`),
      # so that the clean-up after `iconv` only removes transliteration
      # artefacts and never glues two real words together.
      book <- gsub(marks, " ", book)
      book <- iconv(book, from = "UTF-8", to = "ASCII//TRANSLIT//IGNORE")
      book <- gsub(marks, "", book)
    }

    book
  }
  20.  
  21. library(readtext)
  # Load the fourth sample text as UTF-8; `readtext()` returns a data frame
  # whose `text` column holds the document contents.
  t4 <- readtext("liste_texte//texte_4.txt", encoding = "UTF-8")

  # Clean the text in place, stripping accents.
  t4$text <- ReadText(t4$text, accent = "NON")

  # Show the cleaned text.
  t4$text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement