Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#' Normalise raw text before tokenisation
#'
#' Replaces apostrophes, newlines, underscores and hyphens with spaces, drops
#' elements that are empty or whitespace-only after that replacement,
#' lower-cases what remains and, when `accent` is `"NON"`, transliterates the
#' text to ASCII.
#'
#' Only base R functions are used, so the function works without any string
#' package being attached.
#'
#' @param book Character vector of text (one element per line or document).
#' @param accent `"NON"` (the default) converts the text to ASCII and removes
#'   quote/diacritic marks; any other value returns the lower-cased text
#'   without transliteration.
#' @return A character vector of cleaned text. Blank elements are removed, so
#'   the result may be shorter than `book`.
ReadText <- function(book, accent = "NON") {
  # These characters act as word separators: turn them into spaces so the
  # words on either side stay distinct.
  book <- gsub("['\n_-]", " ", book)

  # Drop elements that contain nothing but whitespace once the separators
  # are gone.
  book <- book[!grepl("^\\s*$", book)]

  # Lower-casing is done here; the original author noted it could instead be
  # left to the tokenizer (itoken).
  book <- tolower(book)

  if (accent == "NON") {
    # Blank out EVERY quote/diacritic-like character already present in the
    # text (not just the first one per element), so that the clean-up after
    # transliteration cannot glue neighbouring words together.
    book <- gsub("['`^~\"]", " ", book)
    book <- iconv(book, from = "UTF-8", to = "ASCII//TRANSLIT//IGNORE")
    # Depending on the platform's iconv, transliteration may emit accents as
    # separate ASCII marks next to the base letter; strip those marks.
    book <- gsub("['`^~\"]", "", book)
  }

  book
}
# Load the reader used to import the raw text file.
library(readtext)

# Read the document as UTF-8, clean its text column with ReadText()
# (accent = "NON" requests ASCII transliteration), then display the result.
t4 <- readtext("liste_texte//texte_4.txt", encoding = "UTF-8")
t4$text <- ReadText(t4$text, accent = "NON")
t4$text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement