Untitled

# Collect the full path of every file inside the directory dirs[j].
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
files <- list.files(path = dirs[j], full.names = TRUE)

#The files are in csv format and in the ideal case have exactly 5 columns. However, the 5th column can contain an arbitrary number of commas. If I try to fread with sep = ",", certain rows can be of arbitrarily high length. If I use select = 1:5 to subset each row, I lose data.

# Read each line of every file into a single character column; the rows are
# separated into columns further down, based on the location of the first 4
# commas. sep = "\n" makes fread keep every line whole, whereas a bare "n"
# would split each line on the letter n.
data <- rbindlist(
  lapply(files, fread, sep = "\n", fill = TRUE, header = FALSE)
)

# Drop empty rows: an entry is kept only when the pattern "." is detected in
# it.
retain <- unlist(lapply(data, str_detect, pattern = "."))
data <- data[retain, ]

# Drop rows that have no data in the 5th column: whitespace is trimmed from
# both ends of each entry first, and the entry is kept only when the trimmed
# text still matches the five-field pattern.
retain <- unlist(lapply(
  lapply(data, trimws, which = "both"),
  str_detect,
  pattern = ".+,.+,.*,.*,.+"
))
data <- data[retain, ]

# Split every row into exactly 5 columns at its first 4 commas; any further
# commas stay inside the 5th column. The fields are captured directly with a
# regular expression instead of swapping the commas for a stand-in delimiter
# and splitting on it: a stand-in of "t" (rather than the tab escape "\t")
# turns commas into the letter t and then splits on every t in the data, and
# even a real tab would misalign rows whose 5th column contains a tab.
# local() keeps the intermediate objects out of the global environment.
data <- local({
  rows <- as.character(unlist(data, use.names = FALSE))
  field_pattern <- "^([^,]*),([^,]*),([^,]*),([^,]*),(.*)$"

  # For each row: the full match followed by the 5 captured fields, or a
  # zero-length vector when the row has fewer than 4 commas (or is NA).
  pieces <- regmatches(rows, regexec(field_pattern, rows))

  splittable <- lengths(pieces) == 6L
  if (!all(splittable)) {
    warning(
      sum(!splittable),
      " row(s) with fewer than 4 commas could not be split and were dropped",
      call. = FALSE
    )
  }

  # Discard the full match (element 1) and flatten the fields row by row.
  fields <- unlist(lapply(pieces[splittable], `[`, -1L), use.names = FALSE)

  # Reshape the flat character vector into a 5-column data frame, one input
  # row per output row. as.character() turns the NULL produced by empty input
  # into character(0), so zero rows give an empty 5-column data frame.
  data.frame(
    matrix(as.character(fields), ncol = 5L, byrow = TRUE),
    stringsAsFactors = FALSE
  )
})