Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
# ARFF preamble (relation name, attribute declarations, @DATA marker) that the
# script writes to the output file ahead of the converted rows.
# The literal deliberately starts with a newline, so the output begins with a
# blank line.
# NOTE(review): format_line expands every timestamp into six fields
# (year, month, day, hour, minute, second) but only three click_time_*
# attributes are declared here -- confirm the intended schema before loading
# the result into an ARFF consumer.
header = """
@RELATION adtracking
@ATTRIBUTE ip NUMERIC
@ATTRIBUTE app NUMERIC
@ATTRIBUTE device NUMERIC
@ATTRIBUTE os NUMERIC
@ATTRIBUTE channel NUMERIC
@ATTRIBUTE click_time_y NUMERIC
@ATTRIBUTE click_time_m NUMERIC
@ATTRIBUTE click_time_d NUMERIC
@ATTRIBUTE is_attributed {0, 1}
@DATA
"""
def separated_date(lista):
    """Return the components of *lista* as comma-terminated fields.

    Every element is followed by a comma, including the last one, so
    ``("2017", "11", "07", "09", "30", "38")`` becomes
    ``"2017,11,07,09,30,38,"``.

    Args:
        lista: Iterable of strings (in this file, the six regex groups of
            one timestamp: year, month, day, hour, minute, second).

    Returns:
        The concatenation of ``element + ","`` for each element; the empty
        string when *lista* is empty.
    """
    # The date part (first three items) and the time part (the rest) are
    # formatted the same way, so one pass over the whole sequence suffices.
    return "".join(part + "," for part in lista)
def resultset_toString(lista):
    """Rebuild a single-quoted ``'YYYY-MM-DD hh:mm:ss'`` string from its parts.

    Args:
        lista: Sliceable sequence of strings. The first three items are
            joined with ``-`` (date part) and the remaining items with ``:``
            (time part).

    Returns:
        The date part and the time part separated by one space and wrapped
        in single quotes, e.g. ``"'2017-11-07 09:30:38'"``. Missing parts
        are rendered as empty strings, so an empty *lista* yields ``"' '"``.
    """
    date_part = "-".join(lista[:3])
    time_part = ":".join(lista[3:])
    return "'" + date_part + " " + time_part + "'"
# Matches "YYYY-MM-DD hh:mm:ss" and captures the six numeric components.
# Compiled once because format_line is called for every input line.
_TIMESTAMP_RE = re.compile(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})')


def format_line(linez):
    """Expand every ``YYYY-MM-DD hh:mm:ss`` timestamp in *linez* into fields.

    Each timestamp is replaced by its six components written as
    comma-terminated fields (``2017,11,07,09,30,38,`` -- note the trailing
    comma).

    * No timestamp: the line is returned unchanged.
    * Exactly one timestamp: after the expansion every ``",,"`` in the line
      is removed. Together with the trailing comma this drops an empty
      field that directly follows the timestamp.
    * Two or more timestamps: only the expansion is applied, so an empty
      field remains after each expanded timestamp.

    NOTE(review): the one-timestamp and multi-timestamp cases therefore
    produce rows with different field counts -- confirm this is intended.

    Args:
        linez: One line of text, typically including its line terminator.

    Returns:
        The rewritten line.
    """
    matches = list(_TIMESTAMP_RE.finditer(linez))
    if not matches:
        return linez

    for match in matches:
        # str.replace swaps every occurrence of this exact timestamp text,
        # so a timestamp repeated on the line is expanded in one go.
        expanded = ",".join(match.groups()) + ","
        linez = linez.replace(match.group(0), expanded)

    if len(matches) == 1:
        # Collapses the ",,," left by "<expanded>," + ",," down to ",".
        linez = linez.replace(",,", "")
    return linez
# Convert the input CSV line by line: write the ARFF preamble, then every
# input line after its timestamps have been expanded by format_line.
fname = "train_sample.csv"

# The input is opened first so that a missing input file does not leave a
# header-only block appended to the output. Both handles are closed (and the
# output flushed) by the with-statement even if a line fails to convert.
# NOTE(review): the output is opened in append mode, so re-running the script
# adds a second header and data block to an existing test1.csv -- confirm
# that appending (rather than overwriting) is intended.
# NOTE(review): every input line is passed through, including a column-name
# header row if the CSV has one -- verify against the actual input file.
with open(fname) as file_to_read, open("test1.csv", 'a') as file_to_write:
    file_to_write.write(header)
    file_to_write.writelines(format_line(line) for line in file_to_read)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement