daily pastebin goal
15%
SHARE
TWEET

Untitled

a guest Apr 20th, 2018 161 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import re
  2.  
# ARFF-style preamble (@RELATION / @ATTRIBUTE / @DATA) that the script writes
# to the output file before the converted data rows.
# NOTE(review): only year/month/day attributes are declared for click_time,
# while format_line expands every timestamp into six comma-separated fields
# (year, month, day, hour, minute, second) -- confirm that the declared
# attributes match the columns actually emitted.
header = """
@RELATION adtracking


@ATTRIBUTE ip NUMERIC
@ATTRIBUTE app NUMERIC
@ATTRIBUTE device NUMERIC
@ATTRIBUTE os NUMERIC
@ATTRIBUTE channel NUMERIC
@ATTRIBUTE click_time_y NUMERIC
@ATTRIBUTE click_time_m NUMERIC
@ATTRIBUTE click_time_d NUMERIC
@ATTRIBUTE is_attributed {0, 1}


@DATA

"""
  21.  
  22.  
  23. def separated_date(lista):
  24.     date = lista[:3]
  25.     horario = lista[3:]
  26.     date_string = ""
  27.     for el in date:
  28.         date_string += el + ","
  29.    
  30.     horario_string = ""
  31.     for el in horario:
  32.         horario_string += el + ","  
  33.     return date_string + horario_string
  34.    
  35.  
  36.  
  37. def resultset_toString(lista):
  38.     date = lista[:3]
  39.     horario = lista[3:]
  40.     date_string = ""
  41.     for el in date:
  42.         date_string += el + "-"
  43.    
  44.     horario_string = ""
  45.     for el in horario:
  46.         horario_string += el + ":"
  47.  
  48.     return """'""" + date_string[:-1] + " " + horario_string[:-1] + """'"""
  49.  
  50. def format_line(linez):
  51.     one_date = False
  52.     result = re.findall(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})', linez)
  53.     if len(result) <= 0:
  54.         return linez
  55.     dates_formated = []
  56.     for i in result:
  57.         dates_formated.append(resultset_toString(i))
  58.  
  59.     if(len(dates_formated) <= 1):
  60.         one_date = True
  61.     dates_decomposited = []
  62.    
  63.     for i in result:
  64.         dates_decomposited.append(separated_date(i))
  65.    
  66.     for el in list(zip(dates_formated, dates_decomposited)):
  67.         data_formatada = el[0][:-1]
  68.         data_formatada = data_formatada[1:]
  69.         if(one_date):
  70.             linez = linez.replace(data_formatada, el[1])
  71.             linez = linez.replace(",,", "")
  72.         else:
  73.             linez = linez.replace(data_formatada, el[1])
  74.     return linez
  75.  
  76. fname = "train_sample.csv"
  77.  
  78. file_to_write = open("test1.csv", 'a')
  79. file_to_write.write(header)
  80. with open(fname) as file_to_read:
  81.     for line in file_to_read:
  82.         file_to_write.write(format_line(line))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top