Advertisement
Guest User

Untitled

a guest
Apr 20th, 2018
182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.95 KB | None | 0 0
  1. import re
  2.  
  3. header = """
  4. @RELATION adtracking
  5.  
  6.  
  7. @ATTRIBUTE ip NUMERIC
  8. @ATTRIBUTE app NUMERIC
  9. @ATTRIBUTE device NUMERIC
  10. @ATTRIBUTE os NUMERIC
  11. @ATTRIBUTE channel NUMERIC
  12. @ATTRIBUTE click_time_y NUMERIC
  13. @ATTRIBUTE click_time_m NUMERIC
  14. @ATTRIBUTE click_time_d NUMERIC
  15. @ATTRIBUTE is_attributed {0, 1}
  16.  
  17.  
  18. @DATA
  19.  
  20. """
  21.  
  22.  
  23. def separated_date(lista):
  24. date = lista[:3]
  25. horario = lista[3:]
  26. date_string = ""
  27. for el in date:
  28. date_string += el + ","
  29.  
  30. horario_string = ""
  31. for el in horario:
  32. horario_string += el + ","
  33. return date_string + horario_string
  34.  
  35.  
  36.  
  37. def resultset_toString(lista):
  38. date = lista[:3]
  39. horario = lista[3:]
  40. date_string = ""
  41. for el in date:
  42. date_string += el + "-"
  43.  
  44. horario_string = ""
  45. for el in horario:
  46. horario_string += el + ":"
  47.  
  48. return """'""" + date_string[:-1] + " " + horario_string[:-1] + """'"""
  49.  
  50. def format_line(linez):
  51. one_date = False
  52. result = re.findall(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})', linez)
  53. if len(result) <= 0:
  54. return linez
  55. dates_formated = []
  56. for i in result:
  57. dates_formated.append(resultset_toString(i))
  58.  
  59. if(len(dates_formated) <= 1):
  60. one_date = True
  61. dates_decomposited = []
  62.  
  63. for i in result:
  64. dates_decomposited.append(separated_date(i))
  65.  
  66. for el in list(zip(dates_formated, dates_decomposited)):
  67. data_formatada = el[0][:-1]
  68. data_formatada = data_formatada[1:]
  69. if(one_date):
  70. linez = linez.replace(data_formatada, el[1])
  71. linez = linez.replace(",,", "")
  72. else:
  73. linez = linez.replace(data_formatada, el[1])
  74. return linez
  75.  
  76. fname = "train_sample.csv"
  77.  
  78. file_to_write = open("test1.csv", 'a')
  79. file_to_write.write(header)
  80. with open(fname) as file_to_read:
  81. for line in file_to_read:
  82. file_to_write.write(format_line(line))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement