Guest User

Untitled

a guest
Apr 26th, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.97 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import pandas as pd
  6. from sklearn import linear_model
  7. from sklearn.ensemble import GradientBoostingRegressor
  8.  
  9.  
  10. DATA_PATH = "./train.csv"
  11. WINDOW_SIZE = 28
  12. PREDICT_LENGTH = 33
  13. OUTPUT_FILE_PATH = "./submission.csv"
  14.  
  15.  
  16. def init(data):
  17. """
  18. DataFrame list init
  19. """
  20. dfs = []
  21. for idx in data['ATM_ID'].unique():
  22. df = data[data['ATM_ID'] == idx]
  23. df = df.reset_index(drop=True)
  24. dfs.append(df)
  25. return dfs
  26.  
  27.  
  28. def create_windows(df):
  29. """
  30. Training windows init
  31. """
  32. x = [
  33. df['CLIENT_OUT'].as_matrix()[i:i+WINDOW_SIZE]
  34. for i in range(0, df['CLIENT_OUT'].size - WINDOW_SIZE)
  35. ]
  36. y = df['CLIENT_OUT'].as_matrix()[WINDOW_SIZE:]
  37. return x, y
  38.  
  39.  
  40. def gradient_boosting_model(x, y):
  41. """
  42. Gradient Boost regression model fitting
  43. """
  44. model = GradientBoostingRegressor()
  45. model.fit(x, y)
  46. return model
  47.  
  48.  
  49. def predict_atms(df, model):
  50. """
  51. Future data prediction
  52. """
  53. prediction = df['CLIENT_OUT'].as_matrix().tolist()
  54. for i in range(PREDICT_LENGTH):
  55. next_value = int(model.predict([prediction[i:i+WINDOW_SIZE]])[0])
  56. prediction.append(next_value)
  57. result = pd.DataFrame({
  58. 'DATE': pd.date_range('2017-08-16', '2017-09-17'),
  59. 'ATM_ID': [df['ATM_ID'].values[0] for _ in range(PREDICT_LENGTH)],
  60. 'CLIENT_OUT': prediction[-PREDICT_LENGTH:]
  61. })
  62. return result
  63.  
  64.  
  65. def main():
  66. print("Started")
  67. data = pd.read_csv(DATA_PATH, parse_dates=True)
  68. dfs = init(data)
  69. prediction = pd.DataFrame(columns=['DATE', 'ATM_ID', 'CLIENT_OUT'])
  70. for df in dfs:
  71. x, y = create_windows(df)
  72. model = gradient_boosting_model(x, y)
  73. prediction = prediction.append(predict_atms(df, model))
  74. prediction = prediction.set_index("DATE")
  75. prediction.to_csv(OUTPUT_FILE_PATH)
  76. return 0
  77.  
  78.  
  79. if __name__ == "__main__":
  80. main()
Add Comment
Please, Sign In to add comment