Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
- from sklearn import linear_model
- from sklearn.ensemble import GradientBoostingRegressor
# CSV file that main() loads the historical records from.
DATA_PATH = "./train.csv"

# Number of consecutive CLIENT_OUT values that form one model input window.
WINDOW_SIZE = 28

# Number of future values predicted for every ATM.
PREDICT_LENGTH = 33

# CSV file that main() writes the combined forecast to.
OUTPUT_FILE_PATH = "./submission.csv"
def init(data):
    """
    Split the input frame into one DataFrame per distinct ATM_ID.

    The frames are returned in order of first appearance of each ATM_ID,
    and every frame has its index reset to 0..n-1.
    """
    atm_ids = data['ATM_ID'].unique()
    return [
        data.loc[data['ATM_ID'].eq(atm_id)].reset_index(drop=True)
        for atm_id in atm_ids
    ]
def create_windows(df, window_size=None):
    """
    Build sliding-window training samples from the CLIENT_OUT column.

    Each sample holds ``window_size`` consecutive values and its target is
    the value that immediately follows that window.

    :param df: DataFrame with a ``CLIENT_OUT`` column (presumably ordered
        chronologically -- nothing here sorts it; verify against the caller).
    :param window_size: number of values per sample; defaults to the
        module-level ``WINDOW_SIZE``.
    :return: ``(x, y)`` where ``x`` is a list of 1-D arrays of length
        ``window_size`` and ``y`` is the array of matching targets. Both are
        empty when the column has no more than ``window_size`` values.
    """
    if window_size is None:
        window_size = WINDOW_SIZE

    # Series.as_matrix() was removed in pandas 1.0; .values is available on
    # every pandas version. Extract the array once instead of per window.
    series = df['CLIENT_OUT'].values

    x = [
        series[start:start + window_size]
        for start in range(len(series) - window_size)
    ]
    y = series[window_size:]
    return x, y
def gradient_boosting_model(x, y):
    """
    Train a GradientBoostingRegressor (default hyper-parameters) on the
    samples ``x`` with targets ``y`` and return the fitted estimator.
    """
    # fit() hands back the estimator itself, so build-and-train can be chained.
    return GradientBoostingRegressor().fit(x, y)
def predict_atms(df, model, window_size=None, predict_length=None):
    """
    Forecast future CLIENT_OUT values for a single ATM.

    The forecast is rolled forward one step at a time: the model is fed the
    most recent ``window_size`` values (observed or already predicted), and
    each prediction is appended to the history before the next step.

    :param df: DataFrame of one ATM with ``ATM_ID`` and ``CLIENT_OUT``
        columns (presumably in chronological order -- verify against caller).
    :param model: fitted estimator exposing ``predict(rows)``.
    :param window_size: number of trailing values fed to the model; defaults
        to the module-level ``WINDOW_SIZE``.
    :param predict_length: number of steps to forecast; defaults to the
        module-level ``PREDICT_LENGTH``.
    :return: DataFrame with columns ``DATE``, ``ATM_ID`` and ``CLIENT_OUT``
        holding ``predict_length`` rows, dated daily from 2017-08-16.
    :raises ValueError: if the history is shorter than ``window_size``.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if predict_length is None:
        predict_length = PREDICT_LENGTH

    # Series.as_matrix() was removed in pandas 1.0; .values works everywhere.
    history = df['CLIENT_OUT'].values.tolist()
    if len(history) < window_size:
        raise ValueError(
            "need at least %d observations to forecast, got %d"
            % (window_size, len(history))
        )

    forecast = []
    for _ in range(predict_length):
        # Always use the *latest* window. Indexing from the start of the
        # history (history[i:i + window_size]) would merely re-predict early,
        # already observed days instead of extending the series.
        window = history[len(history) - window_size:]
        # int() truncates the regressor output toward zero.
        next_value = int(model.predict([window])[0])
        history.append(next_value)
        forecast.append(next_value)

    atm_id = df['ATM_ID'].values[0]
    return pd.DataFrame({
        # periods= keeps the dates in step with predict_length; for the
        # default of 33 this is the same 2017-08-16 .. 2017-09-17 range.
        'DATE': pd.date_range('2017-08-16', periods=predict_length),
        'ATM_ID': [atm_id] * predict_length,
        'CLIENT_OUT': forecast,
    })
def main():
    """
    Run the whole pipeline: load the training CSV, fit one model per ATM,
    forecast each ATM and write all forecasts to ``OUTPUT_FILE_PATH``.

    :return: 0 on completion.
    """
    print("Started")
    # NOTE(review): parse_dates=True only asks pandas to parse the index;
    # with no index_col given, no column is converted to dates -- confirm
    # this is intended.
    data = pd.read_csv(DATA_PATH, parse_dates=True)

    # Collect the per-ATM forecasts and concatenate once: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole frame on every call.
    forecasts = []
    for df in init(data):
        x, y = create_windows(df)
        model = gradient_boosting_model(x, y)
        forecasts.append(predict_atms(df, model))

    if forecasts:
        prediction = pd.concat(forecasts)
    else:
        # pd.concat rejects an empty list; keep writing a header-only file.
        prediction = pd.DataFrame(columns=['DATE', 'ATM_ID', 'CLIENT_OUT'])

    prediction = prediction.set_index("DATE")
    prediction.to_csv(OUTPUT_FILE_PATH)
    return 0
if __name__ == "__main__":
    # Pass main()'s return code to the interpreter as the process exit
    # status instead of discarding it.
    raise SystemExit(main())
Add Comment
Please, Sign In to add comment