Advertisement
Guest User

Untitled

a guest
Feb 17th, 2020
1,219
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.91 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import train_test_split
  4.  
  5.  
  6. data = pd.read_csv('/datasets/energy_consumption.csv', index_col=[0], parse_dates=[0])
  7. data.sort_index(inplace=True)
  8. data = data.resample('1D').sum()
  9.  
  10. def make_features(data, max_lag, rolling_mean_size):
  11. data['year'] = data.index.year
  12. data['month'] = data.index.month
  13. data['day'] = data.index.day
  14. data['dayofweek'] = data.index.dayofweek
  15.  
  16. for lag in range(1, max_lag + 1):
  17. data['lag_{}'.format(lag)] = data['PJME_MW'].shift(lag)
  18.  
  19. data['rolling_mean'] = data['PJME_MW'].shift().rolling(rolling_mean_size).mean()
  20.  
  21. # мы выбрали произвольные значения аргументов
  22. make_features(data, 1, 1)
  23.  
  24. # < напишите код здесь >
  25. train, test = train_test_split(data, train_size=0.8)
  26. train = train.dropna()
  27.  
  28. print(train.shape)
  29. print(test.shape)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement