Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- from sklearn.model_selection import train_test_split
# Read the CSV using its first column as a parsed datetime index, put the rows
# in chronological order, then aggregate them into one summed row per day.
data = (
    pd.read_csv(
        '/datasets/energy_consumption.csv',
        index_col=[0],
        parse_dates=[0],
    )
    .sort_index()
    .resample('1D')
    .sum()
)
def make_features(data, max_lag, rolling_mean_size, *, target_column='PJME_MW'):
    """Add calendar, lag and rolling-mean feature columns to ``data`` in place.

    New columns written to ``data``:
      * ``year``, ``month``, ``day``, ``dayofweek`` -- read from ``data.index``,
        which therefore has to expose those attributes (e.g. a DatetimeIndex).
      * ``lag_1`` ... ``lag_<max_lag>`` -- the target column shifted down by
        that many rows.
      * ``rolling_mean`` -- mean of the target over a window of
        ``rolling_mean_size`` rows, computed on the target shifted by one row.

    Args:
        data: DataFrame to extend; it is modified in place.
        max_lag: Largest lag to generate; values below 1 produce no lag columns.
        rolling_mean_size: Window size passed to ``Series.rolling``.
        target_column: Name of the column the lag and rolling features are
            derived from. Defaults to ``'PJME_MW'``.

    Returns:
        None. The leading rows of the lag and rolling columns are NaN because
        there is no earlier data to shift in.
    """
    index = data.index
    data['year'] = index.year
    data['month'] = index.month
    data['day'] = index.day
    data['dayofweek'] = index.dayofweek

    target = data[target_column]
    for lag in range(1, max_lag + 1):
        data['lag_{}'.format(lag)] = target.shift(lag)

    # Shift by one row before averaging so that a row's own target value is
    # never part of its rolling-mean feature.
    data['rolling_mean'] = target.shift().rolling(rolling_mean_size).mean()
# The lag depth and rolling window used here are arbitrary values.
make_features(data, 1, 1)

# Split chronologically: train_test_split shuffles rows by default, which for
# a time series would place later observations in the training set and
# earlier ones in the test set. shuffle=False keeps the first 80% of the rows
# for training and the remaining 20% for testing.
train, test = train_test_split(data, train_size=0.8, shuffle=False)

# The first rows have NaN lag / rolling-mean features (nothing to shift in);
# with an ordered split they all fall in the training part.
train = train.dropna()

print(train.shape)
print(test.shape)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement