Advertisement
Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- import seaborn as sns
- import pandas as pd
- import numpy as np
- dom = range(0,11)
- H = 8
- ## Noiseless paths
- # Upward exponential --> 'Real' increase
- f1 = lambda x : 10 + np.exp(0.33*x) / 3
- y1 = [f1(x) for x in dom]
- # Downward exponential --> 'Real' decrease
- f2 = lambda x : 10 - np.exp(0.33*x) / 3
- y2 = [f2(x) for x in dom]
- # Upward exponential --> 'Fake' increase
- f3 = lambda x : 10 + (-(x-5)**2 + 5**2) / 9
- y3 = [f3(x) for x in dom]
- # Downward exponential --> 'Fake' increase
- f4 = lambda x : 10 - (-(x-5)**2 + 5**2) / 9
- y4 = [f4(x) for x in dom]
- ## Noisy Paths
- sigma = 0.75
- y1_ = [f1(x) + np.random.randn()*sigma for x in dom]
- y2_ = [f2(x) + np.random.randn()*sigma for x in dom]
- y3_ = [f3(x) + np.random.randn()*sigma for x in dom]
- y4_ = [f4(x) + np.random.randn()*sigma for x in dom]
# Plots: noiseless paths (left) vs the same paths with noise (right).
# NOTE(review): relies on `plt` (matplotlib.pyplot) being imported
# elsewhere -- this paste contains no `import matplotlib.pyplot as plt`.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
colors = ['g', 'r', 'orange', 'lightgreen']  # reused by the result plots below

# (label, index into `colors`) in the original plotting/legend order.
_legend = [('Real Increase', 0), ('Fake Increase', 2),
           ('Real Decrease', 1), ('Fake Decrease', 3)]

ax1.set_title("Noiseless")
ax1.axhline(10, color='k')
ax1.axvline(H, color='k', linestyle=':')  # prediction horizon
for ys, (label, ci) in zip([y1, y3, y2, y4], _legend):
    ax1.plot(dom, ys, marker='o', color=colors[ci], label=label)
ax1.legend(frameon=True)

ax2.set_title("Noisy")
ax2.axhline(10, color='k')
# Was a hard-coded 8; use H so the two panels stay in sync if H changes.
ax2.axvline(H, color='k', linestyle=':')
for ys, (label, ci) in zip([y1_, y3_, y2_, y4_], _legend):
    ax2.plot(dom, ys, marker='o', color=colors[ci], label=label)
ax2.legend(frameon=True)

plt.tight_layout()
plt.savefig("noiseless_and_noisy.png")
# Simulated noisy data: N sample paths per class.  Each row is the 11
# noisy path values followed by a class marker (100/200/300/400);
# dividing the whole frame by 100 rescales the series and turns the
# marker into the class label 1..4 in a single step.
# (Row variables renamed: the original reused y1..y4 and shadowed the
# noiseless series plotted above.)
N = 100
rows = []
for _ in range(N):
    for cls, f in enumerate((f1, f2, f3, f4), start=1):
        path = [f(x) + np.random.randn() * sigma for x in dom]
        rows.append(path + [cls * 100])
ds = pd.DataFrame(rows) / 100
ds.rename({11: 'c'}, axis=1, inplace=True)
# Target: the change from the last input lag (7) to the end of the path (10).
ds['y'] = ds[10] - ds[7]
# Train / validation / test split: 60% / 20% / 20%, in row order.
N = len(ds)  # NOTE: rebinds N (previously the paths-per-class count)
N1 = int(N * 0.6)
N2 = int(N * 0.8)
train = ds.iloc[:N1]
vali = ds.iloc[N1:N2]
test = ds.iloc[N2:]

# Feature columns are the first H lags (0..H-1); the target is 'y'.
lags = np.arange(0, 11)

# For Random Forest: 2-D feature matrix, 1-D target vector.
train_X_RF = train[lags[:H]].values
train_y_RF = train['y'].values
vali_X_RF = vali[lags[:H]].values
vali_y_RF = vali['y'].values
test_X_RF = test[lags[:H]].values
test_y_RF = test['y'].values

# For LSTM: 3-D input (samples, timesteps, features=1), 2-D target.
train_X_LS = train[lags[:H]].values.reshape(len(train), H, 1)
train_y_LS = train[['y']].values
vali_X_LS = vali[lags[:H]].values.reshape(len(vali), H, 1)
vali_y_LS = vali[['y']].values
test_X_LS = test[lags[:H]].values.reshape(len(test), H, 1)
test_y_LS = test[['y']].values

# Copy of the test set (predictions are added as columns below).
# Was `ds.iloc[N2:].copy()` -- identical to `test`, so copy it directly.
te = test.copy()
# Train the Random Forest model.
# NOTE(review): RandomForestRegressor is used but never imported in this
# paste (`from sklearn.ensemble import RandomForestRegressor`).
RF = RandomForestRegressor(random_state=0, n_estimators=20)
RF.fit(train_X_RF, train_y_RF)

# Out-of-sample prediction (Random Forest).
te['RF'] = RF.predict(test_X_RF)

# Train the LSTM model.
# NOTE(review): Sequential/LSTM are also never imported here
# (e.g. `from keras.models import Sequential`, `from keras.layers import LSTM`).
model = Sequential()
model.add(LSTM(1, batch_input_shape=(None, H, 1), return_sequences=True))
model.add(LSTM(1, return_sequences=False))
# 'accuracy' is a classification metric and is meaningless for this
# regression task, so only the MSE loss is tracked.
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(train_X_LS, train_y_LS, epochs=100,
                    validation_data=(vali_X_LS, vali_y_LS), verbose=0)

# Out-of-sample prediction (LSTM).
te['LSTM'] = model.predict(test_X_LS)
# Scatter predictions vs. targets per class: Random Forest left, LSTM right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5), sharey=True)

for ax, pred_col, title in ((ax1, 'RF', "Random Forest"),
                            (ax2, 'LSTM', "LSTM")):
    ax.set_title(title)
    # Classes 1..4 in order, reusing the color scheme of the path plots;
    # *100 undoes the earlier /100 rescaling for display.
    for cls in (1, 2, 3, 4):
        (te[te['c'] == cls] * 100).plot.scatter(
            pred_col, 'y', ax=ax, color=colors[cls - 1], s=50, alpha=0.75)
    ax.xaxis.set_label_text("Prediction")
    ax.yaxis.set_label_text("Target")

plt.tight_layout()
plt.savefig("experimental_results.png")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement