Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many useful features!
import numpy as np
import pandas as pd
import dill as pickle  # dill handles objects plain pickle can't (lambdas, closures)
from bandits.empirical import EmpiricalWorld
from bandits.armspec import ArmSpec
from bandits.bandit_estimators import LinearRegression
from sklearn.linear_model import Ridge

# Build a synthetic 3-arm contextual-bandit history and pickle the resulting
# EmpiricalWorld so downstream experiments can reload it.
N = 590  # number of historical rounds
p = 10   # context dimensionality
# NOTE(review): no RNG seed is set, so each run produces a different world —
# confirm whether reproducibility is wanted before adding np.random.seed().
armspec = ArmSpec(3)

# Contexts: N iid standard-normal feature vectors.
context_history = pd.DataFrame(np.random.randn(N, p))
arm_history = armspec.sample(N)

# Per-arm potential outcomes, each a simple linear function of one context
# feature (arm '1' is inverted so the arms genuinely differ).
all_rewards = pd.DataFrame({'0': context_history.iloc[:, 0],
                            '1': 1 - context_history.iloc[:, 1],
                            '2': context_history.iloc[:, 2]})

# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0. This
# get_indexer + fancy-indexing form is the documented replacement: for each
# round, pick the reward of the arm actually pulled. Assumes the labels in
# arm_history['arm'] match the column names ('0'/'1'/'2') — the same
# contract the old .lookup() call required.
row_idx = all_rewards.index.get_indexer(arm_history.index)
col_idx = all_rewards.columns.get_indexer(arm_history['arm'])
reward_history = all_rewards.to_numpy()[row_idx, col_idx]

world = EmpiricalWorld(
    context_history=context_history,
    arm_history=arm_history,
    reward_history=reward_history,
    armspec=armspec,
    # Near-zero ridge penalty: effectively OLS, but numerically stable.
    outcome_model_estimator=Ridge(alpha=1e-5),
)

# Plain string literal — the original f-string had no placeholders.
FNAME = 'toronto_world_dummy_ridge_witheffect.pkl'
with open(FNAME, 'wb') as fl:
    pickle.dump(world, fl)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement