Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from collections import deque
- import math
- import numpy as np
- import pandas as pd
- import tensorflow as tf
- from sklearn.ensemble import ExtraTreesRegressor
- from sklearn.metrics import mean_squared_error
- from sklearn.model_selection import train_test_split
- from tensorflow import keras
- from tensorflow.keras import layers
- from automl.ml_pipeline.feature_engineering.feature_engineering_pipeline import FeatureEngineeringPipeline
- from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
- from sklearn.model_selection import GridSearchCV
class DeepQAgentFullPipelineRegression:
    """Deep-Q-style agent that learns to value feature-engineering pipelines
    for regression datasets.

    Each episode applies a random pipeline to a copy of the dataset, scores
    the dataset before and after with an ExtraTreesRegressor baseline, and
    stores ``(current_state, new_state, reward)`` transitions in a replay
    memory that is also persisted to CSV.  A Keras MLP is (optionally)
    trained on the replay memory to predict the reward of a state.
    """

    def __init__(self, dataset, X, y):
        self.model = self.create_model()
        self.dataset = dataset
        self.X = X
        self.y = y
        self.replay_memory = deque()
        # Bug fix: was initialised to 0, so `mse < best` could never hold
        # for a non-negative MSE and no best model was ever recorded.
        self.best_mse_score = math.inf
        self.best_model = None
        self.best_history = None
        self.read_replay_memory()

    @staticmethod
    def create_model():
        """Build and compile the fixed-architecture reward-prediction MLP.

        Returns:
            A compiled ``keras.Sequential`` model (33 inputs -> 1 linear
            output) with MSE loss and MAE/MSE metrics.
        """
        model = keras.Sequential(
            [
                layers.Dense(
                    35,
                    activation="relu",
                    input_dim=33,
                    kernel_initializer="lecun_uniform",
                ),
                layers.Dense(
                    200, activation="relu", kernel_initializer="lecun_uniform"
                ),
                layers.Dense(
                    200, activation="relu", kernel_initializer="lecun_uniform"
                ),
                layers.Dense(
                    1, activation="linear", kernel_initializer="lecun_uniform"
                ),
            ]
        )
        model.compile(loss="mse", optimizer="Adadelta", metrics=["mae", "mse"])
        return model

    @staticmethod
    def create_model_grid_search(activation="relu",
                                 dropout_rate=0.2,
                                 weight_constraint=0,
                                 hidden_neurons=30,
                                 optimizer="Adam",
                                 kernel_init="uniform", ):
        """Model factory for ``KerasRegressor``/``GridSearchCV``.

        Args:
            activation: Activation for the hidden layers (grid-searched).
            dropout_rate: Currently unused; kept so the grid-search param
                grid can re-enable it without an interface change.
            weight_constraint: Currently unused (see ``dropout_rate``).
            hidden_neurons: Width of the two hidden layers.
            optimizer: Keras optimizer name.
            kernel_init: Kernel initializer for every layer.

        Returns:
            A compiled ``keras.Sequential`` regression model.
        """
        model = keras.Sequential(
            [
                layers.Dense(
                    35,
                    activation=activation,
                    input_dim=33,
                    kernel_initializer=kernel_init,
                ),
                layers.Dense(
                    hidden_neurons, activation=activation, kernel_initializer=kernel_init
                ),
                layers.Dense(
                    hidden_neurons, activation=activation, kernel_initializer=kernel_init
                ),
                # Bug fix: the output layer previously used the searched
                # `activation` (e.g. softmax on a single unit always emits 1.0,
                # relu clips negative rewards).  A regression head must be
                # linear, matching `create_model`.
                layers.Dense(
                    1, activation="linear", kernel_initializer=kernel_init
                ),
            ]
        )
        model.compile(loss="mse", optimizer=optimizer, metrics=["mae", "mse"])
        return model

    def run_grid_search(self):
        """Grid-search MLP hyper-parameters over the replay-memory data.

        Prints the mean/std CV score for every parameter combination and
        the best configuration found.
        """
        param_grid = {
            'epochs': [100, 500, 1000],
            "batch_size": [20, 50, 85],
            #"optimizer": ["SGD", "RMSprop", "Adadelta", "Adam"],
            "kernel_init": ["uniform", "normal", "zero"],
            "activation": ["relu", "linear", "softmax"],
            #"weight_constraint": [1, 2, 3, 4, 5],
            #"dropout_rate": [0.1, 0.3, 0.5, 0.7, 0.9],
            "hidden_neurons": [50, 75, 150],
        }
        model = KerasRegressor(
            build_fn=self.create_model_grid_search, verbose=0, epochs=500,
        )
        grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=5, verbose=2)
        X, y = self.get_training_data()
        print(len(X))
        print(type(X))
        grid_result = grid.fit(X, np.array(y))
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_["mean_test_score"]
        stds = grid_result.cv_results_["std_test_score"]
        params = grid_result.cv_results_["params"]
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

    @staticmethod
    def get_score_regression(X, y):
        """Score a dataset with a fixed ExtraTrees baseline.

        Returns:
            Held-out MSE of an ``ExtraTreesRegressor`` on a 33% test split
            (fixed ``random_state`` so scores are comparable across calls).
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
        model = ExtraTreesRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        mse_score = mean_squared_error(y_test, predictions)
        return mse_score

    @staticmethod
    def _parse_state_vector(cell):
        """Parse one CSV cell holding a numpy-array string repr into floats.

        The cell looks like ``"[ 1.2  3.4 ...]"`` (possibly multi-line).
        ``split()`` (not ``split(' ')``) is required because numpy aligns
        columns with runs of spaces, which would otherwise yield empty
        tokens and crash ``float('')``.
        """
        cleaned = cell.strip("[").strip('\n').strip(']').replace('\n', '')
        return [float(token) for token in cleaned.split()]

    def read_replay_memory(self):
        """Load persisted transitions from CSV into ``self.replay_memory``.

        Rows containing any non-finite value are skipped.
        """
        df = pd.read_csv(
            "automl/ml_pipeline/feature_engineering/agents/regression/replay_memory_data/replay_memory_regression_full.csv"
        )
        self.replay_memory = deque()
        print(df.shape)
        for index, row in df.iterrows():
            current_state = self._parse_state_vector(row['0'])
            # Bug fix: the next state was parsed from column '0' (the
            # current state) instead of column '1', so every stored
            # transition had new_state == current_state.
            new_state = self._parse_state_vector(row['1'])
            reward = row["2"]
            # Bug fix: `math.inf in state` missed -inf and NaN; filter all
            # non-finite values so training data stays numeric.
            if not all(math.isfinite(v) for v in current_state + new_state):
                continue
            self.replay_memory.append((current_state, new_state, reward))
        print(self.replay_memory)
        print(len(self.replay_memory))

    def get_state(self, X, y):
        """Return ``(meta_features, baseline_score)`` describing dataset (X, y)."""
        meta_features = self.dataset.get_meta_features(X, y)
        dataset_score = self.get_score_regression(X, y)
        return meta_features, dataset_score

    def update_replay_memory(self, transition):
        """Append one ``(current_state, new_state, reward)`` transition."""
        self.replay_memory.append(transition)

    def get_training_data(self):
        """Convert the replay memory into (X, y) for reward regression.

        Returns:
            X: list of float32 tensors (one per stored current state).
            y: list of single-element reward lists, aligned with X.
        """
        X = []
        y = []
        for current_state, new_state, reward in self.replay_memory:
            X.append(tf.convert_to_tensor(current_state, dtype=tf.float32))
            y.append([reward])
        return X, y

    def train(self):
        """Fit the MLP on the replay memory and track the best model by MSE."""
        if len(self.replay_memory) <= 2:
            # Not enough transitions for a meaningful train/test split.
            return
        X, y = self.get_training_data()
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(X), np.array(y), test_size=0.33)
        history = self.model.fit(X_train, y_train, epochs=10, validation_split=0.2)
        loss, mae, mse = self.model.evaluate(X_test, y_test)
        # Works together with the math.inf initialisation in __init__:
        # the first evaluated model always becomes the best one.
        if mse < self.best_mse_score:
            self.best_mse_score = mse
            self.best_model = self.model
            hist = pd.DataFrame(history.history)
            hist["epoch"] = history.epoch
            self.best_history = hist

    def run_episodes(self, n_episodes):
        """Run ``n_episodes`` random-pipeline episodes, persisting each transition.

        Args:
            n_episodes: Number of episodes to run.
        """
        for i in range(n_episodes):
            print('Episode', i)
            working_X = self.X.copy()
            working_y = self.y.copy()
            current_meta_features, current_dataset_score = self.get_state(working_X, working_y)
            current_state = np.append(current_meta_features.values, current_dataset_score)
            # NOTE(review): passing working_y as the first argument looks
            # suspicious (working_X would be expected); confirm against the
            # FeatureEngineeringPipeline signature before changing.
            feature_eng = FeatureEngineeringPipeline(working_y, problem_type=self.dataset.problem_type)
            working_X, working_y, pipeline = feature_eng.execute_random_pipeline(working_X, working_y)
            if pipeline is None:
                print('Upsss, no pipeline')
                continue
            print("JUST GOT PIPELINE: ", pipeline)
            current_state = np.append(current_state, np.array(list(pipeline.values())))
            transformed_dataset_meta_features, transformed_dataset_score = self.get_state(working_X, working_y)
            transformed_state = np.append(transformed_dataset_meta_features.values, transformed_dataset_score)
            # Reward is the score improvement (MSE decrease) from the pipeline.
            reward = current_dataset_score - transformed_dataset_score
            transition = (current_state, transformed_state, reward)
            self.update_replay_memory(transition)
            #self.train()
            # Bug fix: previously the ENTIRE replay memory was appended to
            # the CSV every episode (and the file is re-read on init), so
            # historical transitions were duplicated quadratically.  Append
            # only the newly generated transition.
            df = pd.DataFrame([transition])
            df.to_csv(
                "automl/ml_pipeline/feature_engineering/agents/regression/replay_memory_data/replay_memory_regression_full.csv"
                , index=False,
                mode="a",
                header=False
            )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement