Advertisement
Guest User

uN

a guest
Apr 6th, 2020
375
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.12 KB | None | 0 0
  1. from collections import deque
  2. import math
  3. import numpy as np
  4. import pandas as pd
  5. import tensorflow as tf
  6. from sklearn.ensemble import ExtraTreesRegressor
  7. from sklearn.metrics import mean_squared_error
  8. from sklearn.model_selection import train_test_split
  9. from tensorflow import keras
  10. from tensorflow.keras import layers
  11.  
  12. from automl.ml_pipeline.feature_engineering.feature_engineering_pipeline import FeatureEngineeringPipeline
  13.  
  14.  
  15. from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
  16. from sklearn.model_selection import GridSearchCV
  17. class DeepQAgentFullPipelineRegression:
  18.     def __init__(self, dataset, X, y):
  19.         self.model = self.create_model()
  20.         self.dataset = dataset
  21.         self.X = X
  22.         self.y = y
  23.         self.replay_memory = deque()
  24.  
  25.         self.best_mse_score = 0
  26.         self.best_model = None
  27.         self.best_history = None
  28.         self.read_replay_memory()
  29.  
  30.     @staticmethod
  31.     def create_model():
  32.         model = keras.Sequential(
  33.             [
  34.                 layers.Dense(
  35.                     35,
  36.                     activation="relu",
  37.                     input_dim=33,
  38.                     kernel_initializer="lecun_uniform",
  39.                 ),
  40.                 layers.Dense(
  41.                     200, activation="relu", kernel_initializer="lecun_uniform"
  42.                 ),
  43.                 layers.Dense(
  44.                     200, activation="relu", kernel_initializer="lecun_uniform"
  45.                 ),
  46.                 layers.Dense(
  47.                     1, activation="linear", kernel_initializer="lecun_uniform"
  48.                 ),
  49.             ]
  50.         )
  51.  
  52.         model.compile(loss="mse", optimizer="Adadelta", metrics=["mae", "mse"])
  53.         return model
  54.  
  55.     @staticmethod
  56.     def create_model_grid_search(activation="relu",
  57.                                  dropout_rate=0.2,
  58.                                  weight_constraint=0,
  59.                                  hidden_neurons=30,
  60.                                  optimizer="Adam",
  61.                                  kernel_init="uniform", ):
  62.         model = keras.Sequential(
  63.             [
  64.                 layers.Dense(
  65.                     35,
  66.                     activation=activation,
  67.                     input_dim=33,
  68.                     kernel_initializer=kernel_init,
  69.                 ),
  70.                 layers.Dense(
  71.                     hidden_neurons, activation=activation, kernel_initializer=kernel_init
  72.                 ),
  73.                 layers.Dense(
  74.                     hidden_neurons, activation=activation, kernel_initializer=kernel_init
  75.                 ),
  76.                 layers.Dense(
  77.                     1, activation=activation, kernel_initializer=kernel_init
  78.                 ),
  79.             ]
  80.         )
  81.  
  82.         model.compile(loss="mse", optimizer=optimizer, metrics=["mae", "mse"])
  83.         return model
  84.  
  85.     def run_grid_search(self):
  86.  
  87.         param_grid = {
  88.             'epochs': [100, 500, 1000],
  89.             "batch_size": [20, 50, 85],
  90.             #"optimizer": ["SGD", "RMSprop", "Adadelta", "Adam"],
  91.             "kernel_init": ["uniform", "normal", "zero"],
  92.             "activation": ["relu", "linear", "softmax"],
  93.             #"weight_constraint": [1, 2, 3, 4, 5],
  94.             #"dropout_rate": [0.1, 0.3, 0.5, 0.7, 0.9],
  95.             "hidden_neurons": [50, 75, 150],
  96.         }
  97.  
  98.         model = KerasRegressor(
  99.             build_fn=self.create_model_grid_search, verbose=0, epochs=500,
  100.         )
  101.  
  102.         grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=5, verbose=2)
  103.  
  104.         X, y = self.get_training_data()
  105.         print(len(X))
  106.         print(type(X))
  107.         grid_result = grid.fit(X, np.array(y))
  108.  
  109.         print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
  110.         means = grid_result.cv_results_["mean_test_score"]
  111.         stds = grid_result.cv_results_["std_test_score"]
  112.         params = grid_result.cv_results_["params"]
  113.         for mean, stdev, param in zip(means, stds, params):
  114.             print("%f (%f) with: %r" % (mean, stdev, param))
  115.         print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
  116.  
  117.     @staticmethod
  118.     def get_score_regression(X, y):
  119.         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
  120.  
  121.         model = ExtraTreesRegressor(n_estimators=100, random_state=42)
  122.         model.fit(X_train, y_train)
  123.  
  124.         predictions = model.predict(X_test)
  125.         mse_score = mean_squared_error(y_test, predictions)
  126.  
  127.         return mse_score
  128.  
  129.     def read_replay_memory(self):
  130.         df = pd.read_csv(
  131.             "automl/ml_pipeline/feature_engineering/agents/regression/replay_memory_data/replay_memory_regression_full.csv"
  132.         )
  133.         self.replay_memory = deque()
  134.         print(df.shape)
  135.         for index, row in df.iterrows():
  136.             row['0'] = row['0'].strip("[").strip('\n').strip(']').replace('\n', '')
  137.             row['1'] = row['1'].strip("[").strip('\n').strip(']').replace('\n', '')
  138.             current_state = [float(state) for state in row['0'].split(' ')]
  139.             new_state = [float(state) for state in row['0'].split(' ')]
  140.             reward = row["2"]
  141.  
  142.             if math.inf in current_state or math.inf in new_state:
  143.                 continue
  144.             self.replay_memory.append((current_state, new_state, reward))
  145.         print(self.replay_memory)
  146.         print(len(self.replay_memory))
  147.  
  148.     def get_state(self, X, y):
  149.         meta_features = self.dataset.get_meta_features(X, y)
  150.         dataset_score = self.get_score_regression(X, y)
  151.         return meta_features, dataset_score
  152.  
  153.     def update_replay_memory(self, transition):
  154.         self.replay_memory.append(transition)
  155.  
  156.     def get_training_data(self):
  157.         X = []
  158.         y = []
  159.  
  160.         for index, (current_state, new_state, reward) in enumerate(self.replay_memory):
  161.             X.append(tf.convert_to_tensor(current_state, dtype=tf.float32))
  162.             y.append([reward])
  163.         return X, y
  164.  
  165.     def train(self):
  166.         if len(self.replay_memory) <= 2:
  167.             return
  168.         X, y = self.get_training_data()
  169.  
  170.         X_train, X_test, y_train, y_test = train_test_split(
  171.             np.array(X), np.array(y), test_size=0.33)
  172.  
  173.         history = self.model.fit(X_train, y_train, epochs=10, validation_split=0.2)
  174.  
  175.         loss, mae, mse = self.model.evaluate(X_test, y_test)
  176.  
  177.         if self.best_mse_score > mse:
  178.             self.best_mse_score = mse
  179.             self.best_model = self.model
  180.             hist = pd.DataFrame(history.history)
  181.             hist["epoch"] = history.epoch
  182.             self.best_history = hist
  183.  
  184.     def run_episodes(self, n_episodes):
  185.         for i in range(n_episodes):
  186.             print('Episode', i)
  187.             working_X = self.X.copy()
  188.             working_y = self.y.copy()
  189.  
  190.             current_meta_features, current_dataset_score = self.get_state(working_X, working_y)
  191.             current_state = np.append(current_meta_features.values, current_dataset_score)
  192.  
  193.             feature_eng = FeatureEngineeringPipeline(working_y, problem_type=self.dataset.problem_type)
  194.  
  195.             working_X, working_y, pipeline = feature_eng.execute_random_pipeline(working_X, working_y)
  196.  
  197.             if pipeline is None:
  198.                 print('Upsss, no pipeline')
  199.                 continue
  200.             print("JUST GOT PIPELINE: ", pipeline)
  201.  
  202.             current_state = np.append(current_state, np.array(list(pipeline.values())))
  203.  
  204.             transformed_dataset_meta_features, transformed_dataset_score = self.get_state(working_X, working_y)
  205.             transformed_state = np.append(transformed_dataset_meta_features.values, transformed_dataset_score)
  206.  
  207.             reward = current_dataset_score-transformed_dataset_score
  208.  
  209.             self.update_replay_memory((current_state, transformed_state, reward))
  210.             #self.train()
  211.  
  212.         df = pd.DataFrame(self.replay_memory)
  213.  
  214.         df.to_csv(
  215.             "automl/ml_pipeline/feature_engineering/agents/regression/replay_memory_data/replay_memory_regression_full.csv"
  216.             ,index=False,
  217.             mode="a",
  218.             header=False
  219.         )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement