Advertisement
Jim421616

nn-MWE-cross_val_score

Jan 9th, 2023
1,472
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.29 KB | Source Code | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Jan  9 08:30:36 2023
  4.  
  5. @author: JeremyMoss
  6. """
  7.  
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score
from tensorflow import keras
  17.  
# Wall-clock reference taken at import time; the final print of the script
# subtracts it from time.time() to report the total elapsed seconds.
start_time = time.time()
  19.  
  20. def make_test_df(n_galaxies=10, n_mags=5, seed=0, file_name = 'test_dataset.csv'):
  21.     # Make a toy dataset
  22.     if seed:
  23.         seed = seed
  24.     else:
  25.         seed = np.random.seed()
  26.     np.random.seed(seed)
  27.     data = np.random.uniform(10, 20, (n_galaxies,n_mags))
  28.     try:
  29.         data[np.diag_indices(n_mags)] = np.nan
  30.     except IndexError:
  31.         print('Cannot generate dataset: n_galaxies ({0}) must be >= n_mags ({1})'.format(n_galaxies, n_mags))
  32.     np.random.shuffle(data)
  33.    
  34.     magnames = [f'mag{i}' for i in range(1, n_mags + 1)]
  35.    
  36.     df = pd.DataFrame(data, columns=magnames)
  37.     df.insert(0, 'Name', [f'Galaxy {i}' for i in range(1, n_galaxies + 1)])
  38.  
  39.     # Generate redshift, RA and dec
  40.     df['redshift'] = np.random.uniform(0.01, 5, n_galaxies) # generate redshift col
  41.     df['RAJ2000'] = np.random.uniform(8, 8.1, n_galaxies)   # generate RA col
  42.     df['DEJ2000'] = np.random.uniform(5, 5.1, n_galaxies)   # generate dec col
  43.  
  44.     # Move RA and dec to positions 1 and 2
  45.     df.insert(1, 'RAJ2000', df.pop('RAJ2000'))
  46.     df.insert(2, 'DEJ2000', df.pop('DEJ2000'))
  47.  
  48.     # Save as file
  49.     path = ''
  50.     df.to_csv(path + file_name, index = False)
  51.  
  52. def loaddata(name, colours = False, impute_method = None, cols = None,
  53.              dropna = True, number_of_rows = 'all'):
  54.     # Load a dataset
  55.     path = ''
  56.     df = pd.read_csv(path + 'test_dataset.csv',
  57.                      sep = ',', index_col = False, header = 0)
  58.    
  59.     datasetname = 'Test dataset'
  60.     print('Colours cannot be computed for the test frame')
  61.     magnames = df.columns[3:-1]
  62.    
  63.     mgf = df[magnames]
  64.     df = df.where(df != -999, np.nan)
  65.     mgf = mgf.where(mgf != -999, np.nan)
  66.    
  67.     return df, datasetname, magnames, mgf
  68.  
  69. def build_nn_model(n, hyperparameters, loss, metrics, opt):
  70.     model = keras.Sequential([
  71.     keras.layers.Dense(hyperparameters[0], activation=hyperparameters[1], # number of outputs to next layer
  72.                            input_shape=[n]),  # number of features
  73.     keras.layers.Dense(hyperparameters[2], activation=hyperparameters[3]),
  74.     keras.layers.Dense(hyperparameters[4], activation=hyperparameters[5]),
  75.  
  76.     keras.layers.Dense(1) # 1 output (redshift)
  77.     ])
  78.  
  79.     model.compile(loss=loss,
  80.                   optimizer = opt,
  81.             metrics = metrics)
  82.     return model
  83.  
  84. #%% Load data
  85. make_test_df(100, 20, 0)
  86. dataset, datasetname, magnames, mags = loaddata('test',
  87.                                                    dropna = False,  # to drop NaNs
  88.                                                    colours = False, # to compute colours of mags
  89.                                                    impute_method = None) # to impute max vals for missing data
  90.  
  91. #%% Main body
  92.  
  93. hyperparams = [100, 'relu', 100, 'relu', 100, 'relu']
  94. loss = 'mae'
  95. metrics = ['mae']
  96. epochs = 100
  97. opt = 'Nadam'
  98.  
  99. train_frac = 0.8
  100.  
  101. X_train, X_test, y_train, y_test = train_test_split(mags, # features
  102.                                                 dataset['redshift'], # target
  103.                                                 train_size = train_frac)
  104.  
  105. model = build_nn_model(len(mags.columns), hyperparams, loss, metrics, opt)
  106. model.summary()
  107. early_stop = keras.callbacks.EarlyStopping(patience=100)
  108.  
  109. history = model.fit(X_train, y_train, epochs = epochs,
  110.                     validation_split = 1 - train_frac,
  111.                     verbose = 0, callbacks = [early_stop,
  112.                                               tfdocs.modeling.EpochDots()])
  113. y_pred = model.predict(X_test)
  114.  
  115. optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
  116. epochs = [10, 50, 100]
  117.  
  118. param_grid = dict(epochs=epochs, optimizer=optimizer)
  119. grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', n_jobs=-1, refit='boolean')
  120. grid_result = grid.fit(X_train, y_train)
  121.  
  122. print("Model completed in", time.time() - start_time, "seconds")
  123.  
Advertisement
Comments
  • Jim421616
    2 years
    # Python 0.27 KB | 0 0
    1. Why does this throw an error:
    2. TypeError: Cannot clone object '<keras.engine.sequential.Sequential object at 0x0000028B8C50C0D0>' (type <class 'keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.
Add Comment
Please, Sign In to add comment
Advertisement