Advertisement
Jim421616

nn-MWE

Jan 8th, 2023 (edited)
3,282
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.94 KB | Source Code | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Jan  9 08:30:36 2023
  4.  
  5. @author: JeremyMoss
  6. """
  7.  
  8. import numpy as np
  9. from sklearn.model_selection import train_test_split
  10. from tensorflow import keras
  11. import tensorflow_docs.modeling
  12. import tensorflow_docs as tfdocs
  13. import pandas as pd
  14. import time
  15. import matplotlib.pyplot as plt
  16.  
  17. start_time = time.time()  # wall-clock reference; the elapsed time since this point is printed after the training loop
  18.  
  19. def make_test_df(n_galaxies=10, n_mags=5, seed=0, file_name = 'test_dataset.csv'):
  20.     # Make a toy dataset
  21.     if seed:
  22.         seed = seed
  23.     else:
  24.         seed = np.random.seed()
  25.     np.random.seed(seed)
  26.     data = np.random.uniform(10, 20, (n_galaxies,n_mags))
  27.     try:
  28.         data[np.diag_indices(n_mags)] = np.nan
  29.     except IndexError:
  30.         print('Cannot generate dataset: n_galaxies ({0}) must be >= n_mags ({1})'.format(n_galaxies, n_mags))
  31.     np.random.shuffle(data)
  32.    
  33.     magnames = [f'mag{i}' for i in range(1, n_mags + 1)]
  34.    
  35.     df = pd.DataFrame(data, columns=magnames)
  36.     df.insert(0, 'Name', [f'Galaxy {i}' for i in range(1, n_galaxies + 1)])
  37.  
  38.     # Generate redshift, RA and dec
  39.     df['redshift'] = np.random.uniform(0.01, 5, n_galaxies) # generate redshift col
  40.     df['RAJ2000'] = np.random.uniform(8, 8.1, n_galaxies)   # generate RA col
  41.     df['DEJ2000'] = np.random.uniform(5, 5.1, n_galaxies)   # generate dec col
  42.  
  43.     # Move RA and dec to positions 1 and 2
  44.     df.insert(1, 'RAJ2000', df.pop('RAJ2000'))
  45.     df.insert(2, 'DEJ2000', df.pop('DEJ2000'))
  46.  
  47.     # Save as file
  48.     path = ''
  49.     df.to_csv(path + file_name, index = False)
  50.  
  51. def loaddata(name, colours = False, impute_method = None, cols = None,
  52.              dropna = True, number_of_rows = 'all'):
  53.     # Load a dataset
  54.     path = ''
  55.     df = pd.read_csv(path + 'test_dataset.csv',
  56.                      sep = ',', index_col = False, header = 0)
  57.    
  58.     datasetname = 'Test dataset'
  59.     print('Colours cannot be computed for the test frame')
  60.     magnames = df.columns[3:-1]
  61.    
  62.     mgf = df[magnames]
  63.     df = df.where(df != -999, np.nan)
  64.     mgf = mgf.where(mgf != -999, np.nan)
  65.    
  66.     return df, datasetname, magnames, mgf
  67.  
  68. def build_nn_model(n, hyperparameters, loss, metrics, opt):
  69.     model = keras.Sequential([
  70.     keras.layers.Dense(hyperparameters[0], activation=hyperparameters[1], # number of outputs to next layer
  71.                            input_shape=[n]),  # number of features
  72.     keras.layers.Dense(hyperparameters[2], activation=hyperparameters[3]),
  73.     keras.layers.Dense(hyperparameters[4], activation=hyperparameters[5]),
  74.  
  75.     keras.layers.Dense(1) # 1 output (redshift)
  76.     ])
  77.  
  78.     model.compile(loss=loss,
  79.                   optimizer = opt,
  80.             metrics = metrics)
  81.     return model
  82.  
  83. #%% Load data
  84. make_test_df(100, 20, 0)
  85. dataset, datasetname, magnames, mags = loaddata('test',
  86.                                                    dropna = False,  # to drop NaNs
  87.                                                    colours = False, # to compute colours of mags
  88.                                                    impute_method = 'max') # to impute max vals for missing data
  89.  
  90. #%% Main body
  91.  
  92. hyperparams = [100, 'relu', 100, 'relu', 100, 'relu']
  93. loss = 'mae'
  94. metrics = ['mae']
  95. epochs = 100
  96. opt = 'Nadam'
  97.  
  98. num_trials = 3
  99. mean_list = []
  100. std_list = []
  101. train_frac = 0.8
  102.  
  103. for i in range(num_trials):
  104.     # Create a new model and predictions on each iteration
  105.     print('*'*58);print('Run {0} of {1}'.format(i+1, num_trials)); print('*'*58)
  106.     X_train, X_test, y_train, y_test = train_test_split(mags, # features
  107.                                                 dataset['redshift'], # target
  108.                                                 train_size = train_frac)
  109.     model = build_nn_model(len(mags.columns), hyperparams, loss, metrics, opt)
  110.     model.summary()
  111.     early_stop = keras.callbacks.EarlyStopping(patience=100)
  112.    
  113.     history = model.fit(X_train, y_train, epochs = epochs,
  114.                         validation_split = 1 - train_frac,
  115.                         verbose = 0, callbacks = [early_stop,
  116.                                                   tfdocs.modeling.EpochDots()])
  117.     y_pred = model.predict(X_test)
  118.    
  119.     # Record the redshift predictions in the test set
  120.     X_test['z_spec'] = y_test
  121.     X_test['z_phot'] = y_pred
  122.     X_test['delta_z'] = X_test['z_spec'] - X_test['z_phot']
  123.    
  124.     stats = X_test['delta_z'].describe().transpose()
  125.     mean, std = stats['mean'], stats['std'] # add means and std devs to lists
  126.     mean_list.append(mean)
  127.     std_list.append(std)
  128.  
  129. print("Model completed in", time.time() - start_time, "seconds")
  130.  
  131. #%% Display means and standard deviations
  132. border = '-'*25
  133. separator = '\t\t|\t'
  134. results_list = zip(mean_list, std_list)
  135. print('Means' + separator + 'Std devs')
  136. print(border)
  137. for mean, dev, *_ in results_list:
  138.     print(f"{mean:7f}\t|\t{dev:7f}")
  139. print(border)
  140. print('Average mean = {avg_mean}\nAverage std dev = {avg_std}'.format(
  141.     avg_mean=np.mean(mean_list),
  142.     avg_std=np.mean(std_list)))
  143.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement