Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Mon Jan 9 08:30:36 2023
- @author: JeremyMoss
- """
- import numpy as np
- from sklearn.model_selection import train_test_split
- from tensorflow import keras
- import tensorflow_docs.modeling
- import tensorflow_docs as tfdocs
- import pandas as pd
- import time
- import matplotlib.pyplot as plt
- start_time = time.time()
def make_test_df(n_galaxies=10, n_mags=5, seed=0, file_name='test_dataset.csv'):
    """Generate a toy galaxy catalogue and write it to ``file_name`` as CSV.

    The written table has the columns ``Name``, ``RAJ2000``, ``DEJ2000``,
    ``mag1`` ... ``mag<n_mags>`` and ``redshift``, in that order.
    Magnitudes are drawn uniformly from [10, 20) and exactly one value in
    each magnitude column is replaced by NaN.

    Parameters
    ----------
    n_galaxies : int
        Number of rows to generate. Must be >= ``n_mags``.
    n_mags : int
        Number of magnitude columns.
    seed : int
        Seed for NumPy's global random state. A falsy value (the default
        ``0``, or ``None``) reseeds from fresh entropy, so the output is
        then not reproducible.
    file_name : str
        Path of the CSV file to write.

    Returns
    -------
    None
        When ``n_galaxies < n_mags`` a message is printed and no file is
        written.
    """
    # One NaN is placed per magnitude column along the diagonal, which needs
    # at least as many rows as columns. Check before doing any work so that
    # "Cannot generate dataset" really means nothing was generated.
    if n_galaxies < n_mags:
        print('Cannot generate dataset: n_galaxies ({0}) must be >= n_mags ({1})'.format(n_galaxies, n_mags))
        return

    # A falsy seed deliberately means "do not fix the random state".
    np.random.seed(seed if seed else None)

    data = np.random.uniform(10, 20, (n_galaxies, n_mags))
    data[np.diag_indices(n_mags)] = np.nan
    # Shuffle the rows so the missing values are not left on the diagonal.
    np.random.shuffle(data)

    magnames = [f'mag{i}' for i in range(1, n_mags + 1)]
    df = pd.DataFrame(data, columns=magnames)
    df.insert(0, 'Name', [f'Galaxy {i}' for i in range(1, n_galaxies + 1)])

    # Draw order (redshift, RA, dec) is kept so that a given seed keeps
    # producing the same table.
    df['redshift'] = np.random.uniform(0.01, 5, n_galaxies)
    right_ascension = np.random.uniform(8, 8.1, n_galaxies)
    declination = np.random.uniform(5, 5.1, n_galaxies)

    # Coordinates sit directly after the name column.
    df.insert(1, 'RAJ2000', right_ascension)
    df.insert(2, 'DEJ2000', declination)

    df.to_csv(file_name, index=False)
def loaddata(name, colours=False, impute_method=None, cols=None,
             dropna=True, number_of_rows='all'):
    """Read ``test_dataset.csv`` and return it together with its magnitudes.

    NOTE: every parameter is accepted but none is consulted by the body.
    The function always reads ``test_dataset.csv`` from the working
    directory, always prints the "colours" notice, and performs no
    imputation, NaN dropping, column selection or row limiting.

    Returns
    -------
    tuple
        ``(catalogue, 'Test dataset', magnitude_labels, magnitudes)`` where
        both frames have the value -999 replaced by NaN.
    """
    sentinel = -999
    base_dir = ''
    catalogue = pd.read_csv(base_dir + 'test_dataset.csv',
                            sep=',', index_col=False, header=0)
    print('Colours cannot be computed for the test frame')

    # Magnitudes are everything between the first three columns and the
    # last one (Name/RA/Dec and redshift in the file make_test_df writes).
    magnitude_labels = catalogue.columns[3:-1]
    magnitudes = catalogue[magnitude_labels]

    # Each frame is cleaned on its own, as in the original flow.
    cleaned_catalogue = catalogue.mask(catalogue == sentinel, np.nan)
    cleaned_magnitudes = magnitudes.mask(magnitudes == sentinel, np.nan)
    return cleaned_catalogue, 'Test dataset', magnitude_labels, cleaned_magnitudes
def build_nn_model(n, hyperparameters, loss, metrics, opt):
    """Build and compile a dense network with three hidden layers and one output.

    Parameters
    ----------
    n : int
        Number of input features.
    hyperparameters : sequence
        Read as ``[units1, activation1, units2, activation2, units3,
        activation3]``; only indices 0-5 are used.
    loss, metrics, opt
        Forwarded unchanged to ``model.compile`` as ``loss``, ``metrics``
        and ``optimizer``.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    network = keras.Sequential()

    # The first hidden layer also declares the input width.
    network.add(keras.layers.Dense(hyperparameters[0],
                                   activation=hyperparameters[1],
                                   input_shape=[n]))

    # Remaining hidden layers: (units, activation) pairs at indices 2 and 4.
    for units_at in (2, 4):
        network.add(keras.layers.Dense(hyperparameters[units_at],
                                       activation=hyperparameters[units_at + 1]))

    # Single linear output unit (the redshift estimate).
    network.add(keras.layers.Dense(1))

    network.compile(loss=loss, optimizer=opt, metrics=metrics)
    return network
#%% Load data
# Write a toy catalogue of 100 galaxies with 20 magnitude columns, then read
# it back. seed=0 leaves the random state unseeded, so each run differs.
make_test_df(n_galaxies=100, n_mags=20, seed=0)

# NOTE: these options are passed through, but loaddata as defined in this
# file does not act on any of them (no NaN dropping, colours or imputation).
dataset, datasetname, magnames, mags = loaddata(
    'test',
    impute_method='max',
    colours=False,
    dropna=False,
)
#%% Main body
# Network and training configuration shared by every trial.
hyperparams = [100, 'relu', 100, 'relu', 100, 'relu']
loss = 'mae'
metrics = ['mae']
epochs = 100
opt = 'Nadam'
num_trials = 3
mean_list = []   # per-trial mean of (z_spec - z_phot)
std_list = []    # per-trial standard deviation of (z_spec - z_phot)
train_frac = 0.8
banner = '*' * 58

for i in range(num_trials):
    # Each trial uses a fresh random split and a freshly initialised model.
    print(banner)
    print('Run {0} of {1}'.format(i + 1, num_trials))
    print(banner)

    X_train, X_test, y_train, y_test = train_test_split(
        mags,                  # features
        dataset['redshift'],   # target
        train_size=train_frac)

    model = build_nn_model(len(mags.columns), hyperparams, loss, metrics, opt)
    model.summary()

    # NOTE(review): patience equals the epoch budget, so as configured this
    # callback cannot stop a run early -- confirm whether that is intended.
    early_stop = keras.callbacks.EarlyStopping(patience=100)
    history = model.fit(X_train, y_train, epochs=epochs,
                        validation_split=1 - train_frac,
                        verbose=0,
                        callbacks=[early_stop, tfdocs.modeling.EpochDots()])

    y_pred = model.predict(X_test)

    # Keep predictions in their own frame instead of adding columns to
    # X_test: X_test is a split of `mags`, and writing into it mixes targets
    # into the feature table and can raise SettingWithCopyWarning. The
    # prediction array is flattened so it is stored as a plain 1-D column.
    results = pd.DataFrame({'z_spec': y_test, 'z_phot': np.ravel(y_pred)},
                           index=y_test.index)
    results['delta_z'] = results['z_spec'] - results['z_phot']

    # Same statistics describe() reports as 'mean' and 'std'.
    mean_list.append(results['delta_z'].mean())
    std_list.append(results['delta_z'].std())

# Elapsed wall-clock time since start_time was set at the top of the script.
print("Model completed in", time.time() - start_time, "seconds")
#%% Display means and standard deviations
# Print one table row per trial, framed by dashed rules, then the averages
# of the per-trial means and standard deviations.
border = '-' * 25
separator = '\t\t|\t'
results_list = zip(mean_list, std_list)

header = 'Means' + separator + 'Std devs'
rows = [f"{run_mean:7f}\t|\t{run_std:7f}" for run_mean, run_std in results_list]
print('\n'.join([header, border, *rows, border]))

print(f'Average mean = {np.mean(mean_list)}\nAverage std dev = {np.mean(std_list)}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement