Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import seaborn as sns
- import tensorflow as tf
- from tensorflow import keras
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
- import tensorflow_docs as tfdocs
- import tensorflow_docs.plots
- import tensorflow_docs.modeling
- from scipy.stats import gaussian_kde
- ################################################################################
def build_model(n):
    """Build and compile the redshift-prediction network.

    Args:
        n: number of input features (photometric magnitudes).

    Returns:
        A compiled ``keras.Sequential`` model mapping n magnitudes to a
        single redshift estimate, trained with MAE loss and RMSprop.
    """
    layers = [
        keras.layers.Dense(5, activation='relu', input_shape=(n,)),
        keras.layers.Dense(4, activation='relu'),  # small hidden layer; author noted it may be unnecessary
        keras.layers.Dense(1),                     # single output: predicted z
    ]
    model = keras.Sequential(layers)
    model.compile(
        loss='mae',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        metrics=['mae', 'mse'],
    )
    return model
- ################################################################################
- # These are convenience functions for plotting the results of the model's predictions.
def plot_mae():
    """Plot the training-history MAE curve for the global ``history`` object."""
    hp = tfdocs.plots.HistoryPlotter(smoothing_std=2)
    hp.plot({'Basic': history}, metric="mae")
    plt.title('Mean Absolute Error evolution for %s' % datasetname)
    plt.ylabel(r'$\Delta z$')
    plt.show()
def plot_mse():
    """Plot the training-history MSE curve for the global ``history`` object."""
    plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)
    plotter.plot({'Basic': history}, metric = "mse")
    # BUG FIX: the metric is Mean *Squared* Error, not "Mean Standard Error".
    plt.title('Mean Squared Error evolution for %s'%datasetname)
    # NOTE(review): MSE has units of z^2, so this axis label is arguably
    # wrong too — kept for continuity with the MAE plot; confirm with author.
    plt.ylabel(r'$\Delta z$')
    plt.show()
def plot_z():
    """Density-coloured scatter of predicted vs. spectroscopic redshift.

    Point colour encodes the local point density, estimated with a
    Gaussian KDE over the (z_spec, z_pred) pairs of ``valid_dataset``.
    """
    spec_z = valid_dataset['z']
    pred_z = valid_dataset['Predicted z']
    pairs = np.vstack([spec_z, pred_z])
    density = gaussian_kde(pairs)(pairs)
    # Optionally the points could be sorted by density so the densest are
    # drawn last:
    # order = density.argsort()
    # spec_z, pred_z, density = spec_z[order], pred_z[order], density[order]
    plt.scatter(spec_z, pred_z,
                s=50,
                alpha=0.5,
                marker='.',
                # edgecolor = '',
                c=density)
    plt.colorbar()
    plt.title('Redshift predictions for %s\ncompared to spectroscopic redshift'%datasetname)
    plt.xlabel(r'$z_{spec}$')
    plt.ylabel('Predicted z')
    plt.show()
def plot_deltaz():
    """Scatter the prediction residual (predicted z minus z_spec) against z_spec.

    Side effect: adds/overwrites the 'Delta z' column on the global
    ``valid_dataset``.
    """
    residual = valid_dataset['Predicted z'] - valid_dataset['z']
    valid_dataset['Delta z'] = residual
    plt.scatter(valid_dataset['z'], residual, s=2, alpha=0.5, marker='.')
    plt.title('Deviation of redshift predictions for %s\nfrom spectroscopic redshift'%datasetname)
    plt.xlabel(r'$z_{spec}$')
    plt.ylabel(r'$\Delta z$')
    # plt.xlim([0, 1.5])
    plt.show()
def plot_z_boxplot(outliers = False):
    """Side-by-side boxplots of predicted vs. spectroscopic redshift.

    Args:
        outliers: when True, draw the flier (outlier) points as well.
    """
    # BUG FIX: `columns=` expects column *labels*; the original passed the
    # Series objects themselves, which selects no valid columns and yields
    # an empty/NaN frame.
    df = pd.DataFrame(data = valid_dataset,
                      columns = ['Predicted z', 'z'])
    # BUG FIX: pd.melt produces 'variable'/'value' columns; the original
    # passed x=None, y=None (author's own comment: "This screws it up").
    sns.boxplot(data = pd.melt(df),
                x = 'variable', y = 'value',
                linewidth = 0.5,
                flierprops = dict(markerfacecolor = '0.1', markersize = 0.2),
                showfliers = outliers
                )
    # valid_dataset.boxplot(column = ['Predicted z', 'z'])
    plt.title('Distribution of statistical parameters\nfor %s'%datasetname)
    plt.tight_layout()
    plt.show()
def plot_delta_z_hist():
    """Histogram of the prediction residual Delta z with mean/std in the legend.

    Side effect: adds/overwrites the 'Delta z' column on the global
    ``valid_dataset``.
    """
    # BUG FIX: compute the residual column *before* describing it; the
    # original called describe() first, which raises KeyError unless
    # plot_deltaz() happened to have run earlier.
    valid_dataset['Delta z'] = valid_dataset['Predicted z'] - valid_dataset['z']
    stats = valid_dataset['Delta z'].describe()
    valid_dataset['Delta z'].hist(label = ' mean = %.3f\n std dev = %.3f'%
                                  (stats[1], stats[2]),
                                  bins = 100)
    plt.title(r'Distribution of $\Delta z$ for %s'%datasetname)
    plt.legend()
    plt.xlabel(r'$\Delta z$')
    plt.ylabel('Count')
    plt.show()
- ################################################################################
# ------------------------------------------------------------------------------
# Load the SDSS DR12 quasar catalogue, train the redshift model on the five
# photometric magnitudes, and plot the results.
dataset = pd.read_csv('sdss12.csv')
datasetname = 'SDSS DR12 QSOs'
mags = ['umag', 'gmag', 'rmag', 'imag', 'zmag']
columns = mags + ['z']
# BUG FIX: the input features are the magnitudes only. The original set
# num_features = len(columns) (= 6), so the slices below fed the label 'z'
# into the network as an input feature (label leakage).
num_features = len(mags)
dataset = dataset[columns]

# 80/20 train/validation split, reproducible via random_state.
train_dataset = dataset.sample(frac = 0.8, random_state = 1)
valid_dataset = dataset.drop(train_dataset.index)
# The label is the last column ('z'); the original comments claimed
# "all but the last column", which was wrong.
train_labels = train_dataset.iloc[:, num_features:].values  # the 'z' column only
valid_labels = valid_dataset.iloc[:, num_features:].values  # the 'z' column only
print('Training set: \n',train_dataset)
print('Validation set: \n', valid_dataset)

model = build_model(num_features)
model.summary()

N = 50
# NOTE(review): patience=500 > epochs=50, so early stopping can never
# trigger here — confirm whether the patience or epoch count is intended.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=500)
history = model.fit(train_dataset.iloc[:,:num_features], train_labels, epochs = N,
                    validation_split = 0.2, verbose = 0,
                    callbacks = [
                        early_stop,
                        tfdocs.modeling.EpochDots()
                    ])
model.save("model2.h5")

# Model testing
valid_predictions = model.predict(valid_dataset.iloc[:,:num_features])
valid_dataset['Predicted z'] = valid_predictions
print("\nPredicted\n")
print(valid_dataset)

# Visualise the model's training progress using the stats in the history object
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
# BUG FIX: hist.tail is a bound method; the original printed its repr
# instead of the last rows.
print(hist.tail())
print('Predicted z stats: \n', valid_dataset['Predicted z'].describe())
print('Spectroscopic z stats: \n', valid_dataset['z'].describe())

plot_mse()
plot_mae()
plot_z()
plot_deltaz()
plot_delta_z_hist()
plot_z_boxplot(True)
plot_z_boxplot(False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement