Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- """
- simple-random-nasbench.py
- """
- import numpy as np
- import pandas as pd
- from tqdm import tqdm, trange
- from matplotlib import pyplot as plt
- from nasbench.api import NASBench
# Fix the RNG so run sampling / column sampling below is reproducible.
np.random.seed(123)
# --
# Helpers

# Shorthands: element-wise running maximum and running sum of a 1-D array.
cummax = np.maximum.accumulate
cumsum = np.cumsum
def cumargmax(x):
    """Return, for each position i, the index of the maximum of x[:i+1].

    Positions where x sets (or ties) a new running maximum keep their own
    index; all other positions are forward-filled with the index of the
    last running maximum.

    Parameters
    ----------
    x : 1-D np.ndarray of comparable values.

    Returns
    -------
    np.ndarray of int, same length as x.
    """
    running_max = np.maximum.accumulate(x)
    # `np.float` was removed in NumPy 1.24 — use the builtin `float` dtype.
    z = np.arange(x.shape[0], dtype=float)
    # Mark positions that are NOT a new running maximum, then forward-fill
    # with the index of the last maximum. Index 0 is always a maximum, so
    # there is never a leading NaN to fill.
    z[x != running_max] = np.nan
    # `fillna(method='ffill')` is deprecated in pandas 2.x — use .ffill().
    z = pd.Series(z).ffill()
    return z.values.astype(int)
def sample_one_column(x):
    """Draw one uniformly random entry from each row of a 2-D array.

    Returns a 1-D array of length x.shape[0] whose i-th element is a
    randomly chosen value from row i (columns sampled with replacement
    across rows).
    """
    n_rows, n_cols = x.shape[0], x.shape[1]
    row_idx = np.arange(n_rows)
    col_idx = np.random.choice(n_cols, n_rows, replace=True)
    return x[(row_idx, col_idx)]
# --
# IO

# Pre-downloaded NAS-Bench-101 table (108-epoch budget only).
path = 'data/nasbench_only108.tfrecord'
api = NASBench(path)

# --
# ETL

# One row per architecture, one column per training repetition (3 runs each).
hashes = np.array(list(api.hash_iterator()))
n_models = len(hashes)
n_runs = 3

test_acc = np.zeros((n_models, n_runs))
valid_acc = np.zeros((n_models, n_runs))
cost = np.zeros((n_models, n_runs))

for idx, model_hash in tqdm(enumerate(hashes), total=n_models):
    _, metrics = api.get_metrics_from_hash(model_hash)
    # Metrics dict is keyed by epoch budget; this table only has 108.
    runs = metrics[108]
    valid_acc[idx] = [run['final_validation_accuracy'] for run in runs]
    test_acc[idx] = [run['final_test_accuracy'] for run in runs]
    cost[idx] = [run['final_training_time'] for run in runs]

# Per-architecture means over the 3 training repetitions.
mean_valid_acc = valid_acc.mean(axis=-1)
mean_test_acc = test_acc.mean(axis=-1)
mean_cost = cost.mean(axis=-1)
# --
# Random runs

def random_run(valid_acc, mean_test_acc, mean_cost, models_per_run=int(1e4)):
    """Simulate one random-search run over the benchmark.

    Samples `models_per_run` distinct architectures, evaluates one noisy
    validation accuracy per architecture, and tracks the mean test accuracy
    of the incumbent (best-validation-so-far) model along with the
    cumulative training-time cost.

    Returns (test_acc_run, cum_cost_run), both 1-D arrays of length
    `models_per_run`.
    """
    total_models = valid_acc.shape[0]
    # Sample architectures uniformly at random, without replacement.
    chosen = np.random.choice(total_models, models_per_run, replace=False)
    # One of the repeated validation accuracies per sampled model.
    run_valid = sample_one_column(valid_acc[chosen])
    # Index (within this run) of the incumbent at each step.
    incumbent = cumargmax(run_valid)
    # Mean test accuracy of the incumbent at each step.
    run_test = mean_test_acc[chosen][incumbent]
    # Running total of training time spent so far.
    run_cost = cumsum(mean_cost[chosen])
    return run_test, run_cost
# Repeat random search 500 times and average the trajectories.
rand_results = [random_run(valid_acc, mean_test_acc, mean_cost) for _ in trange(500)]
test_acc_runs, cum_cost_runs = list(zip(*rand_results))
# Average test acc of selected models
mean_test_acc_run = np.stack(test_acc_runs).mean(axis=0)
# Average cumulative cost of random runs
mean_cum_cost_run = np.stack(cum_cost_runs).mean(axis=0)

# Plot mean test regret (gap to the best mean test accuracy in the whole
# benchmark) against mean cumulative training cost, log-log.
# BUG FIX: the original called plt.legend() with no labelled artists, which
# warns "No artists with labels found" and draws nothing — give the curve a
# label so the legend actually renders.
_ = plt.plot(mean_cum_cost_run, mean_test_acc.max() - mean_test_acc_run,
             c='red', label='random search')
_ = plt.xscale('log')
_ = plt.yscale('log')
_ = plt.ylim(1e-3, 1e-1)
_ = plt.legend()
_ = plt.grid(which='both', alpha=0.5)
# Reference line: regret reported in the NAS-Bench-101 paper's regime.
_ = plt.axhline(4e-3, c='grey', alpha=0.26)
plt.show()
# Performance at 1e7 seconds is `5.5 * 1e-3`, compared to about `4.1 * 1e-3` in the paper
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement