Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Unit tests for Pipeline.

# Standard library
import logging
import pprint
import unittest

# Third-party
import sklearn.svm as svm
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.linear_model import SGDClassifier

# Project-local
import PipelineUtils as utils
from IdentityProcessor import IdentityProcessor
from Pipeline import Pipeline

# Configure the root logger once, after all imports, so that DEBUG-and-above
# records emitted while the tests run are shown.
logging.basicConfig(level=logging.DEBUG)
# Overriding print: within this module, `print` routes through logging.
def print(element):
    """Log the pretty-printed form of *element* at INFO level.

    This deliberately shadows the builtin ``print`` for this module. Unlike
    the builtin it takes exactly one positional argument and writes nothing
    to stdout itself; the formatted text is handed to ``logging.info`` (the
    root logger).

    Args:
        element: Any object; it is rendered with ``pprint.pformat``.
    """
    logging.info(pprint.pformat(element))
# TODO: add more unit tests.
class Test(unittest.TestCase):
    """Tests for ``Pipeline`` construction and its sampling helpers.

    The class-level ``*_arg_list`` fixtures describe pipeline stages. Each
    stage is a dict with the keys ``'name'``, ``'processor'``,
    ``'parameter_dist'`` and ``'sample_count'``; a nested list groups
    alternative processors for one stage. The ``missing_*`` fixtures omit
    ``'sample_count'`` from one stage on purpose.
    """

    # Asks for 5 samples per stage, but 'k' offers only 4 values and
    # 'kernel' only 2.
    small_arg_list = [
        {
            'name': 'anova',
            'processor': SelectKBest,
            'parameter_dist': {'k': [1, 2, 3, 4]},
            'sample_count': 5,
        },
        {
            'name': 'svc',
            'processor': svm.SVC,
            'parameter_dist': {'kernel': ['linear', 'rbf']},
            'sample_count': 5,
        },
    ]

    # The 'anova' stage has no 'sample_count' entry.
    missing_arg_list = [
        {
            'name': 'anova',
            'processor': SelectKBest,
            'parameter_dist': {'k': [1, 2, 3, 4]},
        },
        {
            'name': 'svc',
            'processor': svm.SVC,
            'parameter_dist': {'kernel': ['linear', 'rbf']},
            'sample_count': 2,
        },
    ]

    # Well-formed two-stage description: 2 samples from 2 values per stage.
    arg_list = [
        {
            'name': 'anova',
            'processor': SelectKBest,
            'parameter_dist': {'k': [2, 4]},
            'sample_count': 2,
        },
        {
            'name': 'svc',
            'processor': svm.SVC,
            'parameter_dist': {'kernel': ['linear', 'rbf']},
            'sample_count': 2,
        },
    ]

    # First stage offers two processors; the 'pca' one lacks 'sample_count'.
    missing_multi_arg_list = [
        [
            {
                'name': 'anova',
                'processor': SelectKBest,
                'parameter_dist': {'k': [2, 4]},
                'sample_count': 2,
            },
            {
                'name': 'pca',
                'processor': PCA,
                'parameter_dist': {'n_components': [2, 4]},
            },
        ],
        {
            'name': 'svc',
            'processor': svm.SVC,
            'parameter_dist': {'kernel': ['linear', 'rbf']},
            'sample_count': 2,
        },
    ]

    # Well-formed description whose first stage offers two processors.
    multi_arg_list = [
        [
            {
                'name': 'anova',
                'processor': SelectKBest,
                'parameter_dist': {'k': [2, 4]},
                'sample_count': 2,
            },
            {
                'name': 'pca',
                'processor': PCA,
                'parameter_dist': {'n_components': [2, 4]},
                'sample_count': 2,
            },
        ],
        {
            'name': 'svc',
            'processor': svm.SVC,
            'parameter_dist': {'kernel': ['linear', 'rbf']},
            'sample_count': 2,
        },
    ]

    @staticmethod
    def _sampled_values(stage_samples, param_name, processor_name=None):
        """Return the distinct values of one hyperparameter in a stage.

        Args:
            stage_samples: One stage of ``Pipeline._retrieve_hps_list``
                output, iterated here as a list of lists of dicts that carry
                ``'name'`` and ``'param'`` keys.
            param_name: Key to read from each sample's ``'param'`` dict.
            processor_name: When given, only samples whose ``'name'`` equals
                it are inspected; otherwise every sample is.

        Returns:
            A set of the values found.
        """
        return {
            sample['param'][param_name]
            for sample_group in stage_samples
            for sample in sample_group
            if processor_name is None or sample['name'] == processor_name
        }

    # NOTE(review): ``expectedFailure`` accepts *any* exception. Once the
    # exception type raised by Pipeline is settled, ``assertRaises`` would pin
    # these tests down more precisely.

    @unittest.expectedFailure
    def test_small_param_space(self):
        """Construction is expected to fail: parameter space too small."""
        Pipeline(Test.small_arg_list)

    @unittest.expectedFailure
    def test_missing_param_space(self):
        """Construction is expected to fail: a stage lacks 'sample_count'."""
        Pipeline(Test.missing_arg_list)

    @unittest.expectedFailure
    def test_missing_multi_param_space(self):
        """Construction is expected to fail: a nested stage lacks 'sample_count'."""
        Pipeline(Test.missing_multi_arg_list)

    def test_param_space(self):
        """Construction succeeds when the parameter space is large enough."""
        Pipeline(Test.arg_list)

    def test_multi_param_space(self):
        """Construction succeeds for a multi-processor stage description."""
        Pipeline(Test.multi_arg_list)

    def test_sample_hyperparameters(self):
        """Sampled hyperparameters cover every value of each distribution."""
        # Sample once and inspect both stages of the same result.
        hps_list = Pipeline._retrieve_hps_list(Test.arg_list)

        self.assertEqual(self._sampled_values(hps_list[0], 'k'), {2, 4})
        self.assertEqual(
            self._sampled_values(hps_list[1], 'kernel'), {'linear', 'rbf'}
        )

    def test_sample_hyperparameters_multi(self):
        """Each processor of a multi-processor stage is sampled in full."""
        hps_list = Pipeline._retrieve_hps_list(Test.multi_arg_list)

        # Stage 0 mixes 'anova' and 'pca' samples, so filter by name.
        self.assertEqual(
            self._sampled_values(hps_list[0], 'k', 'anova'), {2, 4}
        )
        self.assertEqual(
            self._sampled_values(hps_list[0], 'n_components', 'pca'), {2, 4}
        )
        self.assertEqual(
            self._sampled_values(hps_list[1], 'kernel'), {'linear', 'rbf'}
        )

    def test_sample_trajectories(self):
        """Smoke test: trajectory generation completes without raising."""
        # Stage 0: a single group of 10 'anova' samples.
        anova_stage = [[
            {'name': 'anova', 'param': {'k': j}, 'processor': SelectKBest}
            for j in range(1, 11)
        ]]
        # Stage 1: 10 groups, each holding 10 'linear' samples.
        linear_stage = [
            [
                {
                    'name': 'linear',
                    'param': {'alpha': j},
                    'processor': SGDClassifier,
                }
                for j in range(1, 11)
            ]
            for _ in range(10)
        ]
        hps_list = [anova_stage, linear_stage]
        num_trajectories = 100  # fully exploring hps_list

        root = Pipeline._create_processor(IdentityProcessor)
        # No assertion on the result: the test only checks that no exception
        # is raised.
        Pipeline._generate_trajectories(hps_list, num_trajectories, root)

    @unittest.expectedFailure
    def test_small_sample_trajectories(self):
        """Requesting more trajectories than are available is expected to fail."""
        # NOTE(review): this hps_list is nested one level less than the one in
        # test_sample_trajectories, its samples have no 'processor' key, and
        # the root is None -- confirm the failure comes from over-population
        # rather than from the malformed input.
        hps_list = [
            [{'name': 'anova', 'param': {'k': k}} for k in range(10)],
            [{'name': 'linear', 'param': {'kernel': k}} for k in range(10)],
        ]
        num_trajectories = 1000  # overpopulating trajectories
        Pipeline._generate_trajectories(hps_list, num_trajectories, None)  # fail here

    # Disabled test: it depends on a `small_multi_arg_list` fixture that this
    # class does not define. Re-enable once that fixture exists.
    #
    # @unittest.expectedFailure
    # def test_small_trajectory_space(self):
    #     num_trajectories = 100
    #     hps_list = Pipeline._retrieve_hps_list(Test.small_multi_arg_list)
    #     utils.trajectory_space_check(hps_list, num_trajectories)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement