Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.01 KB | None | 0 0
  1. from Pipeline import Pipeline
  2. import PipelineUtils as utils
  3. from IdentityProcessor import IdentityProcessor
  4.  
  5. # UNIT TEST
  6. import unittest
  7. import logging
  8. import pprint
  9. logging.basicConfig(level=logging.DEBUG)
  10. from sklearn.feature_selection import SelectKBest
  11. from sklearn.linear_model import SGDClassifier
  12.  
  13. import sklearn.svm as svm
  14. from sklearn.decomposition import PCA
  15.  
  16. # Overriding print:
  17. def print(element):
  18. logging.info(pprint.pformat(element))
  19.  
  20. # TODO: MORE UNIT TESTS
  21. class Test(unittest.TestCase):
  22. small_arg_list = [
  23. {
  24. 'name' : 'anova',
  25. 'processor' : SelectKBest,
  26. 'parameter_dist' : {'k' : [1,2,3,4]},
  27. 'sample_count' : 5
  28. },
  29. {
  30. 'name' : 'svc',
  31. 'processor' : svm.SVC,
  32. 'parameter_dist' : {'kernel' : ['linear','rbf']},
  33. 'sample_count' : 5
  34. }
  35. ]
  36.  
  37. missing_arg_list = [
  38. {
  39. 'name' : 'anova',
  40. 'processor' : SelectKBest,
  41. 'parameter_dist' : {'k' : [1,2,3,4]}
  42. },
  43. {
  44. 'name' : 'svc',
  45. 'processor' : svm.SVC,
  46. 'parameter_dist' : {'kernel' : ['linear','rbf']},
  47. 'sample_count' : 2
  48. }
  49. ]
  50.  
  51. arg_list = [
  52. {
  53. 'name' : 'anova',
  54. 'processor' : SelectKBest,
  55. 'parameter_dist' : {'k' : [2,4]},
  56. 'sample_count' : 2
  57. },
  58. {
  59. 'name' : 'svc',
  60. 'processor' : svm.SVC,
  61. 'parameter_dist' : {'kernel' : ['linear','rbf']},
  62. 'sample_count' : 2
  63. }
  64. ]
  65.  
  66. missing_multi_arg_list = [
  67. [
  68. {
  69. 'name' : 'anova',
  70. 'processor' : SelectKBest,
  71. 'parameter_dist' : {'k' : [2,4]},
  72. 'sample_count' : 2
  73. },
  74. {
  75. 'name' : 'pca',
  76. 'processor' : PCA,
  77. 'parameter_dist' : {'n_components' : [2,4]},
  78. }
  79. ],
  80. {
  81. 'name' : 'svc',
  82. 'processor' : svm.SVC,
  83. 'parameter_dist' : {'kernel' : ['linear','rbf']},
  84. 'sample_count' : 2
  85. }
  86. ]
  87.  
  88. multi_arg_list = [
  89. [
  90. {
  91. 'name' : 'anova',
  92. 'processor' : SelectKBest,
  93. 'parameter_dist' : {'k' : [2,4]},
  94. 'sample_count' : 2
  95. },
  96. {
  97. 'name' : 'pca',
  98. 'processor' : PCA,
  99. 'parameter_dist' : {'n_components' : [2,4]},
  100. 'sample_count' : 2
  101. }
  102. ],
  103. {
  104. 'name' : 'svc',
  105. 'processor' : svm.SVC,
  106. 'parameter_dist' : {'kernel' : ['linear','rbf']},
  107. 'sample_count' : 2
  108. }
  109. ]
  110.  
  111. @unittest.expectedFailure
  112. def test_small_param_space(self):
  113. # parameter space too small
  114. pipeline = Pipeline(Test.small_arg_list)
  115.  
  116. @unittest.expectedFailure
  117. def test_missing_param_space(self):
  118. # parameter space sufficiently large
  119. pipeline = Pipeline(Test.missing_arg_list)
  120.  
  121. @unittest.expectedFailure
  122. def test_missing_multi_param_space(self):
  123. # parameter space sufficiently large
  124. pipeline = Pipeline(Test.missing_multi_arg_list)
  125.  
  126. def test_param_space(self):
  127. # parameter space sufficiently large
  128. pipeline = Pipeline(Test.arg_list)
  129.  
  130. def test_multi_param_space(self):
  131. # parameter space sufficiently large
  132. pipeline = Pipeline(Test.multi_arg_list)
  133.  
  134. def test_sample_hyperparameters(self):
  135. anova_set = set([s['param']['k'] for s_list in Pipeline._retrieve_hps_list(Test.arg_list)[0] for s in s_list])
  136. svc_set = set([s['param']['kernel'] for s_list in Pipeline._retrieve_hps_list(Test.arg_list)[1] for s in s_list])
  137. self.assertTrue(anova_set == set([2,4]))
  138. self.assertTrue(svc_set == set(['linear','rbf']))
  139.  
  140. def test_sample_hyperparameters_multi(self):
  141. anova_set = set([s['param']['k'] for s_list in Pipeline._retrieve_hps_list(Test.multi_arg_list)[0] for s in s_list if s['name'] == 'anova'])
  142. pca_set = set([s['param']['n_components'] for s_list in Pipeline._retrieve_hps_list(Test.multi_arg_list)[0] for s in s_list if s['name'] == 'pca'])
  143. svc_set = set([s['param']['kernel'] for s_list in Pipeline._retrieve_hps_list(Test.multi_arg_list)[1] for s in s_list])
  144. self.assertTrue(anova_set == set([2,4]))
  145. self.assertTrue(pca_set == set([2,4]))
  146. self.assertTrue(svc_set == set(['linear','rbf']))
  147.  
  148. def test_sample_trajectories(self):
  149. hps_list = [[[{'name': 'anova',
  150. 'param':{'k': j},
  151. 'processor': SelectKBest} for j in range(1,11)]],
  152. [[{'name': 'linear',
  153. 'param':{'alpha': j},
  154. 'processor': SGDClassifier} for j in range(1,11)] for k in range(10)]]
  155.  
  156. num_trajectories = 100 # fully exploring hps_list
  157. root = Pipeline._create_processor(IdentityProcessor)
  158. graph = Pipeline._generate_trajectories(hps_list, num_trajectories, root)
  159.  
  160. @unittest.expectedFailure
  161. def test_small_sample_trajectories(self):
  162. hps_list = [[{'name':'anova', 'param':{'k': k}} for k in range(10)],
  163. [{'name':'linear', 'param':{'kernel': k}} for k in range(10)]]
  164. num_trajectories = 1000 # overpopulating trajectories
  165. traj_dict = Pipeline._generate_trajectories(hps_list, num_trajectories, None) # fail here
  166.  
  167. """
  168. @unittest.expectedFailure
  169. def test_small_trajectory_space(self):
  170. num_trajectories = 100
  171. hps_list = Pipeline._retrieve_hps_list(Test.small_multi_arg_list)
  172. utils.trajectory_space_check(hps_list, num_trajectories)
  173. """
  174. if __name__ == '__main__':
  175. unittest.main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement