Advertisement
Guest User

Untitled

a guest
Aug 4th, 2015
247
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.57 KB | None | 0 0
  1. """
  2. The :mod:`sklearn.kernel_approximation` module implements several
  3. approximate kernel feature maps based on Fourier transforms.
  4. """
  5.  
  6. # Author: Andreas Mueller <amueller@ais.uni-bonn.de>
  7. #
  8. # License: BSD 3 clause
  9.  
  10. import warnings
  11.  
  12. import numpy as np
  13. import scipy.sparse as sp
  14. from scipy.linalg import svd
  15.  
  16. from sklearn.base import BaseEstimator
  17. from sklearn.base import TransformerMixin
  18. from sklearn.utils import check_array, check_random_state, as_float_array
  19. from sklearn.utils.extmath import safe_sparse_dot
  20. from sklearn.utils.validation import check_is_fitted
  21. from sklearn.metrics.pairwise import pairwise_kernels
  22.  
  23.  
  24.  
  25. class Nystroem(BaseEstimator, TransformerMixin):
  26. """Approximate a kernel map using a subset of the training data.
  27.  
  28. Constructs an approximate feature map for an arbitrary kernel
  29. using a subset of the data as basis.
  30.  
  31. Parameters
  32. ----------
  33. kernel : string or callable, default="rbf"
  34. Kernel map to be approximated. A callable should accept two arguments
  35. and the keyword arguments passed to this object as kernel_params, and
  36. should return a floating point number.
  37.  
  38. n_components : int
  39. Number of features to construct.
  40. How many data points will be used to construct the mapping.
  41.  
  42. gamma : float, default=None
  43. Gamma parameter for the RBF, polynomial, exponential chi2 and
  44. sigmoid kernels. Interpretation of the default value is left to
  45. the kernel; see the documentation for sklearn.metrics.pairwise.
  46. Ignored by other kernels.
  47.  
  48. degree : float, default=3
  49. Degree of the polynomial kernel. Ignored by other kernels.
  50.  
  51. coef0 : float, default=1
  52. Zero coefficient for polynomial and sigmoid kernels.
  53. Ignored by other kernels.
  54.  
  55. kernel_params : mapping of string to any, optional
  56. Additional parameters (keyword arguments) for kernel function passed
  57. as callable object.
  58.  
  59. random_state : {int, RandomState}, optional
  60. If int, random_state is the seed used by the random number generator;
  61. if RandomState instance, random_state is the random number generator.
  62.  
  63.  
  64. Attributes
  65. ----------
  66. components_ : array, shape (n_components, n_features)
  67. Subset of training points used to construct the feature map.
  68.  
  69. component_indices_ : array, shape (n_components)
  70. Indices of ``components_`` in the training set.
  71.  
  72. normalization_ : array, shape (n_components, n_components)
  73. Normalization matrix needed for embedding.
  74. Square root of the kernel matrix on ``components_``.
  75.  
  76.  
  77. References
  78. ----------
  79. * Williams, C.K.I. and Seeger, M.
  80. "Using the Nystroem method to speed up kernel machines",
  81. Advances in neural information processing systems 2001
  82.  
  83. * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou
  84. "Nystroem Method vs Random Fourier Features: A Theoretical and Empirical
  85. Comparison",
  86. Advances in Neural Information Processing Systems 2012
  87.  
  88.  
  89. See also
  90. --------
  91. RBFSampler : An approximation to the RBF kernel using random Fourier
  92. features.
  93.  
  94. sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
  95. """
  96. def __init__(self, basis = None, kernel="rbf", gamma=None, coef0=1, degree=3, kernel_params=None, n_components=100, random_state=None):
  97. self.kernel = kernel
  98. self.gamma = gamma
  99. self.coef0 = coef0
  100. self.degree = degree
  101. self.kernel_params = kernel_params
  102. self.n_components = n_components
  103. self.random_state = random_state
  104. self.basis = basis
  105.  
  106. def fit(self, X, y=None):
  107. """Fit estimator to data.
  108.  
  109. Samples a subset of training points, computes kernel
  110. on these and computes normalization matrix.
  111.  
  112. Parameters
  113. ----------
  114. X : array-like, shape=(n_samples, n_feature)
  115. Training data.
  116. """
  117. X = check_array(X, accept_sparse='csr')
  118. rnd = check_random_state(self.random_state)
  119. n_samples = X.shape[0]
  120.  
  121. # get basis vectors
  122. basis = self.basis
  123. print("Here's what goes into landmarknystroem:")
  124. print(np.shape(X))
  125. print(np.shape(basis))
  126. if basis is not None:
  127. if self.n_components > n_samples:
  128. # XXX should we just bail?
  129. n_components = n_samples
  130.  
  131. else:
  132. n_components = self.n_components
  133. n_components = min(n_samples, n_components)
  134. inds = rnd.permutation(n_samples)
  135. basis_inds = inds[:n_components]
  136. basis = X[basis_inds]
  137. self.components_ = basis
  138. self.component_indices_ = inds
  139. else:
  140. n_components = self.n_components
  141. self.components_ = basis
  142.  
  143. basis_kernel = pairwise_kernels(basis, metric=self.kernel, filter_params=True,**self._get_kernel_params())
  144.  
  145. # sqrt of kernel matrix on basis vectors
  146. U, S, V = svd(basis_kernel)
  147. S = np.maximum(S, 1e-12)
  148. self.normalization_ = np.dot(U * 1. / np.sqrt(S), V)
  149.  
  150. return self
  151.  
  152. def transform(self, X):
  153. """Apply feature map to X.
  154.  
  155. Computes an approximate feature map using the kernel
  156. between some training points and X.
  157.  
  158. Parameters
  159. ----------
  160. X : array-like, shape=(n_samples, n_features)
  161. Data to transform.
  162.  
  163. Returns
  164. -------
  165. X_transformed : array, shape=(n_samples, n_components)
  166. Transformed data.
  167. """
  168. check_is_fitted(self, 'components_')
  169. X = check_array(X, accept_sparse='csr')
  170.  
  171. kernel_params = self._get_kernel_params()
  172. embedded = pairwise_kernels(X, self.components_,
  173. metric=self.kernel,
  174. filter_params=True,
  175. **kernel_params)
  176. return np.dot(embedded, self.normalization_.T)
  177.  
  178. def _get_kernel_params(self):
  179. params = self.kernel_params
  180. if params is None:
  181. params = {}
  182. if not callable(self.kernel):
  183. params['gamma'] = self.gamma
  184. params['degree'] = self.degree
  185. params['coef0'] = self.coef0
  186.  
  187. return params
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement