Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- """
- An example for adapting a non-predicting estimator (i.e.,
- one that doesn't expose a public ``predict`` method, but
- only a ``fit_predict`` one), such as ``LocalOutlierFactor``
- to make predictions on "unseen" data.
- One could argue whether or not this particular approach is
- entirely legitimate, as not exposing a ``predict`` method,
- in most sensible cases will have been due to design and
- semantic constraints.
- Nevertheless, for the adventurous crowd out there, I've
- provided a rudimentary means of making predictions on
- "unseen" data via sub-classing the estimator in question.
- """
- import numpy as np
- from sklearn.model_selection import GridSearchCV, KFold
- from sklearn.neighbors import LocalOutlierFactor
SEED = 42


class LOFPredictor(LocalOutlierFactor):
    """``LocalOutlierFactor`` adapter that exposes a public ``predict``.

    ``LocalOutlierFactor`` (in its default, non-novelty mode) only offers
    ``fit_predict``; this subclass forwards to the parent's private
    ``_predict`` so the estimator can be dropped into ``GridSearchCV``.

    NOTE(review): this relies on a private scikit-learn API and may break
    across sklearn versions — confirm against the installed release.
    """

    def predict(self, X=None):
        # Delegate straight to the (private) parent implementation.
        labels = self._predict(X)
        return labels
# Example settings.
# NOTE(review): removed the unused `rng = np.random.RandomState(SEED)` —
# nothing in the script used it; seeding is done via np.random.seed below.
n_samples = 200
true_outliers_fraction = 0.25
offset = 2

# Evaluation grid (e.g. for plotting decision boundaries).
# BUG FIX: np.linspace requires an integer sample count; `n_samples / 2`
# is a float under Python 3 true division and raises TypeError on modern
# NumPy, so use floor division instead.
grid_side = n_samples // 2
xx, yy = np.meshgrid(np.linspace(-7, 7, grid_side),
                     np.linspace(-7, 7, grid_side))

# Ground-truth labels: +1 for inliers, -1 for the trailing outlier rows
# (the outliers are appended at the end of X during data generation).
n_outliers = int(true_outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers
y_true = np.ones(n_samples, dtype=int)
y_true[-n_outliers:] = -1
# Data generation: two tight Gaussian clusters centred at -offset and
# +offset, followed by uniformly scattered outliers appended at the end.
# (Draw order — randn, randn, uniform — matters for reproducibility.)
np.random.seed(SEED)
cluster_low = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
cluster_high = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
outlier_points = np.random.uniform(low=-6, high=6, size=(n_outliers, 2))
X = np.concatenate([cluster_low, cluster_high, outlier_points], axis=0)
# Model selection: grid-search LOF hyper-parameters with 3-fold CV,
# scoring each candidate's +1/-1 predictions against y_true.
# FIX: removed the dead `outliers_fraction = .25` — an unused duplicate of
# `true_outliers_fraction` defined above.
# Use the module-level SEED (== 42) instead of a magic number so the fold
# shuffling stays consistent with the rest of the script.
kfold = KFold(n_splits=3, shuffle=True, random_state=SEED)
param_grid = [
    {
        'n_neighbors': (25, 29, 35),
        'contamination': (.25, .27, .3),
    },
]
clf = GridSearchCV(LOFPredictor(), param_grid=param_grid, scoring="accuracy",
                   cv=kfold, n_jobs=-1)
clf.fit(X, y_true)
print("Best params: {}".format(clf.best_params_))
Add Comment
Please, Sign In to add comment