Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.base import BaseEstimator, TransformerMixin
- from sklearn.pipeline import Pipeline, FeatureUnion
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.preprocessing import LabelEncoder
- import pandas as pd
- import numpy as np
class FeatureSelector(BaseEstimator, TransformerMixin):
    """Stateless transformer that extracts one labelled column from ``X``.

    Parameters
    ----------
    feature_name
        Label handed to ``X.loc[:, ...]`` when transforming.
    """

    def __init__(self, feature_name):
        self.feature_name = feature_name

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself."""
        return self

    def transform(self, X):
        """Return the ``feature_name`` selection of ``X`` via ``.loc``."""
        column_label = self.feature_name
        selected = X.loc[:, column_label]
        return selected
class CustomBinTransformer(BaseEstimator, TransformerMixin):
    """Replace each value of a one-dimensional input with its bin index.

    Binning is delegated to ``pandas.cut`` with ``labels=False``, so the
    output holds integer bin positions rather than interval labels.

    Parameters
    ----------
    ranges
        Bin specification forwarded to ``pandas.cut`` as its ``bins``
        argument (the pipeline in this file passes lists of bin edges).
    """

    def __init__(self, ranges):
        self.ranges = ranges

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself."""
        return self

    def transform(self, X):
        """Return the bin indices of ``X`` as a column vector of shape (n, 1).

        ``X`` must be one-dimensional (a Series, ndarray or list), which is
        a requirement of ``pandas.cut``.
        """
        # Example binning only: put your own logic here, or use a standard
        # scikit-learn discretizer if one fits.
        bin_codes = pd.cut(X, self.ranges, labels=False)
        # pd.cut gives back a Series for Series input but a plain ndarray for
        # other array-likes, which have no `.values`; np.asarray covers both.
        return np.asarray(bin_codes)[:, np.newaxis]
# Example estimator; substitute your own model here.
model = RandomForestClassifier()

# Bin edges are handed to CustomBinTransformer as plain lists.
_SCORE_EDGES = [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
_UNIT_EDGES = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# (column label, bin edges) for every feature branch, in union order.
_COLUMN_EDGES = [
    ('НБКИ+', _SCORE_EDGES),
    ('ОКБ+', _SCORE_EDGES),
    ('Эквифакс+', _SCORE_EDGES),
    ('mail.ru+', _UNIT_EDGES),
]

# One select-then-bin sub-pipeline per column, joined side by side.
features = FeatureUnion([
    (
        column,
        Pipeline([
            ('selector', FeatureSelector(column)),
            # list(...) gives each transformer its own copy of the edges.
            ('bining', CustomBinTransformer(list(edges))),
        ]),
    )
    for column, edges in _COLUMN_EDGES
])

pipeline = Pipeline([
    ('features', features),
    ('estimator', model),
])
# Demo data: ten rows of uniform random numbers in [0, 1).
df = pd.DataFrame(
    np.random.rand(10, 5),
    columns=['НБКИ+', 'ОКБ+', 'Эквифакс+', 'mail.ru+', 'target'],
)
# Scale the first three columns up to [0, 1000) to match their bin edges.
df.iloc[:, :3] = df.iloc[:, :3] * 1000
# Binarise the target. `np.int` was only an alias for the builtin `int` and
# was removed in NumPy 1.24, so use `int` directly.
df['target'] = (df['target'] < 0.5).astype(int)

# The whole frame is passed in; each branch selects its own column.
pipeline.fit(df, df['target'])
pipeline.predict(df)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement