Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class ModelEmbedder:
    """Wrap a classifier and target-mean encode its object-dtype columns.

    During ``fit`` every object-dtype feature is replaced by the mean of the
    target observed for each of its values.  Values occurring fewer than
    ``rare_threshold`` times in the training data are pooled under the label
    ``"RARE"``, which is encoded with the global target mean.  At prediction
    time the same mapping is applied; values that have no learned mean
    (unseen, or rare during training) also receive the global target mean.

    Args:
        model: Object exposing ``fit(X, y)``, ``predict_proba(X)`` and
            ``get_params(deep)``.
        rare_threshold: Minimum number of training occurrences a value needs
            in order to get its own encoding.
    """

    def __init__(self, model, rare_threshold):
        self.model = model
        self.means = {}  # feature name -> Series mapping value to target mean
        self.rare_threshold = rare_threshold
        self.train = None  # encoded training frame handed to the model
        self.origin_train = None  # NaN-filled, rare-pooled, un-encoded frame
        self.average = 0  # global target mean, fallback for unknown values

    def fit(self, train, target):
        """Learn the per-value target means and fit the wrapped model.

        Args:
            train: DataFrame of features; it is copied, not modified.
            target: Target values; must support ``.mean()`` and be assignable
                as a column of ``train``.

        Returns:
            This instance, so calls can be chained.
        """
        self.origin_train = train.copy().fillna(-1)
        self.train = self.origin_train.copy()
        self.train['target'] = target
        self.average = target.mean()
        # Start from a clean mapping so a refit never keeps stale features.
        self.means = {}

        for feat in train.columns:
            if feat == 'target' or self.train[feat].dtype != 'object':
                continue

            # Both frames hold identical values here, so one mask serves both.
            counts = self.train[feat].value_counts()
            rare_mask = (
                self.train[feat].map(counts) < self.rare_threshold
            ).values
            self.train.loc[rare_mask, feat] = "RARE"
            self.origin_train.loc[rare_mask, feat] = "RARE"

            means = self.train.groupby([feat])['target'].mean()
            # Pooled rare values are deliberately encoded with the global mean.
            means["RARE"] = self.average
            self.means[feat] = means
            # map() does one lookup per value; every training value has a
            # key, so no missing entries are produced here.
            self.train[feat] = self.train[feat].map(means)

        del self.train['target']
        self.model.fit(self.train, target)
        return self

    def _pre_treat_test(self, test):
        """Return an encoded copy of ``test`` using the mapping from ``fit``.

        Raises:
            AttributeError: If called before ``fit``.  The type is kept
                because that is what an unfitted instance raised previously.
        """
        if self.origin_train is None:
            raise AttributeError(
                "ModelEmbedder is not fitted yet; call fit() first"
            )

        test = test.copy()
        test = test.fillna(-1)
        # Only features encoded during fit have an entry in self.means.
        for feat, means in self.means.items():
            # Values without a learned mean (unseen in training, or pooled as
            # rare there) come out of map() as missing and receive the global
            # average.  No mask derived from the training rows is involved,
            # so test may have any number of rows.
            test[feat] = test[feat].map(means).fillna(self.average)
        return test

    def predict_proba(self, test):
        """Encode ``test`` and return the wrapped model's ``predict_proba``."""
        test = self._pre_treat_test(test)
        return self.model.predict_proba(test)

    def get_params(self, deep=True):
        """Return the parameters reported by the wrapped model."""
        return self.model.get_params(deep)
# Base classifier; `ensemble` and `n_estimators` are expected to be defined
# by the surrounding script.
rf = ensemble.ExtraTreesClassifier(
    n_estimators=n_estimators,
    n_jobs=7,
    random_state=11,
)

# Wrap the classifier; the second argument (10) is presumably the
# rare-value threshold of ModelEmbedder -- verify against model_embedder.
rf_embedded = model_embedder.ModelEmbedder(rf, 10)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement