Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def _get_interval(t, cut_gaps):
- if pd.isnull(t):
- return 'NA'
- for i, cut_gap in enumerate(cut_gaps):
- if t <= cut_gap:
- if i == 0:
- return "%s-(-inf, %s]" % (i, cut_gap)
- return "%s-(%s, %s]" % (i, cut_gaps[i - 1], cut_gap)
- return "%s-(%s, inf)" % (len(cut_gaps), cut_gaps[-1])
class BinEncoder(TransformerMixin):
    """Transformer that replaces every value with its bin label.

    The label of each element is produced by ``_get_interval`` using the cut
    points supplied at construction time.
    """

    def __init__(self, cut_gaps):
        # Cut points forwarded to ``_get_interval`` for every element.
        self.cut_gaps = cut_gaps

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from ``X`` or ``y``."""
        return self

    def transform(self, X):
        """Return an array with the bin label of each element of ``X``."""
        labels = [_get_interval(value, self.cut_gaps) for value in X]
        return np.array(labels)
class WoeEncoder(TransformerMixin):
    """Transformer that maps interval labels to the WOE values seen in ``fit``."""

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Build the interval -> WOE lookup from ``at.calc_iv`` and return ``self``.

        The result of ``at.calc_iv(X, y, return_type='woe')`` is indexed by
        its ``'interval'`` column and its ``'woe'`` column is stored as a
        dict in ``self.woe_dict``.
        """
        woe_table = at.calc_iv(X, y, return_type='woe')
        by_interval = woe_table.set_index('interval')
        self.woe_dict = by_interval.to_dict()['woe']
        return self

    def transform(self, X):
        """Return the WOE value for each element of ``X``.

        Elements without an entry in ``self.woe_dict`` (and entries whose
        value is null) are encoded as ``0``.
        """
        looked_up = np.array([self.woe_dict.get(label) for label in X])
        is_missing = pd.isnull(looked_up)
        return np.where(is_missing, 0, looked_up)
# Values in the val and OOT sets that fall outside the dev-set boundaries
# need to be handled.
# For one-hot encoding, use LabelBinarizer:
# mapper = DataFrameMapper(
#     [(feature_name, [BinEncoder(cut_gaps), LabelBinarizer()])
#      for feature_name, cut_gaps in list(feat_cut_dict.items())]
# )
# For WOE filling, use the custom WoeEncoder: each feature is binned with
# its own cut points and the bin label is then replaced by a WOE value.
mapper = DataFrameMapper([
    (feature_name, [BinEncoder(cut_gaps), WoeEncoder()])
    for feature_name, cut_gaps in feat_cut_dict.items()
])
lr_model = LogisticRegression()
# Pipeline: per-feature encoding first, logistic regression second.
lm = PMMLPipeline([("mapper", mapper), ("lr", lr_model)])
Add Comment
Please, Sign In to add comment