Guest User

Untitled

a guest
Dec 16th, 2017
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.58 KB | None | 0 0
  1. def _get_interval(t, cut_gaps):
  2. if pd.isnull(t):
  3. return 'NA'
  4. for i, cut_gap in enumerate(cut_gaps):
  5. if t <= cut_gap:
  6. if i == 0:
  7. return "%s-(-inf, %s]" % (i, cut_gap)
  8. return "%s-(%s, %s]" % (i, cut_gaps[i - 1], cut_gap)
  9. return "%s-(%s, inf)" % (len(cut_gaps), cut_gaps[-1])
  10.  
  11.  
  12.  
  13. class BinEncoder(TransformerMixin):
  14. def __init__(self, cut_gaps):
  15. self.cut_gaps = cut_gaps
  16.  
  17. def fit(self, X, y=None):
  18. return self
  19.  
  20. def transform(self, X):
  21. return np.array(list(map(lambda t: _get_interval(t, self.cut_gaps), X)))
  22.  
  23.  
  24.  
  25. class WoeEncoder(TransformerMixin):
  26. def __init__(self):
  27. pass
  28.  
  29. def fit(self, X, y=None):
  30. a = at.calc_iv(X, y, return_type='woe')
  31. self.woe_dict = a.set_index('interval').to_dict()['woe']
  32. return self
  33.  
  34. def transform(self, X):
  35. res = np.array(list(map(lambda t: self.woe_dict.get(t), X)))
  36. # return res
  37. return np.where(pd.isnull(res), 0, res)
  38.  
  39.  
  40. # val和oot集边界值超过dev的边界时,需要处理
  41. # OneHot编码时使用LabelBinarizer
  42. #mapper = DataFrameMapper(
  43. # [(feature_name, [BinEncoder(cut_gaps), LabelBinarizer()])
  44. # for feature_name, cut_gaps in list(feat_cut_dict.items())]
  45. #)
  46.  
  47. # woe填充时,使用自定义的WoeEncoder
  48. mapper = DataFrameMapper(
  49. [(feature_name, [BinEncoder(cut_gaps), WoeEncoder()])
  50. for feature_name, cut_gaps in list(feat_cut_dict.items())]
  51. )
  52. lr_model = LogisticRegression()
  53. lm = PMMLPipeline([
  54. ("mapper", mapper),
  55. ("lr", lr_model),
  56. ])
Add Comment
Please, Sign In to add comment