Advertisement
Guest User

Untitled

a guest
Feb 18th, 2020
304
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.26 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3.  
  4. from sklearn.compose import ColumnTransformer
  5. from sklearn.pipeline import Pipeline
  6. from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
  7.  
  8. df = pd.DataFrame({"Type": ["Beta", "Beta", "Alpha", "Charlie", "Beta", "Charlie"], "A": [1, 2, 3, np.nan, 22, 4], "B": [5, 7, 12, 21, 12, 10]})
  9.  
  10. def engineer_feature(df):
  11. df["C"] = df["A"] / df["B"]
  12. return df
  13.  
  14. categorical_transformer = Pipeline([
  15. ("one_hot", OneHotEncoder())
  16. ])
  17.  
  18. preprocessor = ColumnTransformer([
  19. ("categorical", categorical_transformer, ["Type"]),
  20. ("numeric", FunctionTransformer(engineer_feature), ["A", "B"])
  21. ])
  22.  
  23. preprocessor.fit_transform(df)
  24. """
  25. array([[ 0. , 1. , 0. , 1. , 5. ,
  26. 0.2 ],
  27. [ 0. , 1. , 0. , 2. , 7. ,
  28. 0.28571429],
  29. [ 1. , 0. , 0. , 3. , 12. ,
  30. 0.25 ],
  31. [ 0. , 0. , 1. , nan, 21. ,
  32. nan],
  33. [ 0. , 1. , 0. , 22. , 12. ,
  34. 1.83333333],
  35. [ 0. , 0. , 1. , 4. , 10. ,
  36. 0.4 ]])
  37. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement