Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from sklearn.compose import ColumnTransformer
- from sklearn.pipeline import Pipeline
- from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
# Toy dataset: one categorical column ("Type") and two numeric columns.
# "A" deliberately contains one missing value (NaN).
df = pd.DataFrame(
    {
        "Type": ["Beta", "Beta", "Alpha", "Charlie", "Beta", "Charlie"],
        "A": [1, 2, 3, np.nan, 22, 4],
        "B": [5, 7, 12, 21, 12, 10],
    }
)
def engineer_feature(df):
    """Return a copy of ``df`` with a ratio column ``C = A / B`` appended.

    Args:
        df: DataFrame containing numeric columns ``"A"`` and ``"B"``.

    Returns:
        A new DataFrame holding every column of ``df`` plus ``"C"``.
        Missing values in ``"A"`` or ``"B"`` propagate to ``"C"`` as NaN.

    Raises:
        KeyError: If ``"A"`` or ``"B"`` is not a column of ``df``.
    """
    # ``assign`` builds a new frame rather than writing into the caller's
    # object. The frame passed in here is a column selection of the original
    # data, and assigning into such a selection in place can raise pandas'
    # SettingWithCopyWarning and silently alter the caller's object.
    return df.assign(C=df["A"] / df["B"])
# Single-step pipeline that one-hot encodes the columns it is given.
categorical_transformer = Pipeline([
    ("one_hot", OneHotEncoder()),
])
# Route each group of columns to its transformer: "Type" goes through the
# categorical pipeline, while "A" and "B" are passed to engineer_feature
# (wrapped in a FunctionTransformer so it can act as a pipeline step).
preprocessor = ColumnTransformer([
    ("categorical", categorical_transformer, ["Type"]),
    ("numeric", FunctionTransformer(engineer_feature), ["A", "B"]),
])
# Fit the preprocessor on the toy frame and transform it in a single call.
preprocessor.fit_transform(df)

# Expected result, kept as a bare string for reference. The first three
# columns are the one-hot encoding of "Type", followed by "A", "B" and the
# engineered ratio "C" (NaN wherever "A" is missing).
"""
array([[ 0.        ,  1.        ,  0.        ,  1.        ,  5.        ,
         0.2       ],
       [ 0.        ,  1.        ,  0.        ,  2.        ,  7.        ,
         0.28571429],
       [ 1.        ,  0.        ,  0.        ,  3.        , 12.        ,
         0.25      ],
       [ 0.        ,  0.        ,  1.        ,         nan, 21.        ,
                nan],
       [ 0.        ,  1.        ,  0.        , 22.        , 12.        ,
         1.83333333],
       [ 0.        ,  0.        ,  1.        ,  4.        , 10.        ,
         0.4       ]])
"""
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement