Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
class MissingValuesTransformer(BaseEstimator, TransformerMixin):
    """Drop the columns in which too many entries equal a placeholder value.

    With the default settings this removes every variable whose fraction of
    values equal to 0 is greater than 0.4.

    Parameters
    ----------
    missing_values : scalar, default=0
        Value treated as "missing"; entries are matched with ``==``.
    missing_percentage : float, default=0.4
        Largest fraction of missing entries a column may have and still be
        kept. It is compared against ``count / n_rows``, so it is a fraction
        and not a 0-100 percentage.

    Attributes
    ----------
    n_features_ : int
        Number of columns of the array passed to ``fit``.
    columns_ : ndarray of bool
        Per-column mask; ``True`` marks a column that ``transform`` keeps.
    """

    def __init__(self, missing_values=0, missing_percentage=0.4):
        # Parameters are stored unmodified under their own names.
        self.missing_values = missing_values
        self.missing_percentage = missing_percentage

    def fit(self, X, y=None):
        """Learn which columns to keep from the training data.

        Parameters
        ----------
        X : array-like
            Training data; validated with ``check_array``.
        y : ignored
            Accepted only for pipeline compatibility.

        Returns
        -------
        self : MissingValuesTransformer
            The fitted transformer.
        """
        X = check_array(X)
        self.n_features_ = X.shape[1]

        # Fraction of entries per column that equal the placeholder value.
        is_missing = X == self.missing_values
        missing_fraction = np.sum(is_missing, axis=0) / X.shape[0]

        # Keep a column when its missing fraction does not exceed the limit.
        self.columns_ = missing_fraction <= self.missing_percentage
        return self

    def transform(self, X, y=None):
        """Return ``X`` restricted to the columns selected during ``fit``.

        Parameters
        ----------
        X : array-like
            Data with the same number of columns that was seen in ``fit``.
        y : ignored
            Accepted only for pipeline compatibility.

        Returns
        -------
        ndarray
            The validated input with the rejected columns removed.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If ``X`` has a different number of columns than the data used
            in ``fit``.
        """
        # Fail with a clear "not fitted" error instead of a bare
        # AttributeError on ``n_features_``.
        check_is_fitted(self, "columns_")
        X = check_array(X)
        if self.n_features_ != X.shape[1]:
            raise ValueError("Se han recibido de entrada {} características cuando se esperaban {}.".format(X.shape[1],self.n_features_))
        return X[:, self.columns_]
# Transformer built with the class defaults (missing_values=0,
# missing_percentage=0.4). NOTE(review): the name suggests it is meant for
# the Pima dataset — confirm against the caller.
pima_remover = MissingValuesTransformer()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement