Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class GradientBoostingMSE:
    """Gradient boosting regressor for the MSE loss.

    Trees are fit stage-wise to the current residuals (the negative
    gradient of the squared error), each stage's step size is found by a
    one-dimensional line search, and the ensemble prediction accumulates
    learning_rate * step * tree_prediction.

    Parameters
    ----------
    n_estimators : int
        The number of trees in the ensemble.
    learning_rate : float
        Shrinkage factor: use learning_rate * gamma instead of gamma.
    max_depth : int
        The maximum depth of each tree. If None then there is no limit.
    feature_subsample_size : float
        The size of the feature set for each tree. If None then the
        tree implementation's default is used.
    **trees_parameters
        Extra keyword arguments forwarded to every DecisionTreeRegressor.
    """

    def __init__(self, n_estimators, learning_rate=0.1, max_depth=5, feature_subsample_size=None,
                 **trees_parameters):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.feature_subsample_size = feature_subsample_size
        # Pre-build the (unfitted) tree for every boosting stage.
        self.trees = [
            DecisionTreeRegressor(max_depth=max_depth,
                                  max_features=feature_subsample_size,
                                  splitter='random',
                                  **trees_parameters)
            for _ in range(n_estimators)
        ]

    def fit(self, X, y):
        """Fit the ensemble stage-wise.

        X : numpy ndarray
            Array of size n_objects, n_features
        y : numpy ndarray
            Array of size n_objects

        Returns
        -------
        self
        """
        pred_values = np.zeros(len(y), dtype=float)
        self.opt_params = np.zeros(self.n_estimators)
        for j in range(self.n_estimators):
            # BUG FIX: fit each tree to the residuals (negative MSE
            # gradient), not to the raw targets y.
            residuals = y - pred_values
            tree = self.trees[j].fit(X, residuals)
            values = tree.predict(X)
            # 1-D line search for the optimal step size of this stage.
            res = minimize_scalar(lambda alpha: self.MSE(y, pred_values + alpha * values))
            self.opt_params[j] = res.x
            # BUG FIX: accumulate the shrunken stage prediction instead of
            # replacing pred_values with the raw tree output — this keeps
            # the training trajectory consistent with predict().
            pred_values = pred_values + self.learning_rate * res.x * values
        return self

    def MSE(self, y_true, y_pred):
        """Mean squared error between y_true and y_pred."""
        return np.mean((y_true - y_pred)**2)

    def predict(self, X):
        """Predict targets with the fitted ensemble.

        X : numpy ndarray
            Array of size n_objects, n_features

        Returns
        -------
        y : numpy ndarray
            Array of size n_objects
        """
        pred_b = 0
        for j in range(self.n_estimators):
            # Each stage contributes learning_rate * alpha_j * tree_j(X),
            # mirroring the accumulation performed during fit().
            pred_b = pred_b + self.learning_rate * self.opt_params[j] * self.trees[j].predict(X)
        return pred_b
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement