Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Criterion:
    """Base class for criteria that score a subset of feature columns.

    Usage visible in this file: call ``initialize(X, y)`` once to store the
    data and fit the full model, then call the instance with column
    ``indices`` to score the model restricted to ``X[:, indices]``.
    Subclasses provide the actual ``__call__`` implementation.
    """

    def initialize(self, X, y):
        """Store the data and fit the model on all features.

        Sets the attributes ``X``, ``y``, ``weights`` (the result of
        ``find_weights(X, y)``), ``sigma2`` (residual sum of squares divided
        by ``n_samples - n_features``) and ``sigma`` (square root of
        ``sigma2``).

        Args:
            X: 2-D ``numpy.ndarray`` with one row per sample.
            y: 1-D ``numpy.ndarray`` with one entry per row of ``X``.

        Returns:
            ``self``, so the call can be chained.

        Raises:
            AssertionError: if ``X`` / ``y`` fail ``_check_Xy_pair``.
        """
        self._check_Xy_pair(X, y)
        self.X = X
        self.y = y
        n_samples, n_features = X.shape
        self.weights = find_weights(X, y)
        residuals = y - np.dot(X, self.weights)
        # NOTE(review): the denominator is zero or negative when
        # n_samples <= n_features; presumably callers always pass more
        # samples than features -- confirm.
        self.sigma2 = np.sum(residuals ** 2) / (n_samples - n_features)
        self.sigma = np.sqrt(self.sigma2)
        return self

    def _check_Xy_pair(self, X, y):
        """Validate that ``X`` is a 2-D array and ``y`` a matching 1-D array.

        The checks are raised explicitly instead of using ``assert`` so they
        are not stripped when Python runs with ``-O``.  ``AssertionError`` is
        kept as the exception type so existing callers that catch it keep
        working.

        Raises:
            AssertionError: if either argument is not a ``numpy.ndarray``,
                has the wrong number of dimensions, or the sample counts
                differ.
        """
        if not isinstance(X, np.ndarray):
            raise AssertionError("X must be a numpy.ndarray")
        if not isinstance(y, np.ndarray):
            raise AssertionError("y must be a numpy.ndarray")
        if X.ndim != 2:
            raise AssertionError("X must be 2-dimensional")
        if y.ndim != 1:
            raise AssertionError("y must be 1-dimensional")
        if X.shape[0] != y.shape[0]:
            raise AssertionError("X and y must have the same number of samples")

    def __call__(self, indices):
        """Score the feature subset ``indices``; subclasses must override.

        Raises:
            AssertionError: always, because the base class has no scoring
                rule.  Raised explicitly so it also fires under ``-O``.
        """
        raise AssertionError("Not implemented")

    def __repr__(self):
        """Return the short display name of the criterion."""
        return 'Criterion'
class CriterionAIC(Criterion):
    """Criterion reported as ``'AIC'``: residual error plus a ``2 * k`` penalty.

    Relies on ``self.X``, ``self.y`` and ``self.sigma2`` having been set by
    ``initialize``.
    """

    def __call__(self, indices):
        """Score the model restricted to the columns ``indices``.

        Returns ``(2 * k * sigma2 + RSS) / n / sigma2`` where ``n`` and ``k``
        are the row and column counts of ``X[:, indices]``, ``RSS`` is the
        residual sum of squares of the refitted sub-model and ``sigma2`` is
        the full-model value stored on the instance.
        """
        design = self.X[:, indices]
        n_samples, n_features = design.shape
        coef = find_weights(design, self.y)
        residuals = np.dot(design, coef) - self.y
        rss = np.sum(residuals ** 2)
        penalty = 2 * n_features * self.sigma2
        return (penalty + rss) / n_samples / self.sigma2

    def __repr__(self):
        """Return the short display name of the criterion."""
        return "AIC"
class CriterionBIC(Criterion):
    """Criterion reported as ``'BIC'``: residual error plus a ``log(n) * k`` penalty.

    Relies on ``self.X``, ``self.y`` and ``self.sigma2`` having been set by
    ``initialize``.
    """

    def __call__(self, indices):
        """Score the model restricted to the columns ``indices``.

        Returns ``(log(n) * k * sigma2 + RSS) / n / sigma2`` where ``n`` and
        ``k`` are the row and column counts of ``X[:, indices]``, ``RSS`` is
        the residual sum of squares of the refitted sub-model and ``sigma2``
        is the full-model value stored on the instance.
        """
        design = self.X[:, indices]
        n_samples, n_features = design.shape
        coef = find_weights(design, self.y)
        residuals = np.dot(design, coef) - self.y
        rss = np.sum(residuals ** 2)
        penalty = np.log(n_samples) * n_features * self.sigma2
        return (penalty + rss) / n_samples / self.sigma2

    def __repr__(self):
        """Return the short display name of the criterion."""
        return "BIC"
class CriterionLOO(Criterion):
    """Criterion reported as ``'LOO'``: leverage-corrected mean squared residual.

    Relies on ``self.X`` and ``self.y`` having been set by ``initialize``.
    """

    def __call__(self, indices):
        """Score the model restricted to the columns ``indices``.

        Each residual of the refitted sub-model is divided by
        ``1 - h_ii``, where ``h_ii`` is the i-th diagonal entry of
        ``X (X^T X)^{-1} X^T``; the mean of the squared results is returned.

        NOTE(review): this shortcut equals true leave-one-out error only if
        ``find_weights`` is an ordinary least-squares fit -- confirm.

        Raises:
            numpy.linalg.LinAlgError: if ``X^T X`` is singular.
        """
        X = self.X[:, indices]
        weights = find_weights(X, self.y)
        n_samples, _ = X.shape
        gram = np.dot(X.T, X)
        # Only the diagonal of the hat matrix is needed, so compute it
        # directly instead of materialising the n_samples x n_samples matrix;
        # solving the linear system also avoids forming an explicit inverse.
        leverage = np.einsum('ij,ji->i', X, np.linalg.solve(gram, X.T))
        residuals = self.y - np.dot(X, weights)
        return np.sum((residuals / (1 - leverage)) ** 2) / n_samples

    def __repr__(self):
        """Return the short display name of the criterion."""
        return 'LOO'
class CriterionCV(Criterion):
    """Criterion reported as ``'CV<n_folds>'``: shuffled K-fold squared error.

    Relies on ``self.X`` and ``self.y`` having been set by ``initialize``.
    """

    def __init__(self, n_folds=5, random_state=1):
        """Remember the number of folds and the seed passed to ``KFold``."""
        self.random_state = random_state
        self.n_folds = n_folds

    def __call__(self, indices):
        """Score the model restricted to the columns ``indices``.

        For every ``KFold`` split the sub-model is refitted on the training
        rows and its squared errors on the held-out rows are summed; the
        total over all folds divided by the number of rows is returned.
        """
        design = self.X[:, indices]
        n_samples, _ = design.shape
        splitter = KFold(
            n_splits=self.n_folds,
            shuffle=True,
            random_state=self.random_state,
        )

        def held_out_sse(train_rows, test_rows):
            # Fit on the training rows, then measure squared error on the rest.
            coef = find_weights(design[train_rows], self.y[train_rows])
            errors = self.y[test_rows] - np.dot(design[test_rows], coef)
            return np.sum(errors ** 2)

        total_sse = sum(
            held_out_sse(train_rows, test_rows)
            for train_rows, test_rows in splitter.split(design)
        )
        return total_sse / n_samples

    def __repr__(self):
        """Return the display name, which includes the fold count."""
        return f"CV{self.n_folds!s}"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement