Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from math import exp, log
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar *x*.

    The branch on the sign of *x* keeps the argument passed to ``exp``
    non-positive, so very large negative or positive inputs saturate to
    0.0 / 1.0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    # For x < 0, exp(-x) can overflow; use the algebraically equal form
    # e**x / (1 + e**x), where e**x merely underflows towards 0.
    z = exp(x)
    return z / (1 + z)
class FactorizationMachines:
    """Second-order factorization machine for binary classification.

    The raw score of a feature vector ``x`` is::

        w0 + <w, x> + sum_{i<j} <V[i], V[j]> * x[i] * x[j]

    Training is per-sample gradient descent on the logistic loss
    ``log(1 + exp(-y * score))`` with labels ``y`` in ``{-1, +1}`` plus an
    L2 penalty ``lamb * (w0**2 + |w|**2 + |V|**2)``.

    Args:
        epoch: Number of full passes over the training data in ``fit``.
        k: Number of latent factors per feature (columns of ``V``).
        eta: Learning rate of the gradient steps.
        seed: Seed handed to ``np.random.seed``; note that this reseeds
            NumPy's *global* legacy random generator.
    """

    def __init__(self, epoch: int = 1000, k: int = 3, eta: float = 0.01, seed: int = 0):
        np.random.seed(seed)
        self.n = None         # number of features, set by fit()
        self.epoch = epoch
        self.k = k
        self.lamb = 0.01      # L2 regularisation coefficient
        self.eta = eta
        self.sigma = 0.001    # scale of the random initialisation of V
        self.w0 = 0.0         # bias term
        self.w = None         # linear weights, shape (n,), set by fit()
        self.V = None         # factor matrix, shape (n, k), set by fit()

    def fit(self, X: np.ndarray, Y: np.ndarray, verbose=True):
        """Train the model and return ``self``.

        Args:
            X: Two-dimensional array-like of shape (samples, features).
            Y: One label per row of *X*; every label must equal -1 or 1.
            verbose: When true, print the training log loss after each epoch.

        Raises:
            ValueError: If *X* is not 2-D, the number of labels differs from
                the number of rows, or a label is neither -1 nor 1.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (samples, features)")
        labels = np.asarray(Y)
        if labels.ndim != 1 or labels.shape[0] != X.shape[0]:
            raise ValueError("Y must hold exactly one label per row of X")
        # Validate once up front with a real exception: an ``assert`` would be
        # stripped under ``python -O`` and only fired mid-training.
        if not np.all((labels == 1) | (labels == -1)):
            raise ValueError("labels must be -1 or 1")
        labels = labels.astype(np.float64)

        self.n = X.shape[1]
        self.w = np.zeros(self.n, np.float64)
        self.V = self.sigma * np.array(np.random.randn(self.n, self.k))
        for epoch in range(self.epoch):
            for x, y in zip(X, labels):
                self._update(x=x, y=y, p=self._predict(x))
            if verbose:
                print("epoch:{0} log loss={1}".format(epoch, self.test(X, labels)))
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the raw score (not a probability) of every row of *X*.

        A positive score favours label +1, a negative score label -1.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.w is None or self.V is None:
            raise RuntimeError("model is not fitted; call fit() first")
        return np.array([self._predict(x) for x in X])

    def test(self, X: np.ndarray, Y: np.ndarray) -> float:
        """Return the mean logistic loss of the model on (*X*, *Y*).

        ``-log(sigmoid(m))`` is evaluated as ``logaddexp(0, -m)``, which is
        the same quantity but neither overflows nor takes ``log(0)`` when the
        margin ``m = y * score`` is large in magnitude.
        """
        losses = [np.logaddexp(0.0, -y * p) for p, y in zip(self.predict(X), Y)]
        return float(np.mean(losses))

    def _predict(self, x: np.ndarray) -> float:
        """Return the raw model score of a single feature vector *x*."""
        x = np.asarray(x, dtype=np.float64)
        linear = np.dot(self.w, x)
        # Pairwise term via the identity
        #   sum_{i<j} <V_i, V_j> x_i x_j
        #     = 0.5 * sum_f ((sum_i V_if x_i)**2 - sum_i V_if**2 x_i**2)
        vx = x @ self.V
        v2x2 = (x ** 2) @ (self.V ** 2)
        return float(self.w0 + linear + 0.5 * np.sum(vx ** 2 - v2x2))

    def _update(self, x, y, p) -> None:
        """Apply one gradient step for sample (*x*, *y*) given its score *p*."""
        x = np.asarray(x, dtype=np.float64)
        # Snapshot of <V[:, f], x> taken before V is modified below.
        vx = x @ self.V
        # d(loss)/d(score) = y * (sigmoid(y*p) - 1) = -y / (1 + exp(y*p)),
        # written through logaddexp so extreme margins cannot overflow.
        delta = -y * np.exp(-np.logaddexp(0.0, y * p))
        self.w0 -= self.eta * (delta + 2 * self.lamb * self.w0)
        self.w -= self.eta * (delta * x + 2 * self.lamb * self.w)
        # d(score)/dV[i, f] = x[i] * (vx[f] - x[i] * V[i, f])
        h = x[:, None] * (vx[None, :] - x[:, None] * self.V)
        self.V -= self.eta * (delta * h + 2 * self.lamb * self.V)
def main():
    """Train on scikit-learn's breast-cancer data and print a confusion matrix.

    The data is split into train and test parts, the features are
    standardised using statistics of the training part only, the model is
    fitted, and the confusion matrix on the test part is printed.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler

    data = load_breast_cancer()
    X, y = data["data"], data["target"]
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # The dataset labels are 0/1; the model is trained on -1/+1.
    y_train = [-1 if label == 0 else 1 for label in y_train]
    y_test = [-1 if label == 0 else 1 for label in y_test]

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    model = FactorizationMachines(epoch=100)
    model.fit(X_train, y_train)
    scores = model.predict(X_test)
    # predict() yields raw scores, not probabilities, so the decision
    # boundary between the two classes is 0 rather than 0.5.
    print(confusion_matrix(y_test, [1 if s > 0 else -1 for s in scores]))


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment