from math import log import numpy as np from scipy.optimize import fmin_bfgs def bfgs_sigmoid_train(F, Y, use_grad=True): # Reference: Platt, "Probabilistic Outputs for Support Vector Machines" # Bayesian priors (see Platt end of section 2.2) prior0 = sum(1.0 for y in Y if y <=0) prior1 = Y.shape[0] - prior0 T = np.zeros(Y.shape) T[Y > 0] = (prior1 + 1) / (prior1 + 2) T[Y <= 0] = 1 / (prior0 + 2) T1 = 1.0 - T AB0 = np.array([0.0, log((prior0 + 1.0) / (prior1 + 1.0))]) def objective(AB): # From Platt (beginning of Section 2.2) E = np.exp(AB[0] * F + AB[1]) P = 1.0 / (1.0 + E) return -(np.dot(T, np.log(P)) + np.dot(T1, np.log(1.0 - P))) def grad(AB): # gradient of the objective function E = np.exp(AB[0] * F + AB[1]) P = 1.0 / (1.0 + E) TEP_minus_T1P = P * (T * E - T1) dA = np.dot(TEP_minus_T1P, F) dB = np.sum(TEP_minus_T1P) return np.array([dA, dB]) if use_grad: return fmin_bfgs(objective, AB0, fprime=grad) else: return fmin_bfgs(objective, AB0) if __name__ == '__main__': exF = np.array([5, -4, 1.0]) exY = np.array([1, -1, -1]) # computed from my python port of the C++ code in LibSVM lin_libsvm_solution = np.array([-0.20261354391187855, 0.65236314980010512]) assert np.linalg.norm(lin_libsvm_solution - bfgs_sigmoid_train(exF, exY, use_grad=True)) < 0.001 assert np.linalg.norm(lin_libsvm_solution - bfgs_sigmoid_train(exF, exY, use_grad=False)) < 0.001