Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- # Author : Fabian Pedregosa <fabian@fseoane.net>
- #
- # License : BSD
- def isotonic_regression_new(w, y, x_min=None, x_max=None):
- """
- Solve the isotonic regression with complete ordering model:
- min_x Sum_i{ w_i (y_i - x_i) ** 2 }
- subject to x_min = x_1 <= x_2 ... <= x_n = x_max
- where each w_i is strictly positive and each y_i is an arbitrary
- real number.
- Parameters
- ----------
- w : iterable of floating-point values
- y : iterable of floating-point values
- Returns
- -------
- x : list of floating-point values
- """
- if x_min is not None or x_max is not None:
- y = np.copy(y)
- w = np.copy(w)
- C = np.dot(w, y * y) * 10 # upper bound on the cost function
- if x_min is not None:
- y[0] = x_min
- w[0] = C
- if x_max is not None:
- y[-1] = x_max
- w[-1] = C
- J = [(w[i] * y[i], w[i], [i,]) for i in range(len(y))]
- cur = 0
- while cur < len(J) - 1:
- v0, v1, v2 = 0, 0, np.inf
- w0, w1, w2 = 1, 1, 1
- while v0 * w1 <= v1 * w0 and cur < len(J) - 1:
- v0, w0, idx0 = J[cur]
- v1, w1, idx1 = J[cur + 1]
- if v0 * w1 <= v1 * w0:
- cur +=1
- if cur == len(J) - 1:
- break
- # merge two groups
- v0, w0, idx0 = J.pop(cur)
- v1, w1, idx1 = J.pop(cur)
- J.insert(cur, (v0 + v1, w0 + w1, idx0 + idx1))
- while v2 * w0 > v0 * w2 and cur > 0:
- v0, w0, idx0 = J[cur]
- v2, w2, idx2 = J[cur - 1]
- if w0 * v2 >= w2 * v0:
- J.pop(cur)
- J[cur - 1] = (v0 + v2, w0 + w2, idx0 + idx2)
- cur -= 1
- sol = np.empty(len(y))
- for v, w, idx in J:
- sol[idx] = v / w
- return sol
- def isotonic_regression(y, weights=[]):
- """
- Solve the isotonic regression model:
- min Sum{ weights_i (x_i - y_i) ** 2 }
- subject to x_0 <= x_1 < ... < x_n
- Parameters
- ----------
- y : array-like, shape=(n_samples,)
- Input data.
- w : array-like, shape=(n_samples,), optional
- Weights in the cost function (must be strictly
- positive numbers),
- Returns
- -------
- x : array
- """
- y, weights = map(np.asarray, (y, weights))
- assert y.ndim == 1
- if weights.size:
- assert weights.ndim == 1
- assert weights.size == y.size
- n_samples = len(y)
- v = y.copy()
- lvls = np.arange(n_samples)
- lvlsets = np.c_[lvls, lvls]
- viol = np.where(np.diff(v) < 0)[0]
- while viol.size:
- start = lvlsets[viol[0], 0]
- last = lvlsets[viol[0] + 1, 1]
- n = last - start + 1
- idx = slice(start, last + 1)
- if weights.size:
- v[idx] = np.dot(weights[idx], y[idx]) / np.sum(weights[idx])
- else:
- v[idx]= np.sum(v[idx]) / n
- lvlsets[idx, 0] = start
- lvlsets[idx, 1] = last
- viol = np.where(np.diff(v) < 0)[0]
- return v
- def isotonic_regression_2(w, y, x_min=None, x_max=None):
- """
- Solve the isotonic regression model:
- min Sum w_i (y_i - x_i) ** 2
- subject to x_min = x_1 <= x_2 ... <= x_n = x_max
- where each w_i is strictly positive and each y_i is an arbitrary
- real number.
- Parameters
- ----------
- w : iterable of floating-point values
- y : iterable of floating-point values
- Returns
- -------
- x : list of floating-point values
- """
- if x_min is not None or x_max is not None:
- y = np.copy(y)
- w = np.copy(w)
- if x_min is not None:
- y[0] = x_min
- w[0] = 1e32
- if x_max is not None:
- y[-1] = x_max
- w[-1] = 1e32
- J = [[i,] for i in range(len(y))]
- cur = 0
- while cur < len(J) - 1:
- av0, av1, av2 = 0, 0, np.inf
- while av0 <= av1 and cur < len(J) - 1:
- idx0 = J[cur]
- idx1 = J[cur + 1]
- av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
- av1 = np.dot(w[idx1], y[idx1]) / np.sum(w[idx1])
- cur += 1 if av0 <= av1 else 0
- if cur == len(J) - 1:
- break
- a = J.pop(cur)
- b = J.pop(cur)
- J.insert(cur, a + b)
- while av2 > av0 and cur > 0:
- idx0 = J[cur]
- idx2 = J[cur - 1]
- av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
- av2 = np.dot(w[idx2], y[idx2]) / np.sum(w[idx2])
- if av2 >= av0:
- a = J.pop(cur - 1)
- b = J.pop(cur - 1)
- J.insert(cur - 1, a + b)
- cur -= 1
- sol = []
- for idx in J:
- sol += [np.dot(w[idx], y[idx]) / np.sum(w[idx])] * len(idx)
- return np.asarray(sol)
- if __name__ == '__main__':
- from time import time
- np.random.seed(0)
- t1, t2, t3 = [], [], []
- for i in range(1, 11):
- dat = np.arange(i * 1e4).astype(np.float)
- dat += 2 * np.random.randn(i * 1e4) # add noise
- weights = .5 + np.abs(np.random.randn(i * 1e4))
- start = time()
- dat_hat = isotonic_regression(dat, weights)
- t1.append(time() - start)
- start = time()
- dat_hat2 = isotonic_regression_2(weights, dat)
- t2.append(time() - start)
- print 'Result matches: ', np.allclose(dat_hat, dat_hat2)
- start = time()
- dat_hat2 = isotonic_regression_new(weights, dat)
- t3.append(time() - start)
- print 'Result matches: ', np.allclose(dat_hat, dat_hat2)
- import pylab as pl
- n_samples = [i * 1e4 for i in range(10)]
- pl.plot(n_samples, t1, color='blue', label='simon collins')
- pl.plot(n_samples, t2, color='red', label='me')
- pl.plot(n_samples, t3, color='green', label='me new')
- pl.xlabel('Number of samples')
- pl.ylabel('Time')
- pl.legend()
- pl.savefig('isotonic_compare.png')
- pl.show()
Add Comment
Please, Sign In to add comment