daily pastebin goal
31%
SHARE
TWEET

Untitled

a guest Dec 17th, 2018 57 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2.  
  3. # Author : Fabian Pedregosa <fabian@fseoane.net>
  4. #
  5. # License : BSD
  6.  
  7.  
  8. def isotonic_regression_new(w, y, x_min=None, x_max=None):
  9.     """
  10.     Solve the isotonic regression with complete ordering model:
  11.  
  12.         min_x Sum_i{ w_i (y_i - x_i) ** 2 }
  13.  
  14.         subject to x_min = x_1 <= x_2 ... <= x_n = x_max
  15.  
  16.     where each w_i is strictly positive and each y_i is an arbitrary
  17.     real number.
  18.  
  19.     Parameters
  20.     ----------
  21.     w : iterable of floating-point values
  22.     y : iterable of floating-point values
  23.  
  24.     Returns
  25.     -------
  26.     x : list of floating-point values
  27.     """
  28.  
  29.     if x_min is not None or x_max is not None:
  30.         y = np.copy(y)
  31.         w = np.copy(w)
  32.         C = np.dot(w, y * y) * 10 # upper bound on the cost function
  33.         if x_min is not None:
  34.             y[0] = x_min
  35.             w[0] = C
  36.         if x_max is not None:
  37.             y[-1] = x_max
  38.             w[-1] = C
  39.  
  40.     J = [(w[i] * y[i], w[i], [i,]) for i in range(len(y))]
  41.     cur = 0
  42.  
  43.     while cur < len(J) - 1:
  44.         v0, v1, v2 = 0, 0, np.inf
  45.         w0, w1, w2 = 1, 1, 1
  46.         while v0 * w1 <= v1 * w0 and cur < len(J) - 1:
  47.             v0, w0, idx0 = J[cur]
  48.             v1, w1, idx1 = J[cur + 1]
  49.             if v0 * w1 <= v1 * w0:
  50.                 cur +=1
  51.  
  52.         if cur == len(J) - 1:
  53.             break
  54.  
  55.         # merge two groups
  56.         v0, w0, idx0 = J.pop(cur)
  57.         v1, w1, idx1 = J.pop(cur)
  58.         J.insert(cur, (v0 + v1, w0 + w1, idx0 + idx1))
  59.         while v2 * w0 > v0 * w2 and cur > 0:
  60.             v0, w0, idx0 = J[cur]
  61.             v2, w2, idx2 = J[cur - 1]
  62.             if w0 * v2 >= w2 * v0:
  63.                 J.pop(cur)
  64.                 J[cur - 1] = (v0 + v2, w0 + w2, idx0 + idx2)
  65.                 cur -= 1
  66.  
  67.     sol = np.empty(len(y))
  68.     for v, w, idx in J:
  69.         sol[idx] = v / w
  70.     return sol
  71.  
  72.  
  73.  
  74. def isotonic_regression(y, weights=[]):
  75.     """
  76.     Solve the isotonic regression model:
  77.  
  78.         min Sum{ weights_i (x_i - y_i) ** 2 }
  79.  
  80.         subject to x_0 <= x_1 < ... < x_n
  81.  
  82.     Parameters
  83.     ----------
  84.     y : array-like, shape=(n_samples,)
  85.         Input data.
  86.  
  87.     w : array-like, shape=(n_samples,), optional
  88.         Weights in the cost function (must be strictly
  89.         positive numbers),
  90.  
  91.     Returns
  92.     -------
  93.     x : array
  94.     """
  95.     y, weights = map(np.asarray, (y, weights))
  96.     assert y.ndim == 1
  97.     if weights.size:
  98.         assert weights.ndim == 1
  99.         assert weights.size == y.size
  100.     n_samples = len(y)
  101.     v = y.copy()
  102.     lvls = np.arange(n_samples)
  103.     lvlsets = np.c_[lvls, lvls]
  104.     viol = np.where(np.diff(v) < 0)[0]
  105.     while viol.size:
  106.         start = lvlsets[viol[0], 0]
  107.         last = lvlsets[viol[0] + 1, 1]
  108.  
  109.         n = last - start + 1
  110.         idx = slice(start, last + 1)
  111.         if weights.size:
  112.             v[idx] = np.dot(weights[idx], y[idx]) / np.sum(weights[idx])
  113.         else:
  114.             v[idx]= np.sum(v[idx]) / n
  115.         lvlsets[idx, 0] = start
  116.         lvlsets[idx, 1] = last
  117.         viol = np.where(np.diff(v) < 0)[0]
  118.     return v
  119.  
  120.  
  121. def isotonic_regression_2(w, y, x_min=None, x_max=None):
  122.     """
  123.     Solve the isotonic regression model:
  124.  
  125.         min Sum w_i (y_i - x_i) ** 2
  126.  
  127.         subject to x_min = x_1 <= x_2 ... <= x_n = x_max
  128.  
  129.     where each w_i is strictly positive and each y_i is an arbitrary
  130.     real number.
  131.  
  132.     Parameters
  133.     ----------
  134.     w : iterable of floating-point values
  135.     y : iterable of floating-point values
  136.  
  137.     Returns
  138.     -------
  139.     x : list of floating-point values
  140.     """
  141.  
  142.     if x_min is not None or x_max is not None:
  143.         y = np.copy(y)
  144.         w = np.copy(w)
  145.         if x_min is not None:
  146.             y[0] = x_min
  147.             w[0] = 1e32
  148.         if x_max is not None:
  149.             y[-1] = x_max
  150.             w[-1] = 1e32
  151.  
  152.     J = [[i,] for i in range(len(y))]
  153.     cur = 0
  154.  
  155.     while cur < len(J) - 1:
  156.         av0, av1, av2 = 0, 0, np.inf
  157.         while av0 <= av1 and cur < len(J) - 1:
  158.             idx0 = J[cur]
  159.             idx1 = J[cur + 1]
  160.             av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
  161.             av1 = np.dot(w[idx1], y[idx1]) / np.sum(w[idx1])
  162.             cur += 1 if av0 <= av1 else 0
  163.  
  164.         if cur == len(J) - 1:
  165.             break
  166.  
  167.         a = J.pop(cur)
  168.         b = J.pop(cur)
  169.         J.insert(cur, a + b)
  170.         while av2 > av0 and cur > 0:
  171.             idx0 = J[cur]
  172.             idx2 = J[cur - 1]
  173.             av0 = np.dot(w[idx0], y[idx0]) / np.sum(w[idx0])
  174.             av2 = np.dot(w[idx2], y[idx2]) / np.sum(w[idx2])
  175.             if av2 >= av0:
  176.                 a = J.pop(cur - 1)
  177.                 b = J.pop(cur - 1)
  178.                 J.insert(cur - 1, a + b)
  179.                 cur -= 1
  180.  
  181.     sol = []
  182.     for idx in J:
  183.         sol += [np.dot(w[idx], y[idx]) / np.sum(w[idx])] * len(idx)
  184.     return np.asarray(sol)
  185.  
  186.  
  187. if __name__ == '__main__':
  188.     from time import time
  189.     np.random.seed(0)
  190.     t1, t2, t3 = [], [], []
  191.     for i in range(1, 11):
  192.         dat = np.arange(i * 1e4).astype(np.float)
  193.         dat += 2 * np.random.randn(i * 1e4)  # add noise
  194.         weights = .5 + np.abs(np.random.randn(i * 1e4))
  195.         start = time()
  196.         dat_hat = isotonic_regression(dat, weights)
  197.  
  198.         t1.append(time() - start)
  199.         start = time()
  200.         dat_hat2 = isotonic_regression_2(weights, dat)
  201.         t2.append(time() - start)
  202.         print 'Result matches: ', np.allclose(dat_hat, dat_hat2)
  203.  
  204.  
  205.         start = time()
  206.         dat_hat2 = isotonic_regression_new(weights, dat)
  207.         t3.append(time() - start)
  208.         print 'Result matches: ', np.allclose(dat_hat, dat_hat2)
  209.  
  210.     import pylab as pl
  211.     n_samples = [i * 1e4 for i in range(10)]
  212.     pl.plot(n_samples, t1, color='blue', label='simon collins')
  213.     pl.plot(n_samples, t2, color='red', label='me')
  214.     pl.plot(n_samples, t3, color='green', label='me new')
  215.     pl.xlabel('Number of samples')
  216.     pl.ylabel('Time')
  217.     pl.legend()
  218.     pl.savefig('isotonic_compare.png')
  219.     pl.show()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand