Advertisement
Guest User

Untitled

a guest
Dec 17th, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.06 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. from scipy.optimize import minimize_scalar
  4.  
  5. from collections import namedtuple
  6.  
  7.  
  # Internal decision node. Samples with ``x[feature] >= value`` are routed to
  # ``right``, all others to ``left``; ``impurity`` is the impurity reported
  # for the chosen split.
  Node = namedtuple('Node', ('feature', 'value', 'impurity', 'left', 'right'))

  # Terminal node. ``value`` is the prediction (mean of the training targets
  # that reached the leaf); ``x`` and ``y`` are those training samples.
  Leaf = namedtuple('Leaf', ('value', 'x', 'y'))

  # Example:
  #   leaf = Leaf(value=1.0, x=x_part, y=y_part)
  #   node = Node(feature=0, value=2, impurity=0.1, left=leaf, right=leaf)
  11.  
  12.  
  13.  
  14.  
  def partition(x, y, feature, value):
      """Split samples into two groups by thresholding one feature column.

      Args:
          x: 2-D array-like of samples, one row per sample.
          y: 1-D array-like of targets aligned with the rows of ``x``.
          feature: Column index of ``x`` to compare.
          value: Threshold; rows with ``x[:, feature] >= value`` go right.

      Returns:
          ``((x_left, y_left), (x_right, y_right))`` where the left group
          holds every row that did not satisfy ``>= value``.
      """
      x = np.asarray(x)
      y = np.asarray(y)
      goes_right = x[:, feature] >= value
      # Left is the exact complement of right, so every sample lands in
      # exactly one group.
      goes_left = np.logical_not(goes_right)
      return (x[goes_left], y[goes_left]), (x[goes_right], y[goes_right])
  19.  
  def criteria(y):
      """Return the population variance (``ddof=0``) of the targets ``y``.

      An empty ``y`` yields ``0.0`` rather than NaN (and a RuntimeWarning),
      so that an empty group contributes no spread to a split.
      """
      y = np.asarray(y, dtype=float)
      if y.size == 0:
          return 0.0
      # np.var avoids the sqrt/square round trip of np.std(y) ** 2.
      return np.var(y)
  22.  
  23.  
  def impurity(y_left, y_right):
      """Return the size-weighted mean of ``criteria`` over both sides of a split.

      Args:
          y_left: Targets routed to the left child.
          y_right: Targets routed to the right child.

      Returns:
          ``(n_l * criteria(y_left) + n_r * criteria(y_right)) / (n_l + n_r)``.
          An empty side contributes nothing; if both sides are empty the
          impurity is undefined and NaN is returned.
      """
      y_left = np.asarray(y_left)
      y_right = np.asarray(y_right)
      n_left, n_right = y_left.size, y_right.size
      total = n_left + n_right
      if total == 0:
          return float('nan')

      # Skip empty sides explicitly: multiplying a zero weight by an undefined
      # (NaN) spread would otherwise turn the whole result into NaN.
      weighted = 0.0
      if n_left:
          weighted += n_left * criteria(y_left)
      if n_right:
          weighted += n_right * criteria(y_right)

      # Equivalent closed form when criteria is the variance:
      #   h = var(y) - n_l * n_r * (mean_l - mean_r) ** 2 / n ** 2
      return weighted / total
  29.  
  30.  
  def f(value, feature, x, y):
      """Objective for the threshold search: impurity of one candidate split.

      ``value`` comes first so the function can be handed directly to a scalar
      optimiser, with ``(feature, x, y)`` supplied as extra arguments.

      Args:
          value: Candidate threshold.
          feature: Column index of ``x`` being split on.
          x: 2-D array of samples.
          y: 1-D array of targets aligned with ``x``.

      Returns:
          The impurity of splitting ``(x, y)`` at ``x[:, feature] >= value``.
      """
      (_, y_left), (_, y_right) = partition(x, y, feature, value)
      return impurity(y_left, y_right)
  34.  
  35.  
  def find_best_split(x, y):
      """Search every feature for the threshold with the lowest split impurity.

      For each column, a bounded scalar minimisation of ``f`` is run between
      the second-smallest and the largest value of that column.

      Args:
          x: 2-D array of samples, one row per sample.
          y: 1-D array of targets aligned with ``x``.

      Returns:
          ``(best_feature, best_value, best_impurity)``. When no split can be
          evaluated (fewer than two samples), the impurity is ``np.inf`` and
          the feature/value are placeholders.

      Raises:
          RuntimeError: If the scalar optimiser reports failure.
      """
      x = np.asarray(x)
      fallback_value = x[0, 0] if x.size else np.nan
      best_feature, best_value, best_impurity = 0, fallback_value, np.inf

      # The search bounds need the second-smallest value of each column, so
      # fewer than two samples cannot be split at all.
      if x.shape[0] < 2:
          return best_feature, best_value, best_impurity

      for feature in range(x.shape[1]):
          column = np.sort(x[:, feature])
          lower, upper = column[1], column[-1]

          if lower == upper:
              # Zero-width interval: there is only one candidate threshold,
              # so evaluate it directly instead of invoking the optimiser.
              value = lower
              split_impurity = f(value, feature, x, y)
          else:
              # NOTE(review): the objective is piecewise constant in the
              # threshold, so the optimiser may settle on a local minimum.
              result = minimize_scalar(
                  f,
                  args=(feature, x, y),
                  method='bounded',
                  bounds=(lower, upper),
              )
              if not result.success:
                  raise RuntimeError(
                      'threshold search failed for feature %d: %s'
                      % (feature, result.message)
                  )
              value = result.x
              split_impurity = result.fun

          # A NaN impurity compares False here and is therefore never chosen.
          if split_impurity < best_impurity:
              best_feature, best_value, best_impurity = (
                  feature, value, split_impurity
              )
      return best_feature, best_value, best_impurity
  53.  
  54.  
  def build_tree(x, y, depth=1, max_depth=np.inf):
      """Recursively grow a regression tree over ``(x, y)``.

      Args:
          x: 2-D array of samples, one row per sample.
          y: 1-D array of targets aligned with ``x``.
          depth: Depth of the node being built (the root is 1).
          max_depth: Depth at which growth stops and a leaf is emitted.

      Returns:
          A ``Leaf`` holding the mean target when the depth limit is reached,
          the targets are (nearly) constant, or no split separates the
          samples; otherwise a ``Node`` with recursively built children.

      Raises:
          ValueError: If called with no samples.
      """
      x = np.asarray(x)
      y = np.asarray(y)
      if y.size == 0:
          raise ValueError('cannot build a tree from an empty sample set')

      if depth >= max_depth or y.size < 2 or criteria(y) < 1e-6:
          return Leaf(np.mean(y), x, y)

      feature, value, split_impurity = find_best_split(x, y)
      (x_left, y_left), (x_right, y_right) = partition(x, y, feature, value)

      # If one side is empty the other side is the unchanged input, so
      # recursing would never terminate; stop with a leaf instead.
      if y_left.size == 0 or y_right.size == 0:
          return Leaf(np.mean(y), x, y)

      left = build_tree(x_left, y_left, depth + 1, max_depth)
      right = build_tree(x_right, y_right, depth + 1, max_depth)
      return Node(feature, value, split_impurity, left, right)
  68.  
  def predict(tree, x):
      """Predict a target for every row of ``x`` by walking ``tree``.

      Args:
          tree: Root of the tree, a ``Node`` or a ``Leaf``.
          x: 2-D array-like of samples, one row per sample.

      Returns:
          1-D float array with one prediction per row of ``x``.
      """
      x = np.asarray(x)
      predictions = np.empty(x.shape[0])
      for i, row in enumerate(x):
          node = tree
          # Descend until a Leaf is reached; the comparison mirrors the
          # ``>=`` rule used when the samples were partitioned.
          while isinstance(node, Node):
              if row[node.feature] >= node.value:
                  node = node.right
              else:
                  node = node.left
          predictions[i] = node.value
      return predictions
  80.  
  # Experiment 1: y = 2*x0 + 1 plus Gaussian noise (x1 is irrelevant).
  n = 100
  x = np.random.normal(0, 1, size=(n, 2))
  y_true = 2 * x[:, 0] + 1
  y = y_true + np.random.normal(0, 0.5, n)
  # No max_depth is given, so the tree grows until each leaf's targets are
  # (nearly) constant.
  tree = build_tree(x, y)
  # Noisy training targets against the noise-free ground truth.
  plt.plot(y_true, y, 'o')
  x_test = np.random.normal(0, 1, size=(n, 2))
  y_test = 2 * x_test[:, 0] + 1
  y_pred = predict(tree, x_test)
  # Predictions on fresh samples against their noise-free ground truth.
  plt.plot(y_test, y_pred, 'v')
  92.  
  93.  
  # Experiment 2: y = 1 + 2*x0**2 + x1**2 plus Gaussian noise.

  plt.figure()
  plt.xscale('log')
  plt.yscale('log')
  n = 10000
  rs = np.random.RandomState(1)
  x = rs.normal(0, 1, size=(n, 2))
  y_true = 2 * x[:, 0]**2 + 1 + x[:, 1]**2
  # Draw the noise from the seeded generator as well, so the whole
  # experiment is reproducible.
  y = y_true + rs.normal(0, 0.5, n)
  tree = build_tree(x, y)
  plt.plot(y_true, y, 'o')
  # NOTE(review): test inputs are centred at 2 while training inputs are
  # centred at 0 -- confirm this distribution shift is intended.
  x_test = rs.normal(2, 1, size=(n, 2))
  y_test = 2 * x_test[:, 0]**2 + 1 + x_test[:, 1]**2
  y_pred = predict(tree, x_test)
  plt.plot(y_test, y_pred, 'v')
  # Spread of the residuals; np.std's second positional argument is ``axis``,
  # so the two arrays must be subtracted rather than passed separately.
  print(np.std(y_test - y_pred))
  # Diagonal reference line: perfect predictions would lie on it.
  plt.plot(plt.xlim(), plt.xlim(), 'k', lw=0.5)
  112.  
  113.  
  114.  
  115.  
  116.  
  117.  
  118. #
  119. ## straight line
  120. #n=1000
  121. #x = np.random.normal(0, 1, size=(n, 2))
  122. #y = np.asarray(x[:, 0] > 0, dtype=int)
  123. #tree = build_tree(x, y)
  124. #COLORS = np.array([[1.,0.,0.], [0.,0.,1.]])
  125. #plt.scatter(*x.T, color=COLORS[y])
  126. #x_test = np.random.normal(0, 1, size=(n, 2))
  127. #y_pred = predict(tree, x_test).astype(int)
  128. ##plt.scatter(*x_test.T, color=COLORS[y_pred], marker='v', s=50)
  129. #
  130. #
  131. ## circle
  132. #plt.figure(figsize=(5,5))
  133. #plt.xlim([-2,2])
  134. #plt.ylim([-2,2])
  135. #n=1000
  136. #x = np.random.normal(0, 1, size=(n, 2))
  137. #y = np.asarray(x[:,0]**2 + x[:,1]**2 <= 1, dtype=int)
  138. #tree = build_tree(x, y)
  139. #COLORS = np.array([[1.,0.,0.], [0.,0.,1.]])
  140. #plt.scatter(*x.T, color=COLORS[y])
  141. #x_test = np.random.normal(0, 1, size=(n, 2))
  142. #y_pred = predict(tree, x_test).astype(int)
  143. ##plt.scatter(*x_test.T, color=COLORS[y_pred], marker='v', s=50)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement