Advertisement
Guest User

Untitled

a guest
Nov 17th, 2018
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.92 KB | None | 0 0
  1. def find_best_split(feature_vector, target_vector):
  2.     unique_features = np.sort(feature_vector)
  3.     thresholds = (unique_features[1:] + unique_features[:-1]) / 2
  4.  
  5.     m_inv = ~np.less.outer(thresholds, feature_vector).astype(bool)
  6.  
  7.     t = target_vector.astype(bool)
  8.     t_inv = ~t
  9.  
  10.     m_inv = m_inv.astype(float)
  11.  
  12.     t = t.astype(float)
  13.     t_inv = t_inv.astype(float)
  14.  
  15.     n = len(feature_vector)
  16.     r = np.sum(m, axis=1).astype(float)
  17.     l = n - r
  18.  
  19.     one = sum(t)
  20.     zero = n - one
  21.  
  22.     p_0_right = np.dot(m_inv, t_inv) / zero
  23.     p_1_right = np.dot(m_inv, t) / one
  24.  
  25.     p_0_left = 1 - p_0_right
  26.     p_1_left = 1 - p_1_right
  27.  
  28.     ginis = - r / n * (1 - p_0_right**2 - p_1_right**2) - l / n * (1 - p_0_left**2 - p_1_left**2)
  29.  
  30.     index_best = np.argmax(ginis)
  31.     threshold_best = thresholds[index_best]
  32.     gini_best = ginis[index_best]
  33.     return thresholds, ginis, threshold_best, gini_best
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement