Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def find_best_split(feature_vector, target_vector):
    """Find the threshold on one feature that maximises the Gini criterion.

    For a threshold ``t`` the objects are divided into a left part
    (``feature <= t``) and a right part (``feature > t``).  The score of
    the split is

        Q = -|R_l| / |R| * H(R_l) - |R_r| / |R| * H(R_r),
        H(R) = 1 - p_0**2 - p_1**2,

    where ``p_0`` and ``p_1`` are the shares of class 0 and class 1 inside
    the corresponding part.  ``Q`` is never positive; larger is better.

    Candidate thresholds are the midpoints between neighbouring *distinct*
    sorted feature values, so neither part of a split is ever empty.

    :param feature_vector: 1-D array-like of numeric feature values.
    :param target_vector: 1-D array-like of class labels of the same
        length; any non-zero label is treated as class 1.
    :return: tuple ``(thresholds, ginis, threshold_best, gini_best)``:
        ``thresholds`` is the ascending array of candidate thresholds,
        ``ginis`` holds the criterion value for each of them,
        ``threshold_best`` / ``gini_best`` describe the best split (the
        smallest threshold wins ties).  When the feature has fewer than
        two distinct values there is nothing to split: both arrays are
        empty and ``threshold_best`` and ``gini_best`` are ``None``.
    """
    feature_vector = np.asarray(feature_vector, dtype=float)
    is_class_one = np.asarray(target_vector).astype(bool)
    n = feature_vector.shape[0]

    # Sort once; afterwards every split is described by "the first k sorted
    # objects go left", which avoids an O(n^2) thresholds-by-objects mask.
    order = np.argsort(feature_vector, kind="stable")
    sorted_features = feature_vector[order]
    sorted_ones = is_class_one[order].astype(float)

    # Index i is a boundary when sorted value i differs from value i + 1.
    # Skipping equal neighbours keeps thresholds off the data points.
    boundaries = np.flatnonzero(sorted_features[1:] != sorted_features[:-1])
    if boundaries.size == 0:
        empty = np.array([], dtype=float)
        return empty, empty.copy(), None, None

    thresholds = (sorted_features[boundaries] + sorted_features[boundaries + 1]) / 2

    left_count = boundaries + 1
    right_count = n - left_count

    # Running number of class-1 objects among the sorted prefix.
    ones_prefix = np.cumsum(sorted_ones)
    left_ones = ones_prefix[boundaries]
    right_ones = ones_prefix[-1] - left_ones

    # Class shares are taken within each part (not over the whole sample).
    p_1_left = left_ones / left_count
    p_1_right = right_ones / right_count
    impurity_left = 1 - p_1_left ** 2 - (1 - p_1_left) ** 2
    impurity_right = 1 - p_1_right ** 2 - (1 - p_1_right) ** 2

    ginis = -left_count / n * impurity_left - right_count / n * impurity_right

    # argmax returns the first maximum, i.e. the smallest threshold on ties.
    index_best = np.argmax(ginis)
    return thresholds, ginis, thresholds[index_best], ginis[index_best]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement