Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Node:
    """A node of a binary decision tree for classification.

    A node is either an internal split (``feature``/``threshold`` set, with
    children ``l`` and ``r``) or a leaf (``answer`` set, children ``None``).
    Rows with ``X[feature] <= threshold`` go to ``l``; the rest go to ``r``.
    """

    def __init__(self):
        self.l = None  # left child: rows with feature value <= threshold
        self.r = None  # right child: rows with feature value > threshold
        self.feature = None  # column label used for the split
        self.threshold = -1  # split value for ``feature``
        self.answer = None  # predicted label when the node is a leaf

    def H(self, X):
        """Return the Shannon entropy (natural log) of the labels in *X*.

        An empty collection has entropy 0.
        """
        total = len(X)
        if total == 0:
            return 0.0
        entropy = 0.0
        for count in Counter(X).values():
            p = count / total
            entropy -= p * math.log(p)
        return entropy

    def Q(self, X, Xl, Xr):
        """Return the information gain of splitting labels *X* into *Xl*/*Xr*.

        This is the parent entropy minus the size-weighted entropies of the
        two parts. Returns 0 when *X* is empty (nothing to gain).
        """
        total = len(X)
        if total == 0:
            return 0.0
        return (
            self.H(X)
            - len(Xl) / total * self.H(Xl)
            - len(Xr) / total * self.H(Xr)
        )

    def is_stop(self, y):
        """Return True when *y* holds at most one distinct label."""
        return len(Counter(y)) <= 1

    def _make_leaf(self, y):
        """Turn this node into a leaf predicting the most common label in *y*."""
        most_common = Counter(y).most_common(1)
        # An empty label set has no majority; leave the answer undefined.
        self.answer = most_common[0][0] if most_common else None

    def fit(self, X, y):
        """Recursively grow the subtree rooted at this node.

        Args:
            X: feature table exposing ``.columns`` and column access via
                ``X[name]`` (used like a pandas DataFrame).
            y: labels, indexable by the boolean mask produced from a column
                comparison (used like a pandas Series aligned with ``X``).

        The split maximising information gain is chosen among all features
        and all thresholds taken from that feature's own values. Splits
        that would leave one side empty are ignored; if no usable split
        exists, the node becomes a leaf with the majority label.
        """
        if self.is_stop(y):
            self._make_leaf(y)
            return

        best_q = -1
        best_feature = None
        best_threshold = -1
        for feature in X.columns:
            column = X[feature]
            # Only this feature's own values are meaningful thresholds.
            for val in np.unique(column.values):
                y_left = y[column <= val]
                y_right = y[column > val]
                # A split with an empty side does not partition the data and
                # would make the recursion loop forever on the same rows.
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                q = self.Q(y, y_left, y_right)
                if q > best_q:
                    best_q = q
                    best_feature = feature
                    best_threshold = val

        if best_feature is None:
            # Rows are indistinguishable by any feature: fall back to a leaf.
            self._make_leaf(y)
            return

        self.feature = best_feature
        self.threshold = best_threshold
        # Trace output of the chosen split, retained from the original code.
        print(self.feature, self.threshold)

        left_cond = X[self.feature] <= self.threshold
        right_cond = X[self.feature] > self.threshold
        self.l = Node()
        self.r = Node()
        self.l.fit(X[left_cond], y[left_cond])
        self.r.fit(X[right_cond], y[right_cond])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement