Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def ent(df, attribute, target="play"):
    """Return the entropy of the target column conditioned on ``attribute``.

    For every distinct value ``v`` of ``df[attribute]`` the entropy of the
    target labels among the rows where ``attribute == v`` is computed, and
    those per-value entropies are summed, each weighted by the share of
    rows that have value ``v``.

    Relies on the names ``eps`` and ``log`` being defined at module level;
    neither definition is visible in this snippet.  ``eps`` is presumably a
    tiny positive constant that keeps ``0/0`` and ``log(0)`` from occurring,
    and the base of ``log`` sets the unit of the result -- confirm against
    the rest of the file.

    Args:
        df: pandas DataFrame holding both the attribute and target columns.
        attribute: Name of the column to condition on.
        target: Name of the label column.  Defaults to ``"play"``, the
            column the original implementation hard-coded.

    Returns:
        The absolute value of the weighted entropy sum; ``0`` when ``df``
        has no rows.
    """
    labels = df[target]
    values = df[attribute]
    total_rows = len(df)

    # One boolean mask per class label (e.g. 'Yes' / 'No'), built once
    # instead of once per attribute value.
    label_masks = [labels == label for label in labels.unique()]

    weighted_entropy = 0
    for value in values.unique():
        value_mask = values == value
        # Number of rows with this attribute value.  It is the same for
        # every class label, so it is computed outside the inner loop.
        den = int(value_mask.sum())

        entropy_each_feature = 0
        for label_mask in label_masks:
            # Combine both conditions in a single aligned mask rather than
            # chaining a full-length mask onto an already filtered Series.
            num = int((value_mask & label_mask).sum())
            fraction = num / (den + eps)  # p_i for this label
            entropy_each_feature += -fraction * log(fraction + eps)

        # Weight this value's entropy by the share of rows it covers.
        weighted_entropy += (den / total_rows) * entropy_each_feature

    # The original returned the absolute value of the sum; keep that contract.
    return abs(weighted_entropy)
Add Comment
Please, Sign In to add comment