Advertisement
Guest User

Untitled

a guest
Mar 26th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.06 KB | None | 0 0
import math
from collections import Counter
  2.  
  3. # Version of split_ratio package
  4. __version__ = "1.0.0"
  5.  
  6.  
  7. def info(D):
  8. categories = set(D)
  9. freqofcategories = dict()
  10. for i in D:
  11. if i not in freqofcategories:
  12. freqofcategories[i] = 0
  13. freqofcategories[i] = freqofcategories[i] +1
  14. sum = 0.0
  15. for x in freqofcategories:
  16. interm = (freqofcategories[x]/len(D))
  17. sum = sum-(interm*math.log(interm,2))
  18. return(float(sum))
  19.  
  20.  
  21. def infoofDA(freqofcategoriesDina,total):
  22. sum = 0.0
  23. for i in freqofcategoriesDina:
  24. if i > 0:
  25. interm = (i/total)
  26. sum = sum-(interm* math.log(interm,2))
  27. return(float(sum))
  28.  
  29.  
  30. def infoA(a,D):
  31. categoriesD = list(set(D))
  32. categories = set(a)
  33. freqofcategoriesDwitha = dict()
  34. for i in categories:
  35. freqofcategoriesDwitha[i] = [0]*len(categoriesD)
  36. freqofcategories = dict()
  37. count = 0
  38. for i in a:
  39. if i not in freqofcategories:
  40. freqofcategories[i] = 0
  41. freqofcategories[i] = freqofcategories[i] + 1
  42. freqofcategoriesDwitha[i][categoriesD.index(D[count])] +=1
  43. count = count+1
  44. sum = 0.0
  45. for x in freqofcategories:
  46. interm = (freqofcategories[x]/len(D))
  47. sum = sum+(interm*infoofDA(freqofcategoriesDwitha[x],freqofcategories[x]))
  48. return(float(sum))
  49.  
  50. def info_gain(A,D):
  51. return(info(D)-infoA(A,D))
  52.  
  53.  
  54. def split_infoA(a):
  55. categories = list(set(a))
  56. freqofcategories = dict()
  57. for i in a:
  58. if i not in freqofcategories:
  59. freqofcategories[i] = 0
  60. freqofcategories[i] = freqofcategories[i] + 1
  61. # print(freqofcategories)
  62. sum = 0
  63. for x in freqofcategories:
  64. interm = (freqofcategories[x]/len(a))
  65. # print(freqofcategories[x],len(a))
  66. sum = sum-((interm)*math.log(interm,2))
  67. return(float(sum))
  68.  
  69.  
  70. def gain_ratio(a,D):
  71. p=float(info(D) - infoA(a,D))
  72. if p == float(0) and split_infoA(a) != float(0.0):
  73. return 0.0
  74. elif p == float(0) and split_infoA(a) == float(0.0):
  75. return("indeterminate form(0/0)")
  76. else:
  77. return p/split_infoA(a)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement