Advertisement
Guest User

Untitled

a guest
Sep 18th, 2019
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.32 KB | None | 0 0
  1. -- Instructions for running
  2. --
  3. -- If GHC haskell is installed:
  4. --
  5. -- $ runhaskell id3.hs
  6. --
  7. -- Have fun!
  8. --
  9. -- On some occasions, 'containers' package might need to be installed;
  10. -- in that case, install with
  11. --
  12. -- $ cabal install containers
  13. --
  14.  
  15. module Main where
  16.  
  17. import Data.Function
  18. import Data.List
  19. import Data.Maybe
  20. import Data.Ord
  21. import Data.Tree
  22. import Prelude
  23. import qualified Data.Map as M
  24.  
  25. data DataPoint = DP { outlook :: String
  26. , temperature :: String
  27. , humidity :: String
  28. , wind :: String
  29. , toPlay :: Bool
  30. } deriving Show
  31.  
  32. data Attribute = Atr { atrName :: String
  33. , atrGet :: GetAttribute
  34. }
  35.  
  36. type GetAttribute = DataPoint -> String
  37.  
  38. instance Eq Attribute where (==) = on (==) atrName
  39.  
  40. attributes :: [Attribute]
  41. attributes = [ Atr "outlook" outlook
  42. , Atr "temperature" temperature
  43. , Atr "humidity" humidity
  44. , Atr "wind" wind
  45. ]
  46.  
  47. -- | Parse a list of strings into a DataPoin. Possibly.
  48. toDataPoint :: [String] -> Maybe DataPoint
  49. toDataPoint (o:t:h:w:c:_) = Just $ DP o t h w (c == "p")
  50. toDataPoint _ = Nothing
  51.  
  52. -- | Entropy of a given data set, on its class variable
  53. dpEntropy :: [DataPoint] -> Double
  54. dpEntropy = entropy . getProb
  55. where
  56. getProb xs = genericLength (filter toPlay xs)
  57. / genericLength xs
  58. entropy x = ex x + ex (1-x)
  59. ex 0 = 0 -- special case for x == 0
  60. ex x = -logBase 2 x * x
  61.  
  62. -- | Calculate the expected entropy gain generated by splitting a data set
  63. -- on the given attribute
  64. entropyGain :: [DataPoint] -> Attribute -> Double
  65. entropyGain dps atr = initialEntropy - expFinalEntropy
  66. where
  67. initialEntropy = dpEntropy dps
  68. expFinalEntropy = (/ genericLength dps)
  69. . sum
  70. . map (\(_, vdps) -> genericLength vdps * dpEntropy vdps)
  71. . splitOnAtr atr
  72. $ dps
  73.  
  74. -- | Split a set of data points on an attribute, as Attribute value - data
  75. -- set pairs
  76. splitOnAtr :: Attribute -> [DataPoint] -> [(String, [DataPoint])]
  77. splitOnAtr atr = M.toList
  78. . M.fromListWith (++)
  79. . map (\dp -> (atrGet atr dp, [dp]))
  80.  
  81. -- | build the decision tree with a list of remaining attributes, a "label"
  82. -- prefix (current choice), and a set of data points.
  83. buildTree :: [Attribute] -> String -> [DataPoint] -> Tree String
  84. -- base case: attributes exhausted
  85. buildTree [] labl dps = Node (labl ++ ": " ++ favoring) []
  86. where
  87. playCount = length . filter toPlay $ dps
  88. noPlayCount = length . filter (not . toPlay) $ dps
  89. favoring | playCount >= noPlayCount = "Play!"
  90. | otherwise = "No Play"
  91. buildTree atrs labl dps = Node nodeName subTrees
  92. where
  93. bestAtr = maximumBy (comparing (entropyGain dps)) atrs
  94. nodeName = labl ++ ": check " ++ atrName bestAtr
  95. badAtrs = filter (/= bestAtr) atrs
  96. splitDps = splitOnAtr bestAtr dps
  97. subTrees = flip map splitDps $
  98. \(labl',dps') ->
  99. if all toPlay dps' || not (any toPlay dps')
  100. -- skip to end if unambiguous
  101. then buildTree [] labl' dps'
  102. -- otherwise, recurse
  103. else buildTree badAtrs labl' dps'
  104.  
  105. dataset :: [[String]]
  106. dataset = [ [ "sunny" , "hot" , "high" , "weak" , "n" ]
  107. , [ "sunny" , "hot" , "high" , "strong", "n" ]
  108. , [ "overcast", "hot" , "high" , "weak" , "p" ]
  109. , [ "rain" , "mild", "high" , "weak" , "p" ]
  110. , [ "rain" , "cool", "normal", "weak" , "p" ]
  111. , [ "rain" , "cool", "normal", "strong", "n" ]
  112. , [ "overcast", "cool", "normal", "strong", "p" ]
  113. , [ "sunny" , "mild", "high" , "weak" , "n" ]
  114. , [ "sunny" , "cool", "normal", "weak" , "p" ]
  115. , [ "rain" , "mild", "normal", "weak" , "p" ]
  116. , [ "sunny" , "mild", "normal", "strong", "p" ]
  117. , [ "overcast", "mild", "high" , "strong", "p" ]
  118. , [ "overcast", "hot" , "normal", "weak" , "p" ]
  119. , [ "rain" , "mild", "high" , "strong", "n" ]
  120. ]
  121.  
  122. main :: IO ()
  123. main = putStrLn
  124. . drawTree
  125. . buildTree attributes "top"
  126. . mapMaybe toDataPoint
  127. $ dataset
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement