Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
const fs = require("fs")
// Class labels exactly as they appear in column 0 of the data file.
// For breast-cancer.data use "no-recurrence-events" / "recurrence-events" instead.
const NO = "No"
const YES = "Yes"
// Display names for the attribute columns, indexed by column number.
// Index 0 is the class-label column, so it has no attribute name.
const NAMES = ["", "Outlook", "Temperature", "Humidity", "Wind"]
// utils

/** Curried split: `splitBy(",")("a,b")` returns `["a", "b"]`. */
const splitBy = del => str => str.split(del)

/** Truthiness predicate; used to drop empty lines when parsing the data file. */
const nonEmpty = el => Boolean(el)

/** Predicate factory: the returned function is true when `el[ind]` strictly equals `value`. */
const atIndexEquals = (ind, value) => el => el[ind] === value

/** Curried property/index getter: `prop(0)(row)` returns `row[0]`. */
const prop = p => obj => obj[p]

/** Distinct elements of `collection`, kept in order of first appearance. */
const uniq = collection => [...new Set(collection)]
/**
 * Binary Shannon entropy, in bits, of a set holding `p` positive and `n`
 * negative samples.
 *
 * @param {number} p - count of positive samples
 * @param {number} n - count of negative samples
 * @returns {number} entropy in [0, 1]; 0 when either count is 0 (a pure or
 *   empty set), which also avoids evaluating `0 * log2(0)`.
 */
const entropyFn = (p, n) => {
  if (p === 0 || n === 0) {
    return 0
  }
  const total = p + n
  const pRatio = p / total
  const nRatio = n / total
  return -pRatio * Math.log2(pRatio) - nRatio * Math.log2(nRatio)
}
/**
 * Most frequent class label (column 0) among `samples`.
 * Ties go to the label encountered first; returns `undefined` for no samples.
 */
const majorityClass = samples => {
  const counts = new Map()
  for (const sample of samples) {
    counts.set(sample[0], (counts.get(sample[0]) || 0) + 1)
  }
  let winner
  let winnerCount = 0
  for (const [label, count] of counts) {
    if (count > winnerCount) {
      winner = label
      winnerCount = count
    }
  }
  return winner
}

/**
 * Recursively builds a decision tree by picking, at every level, the
 * attribute with the highest information gain.
 *
 * @param {string[][]} trainingData - samples; column 0 is the class label
 *   (compared against YES / NO), the other columns are attribute values.
 * @param {number[]} attributeIndices - column indices still available for
 *   splitting at this level.
 * @returns {{attrIndex: (number|undefined), attrName: (string|undefined),
 *   gain: number, values: (string[]|string|undefined), children: (Object[]|undefined)}}
 *   An inner node has `values` as an array of the attribute's distinct values
 *   and `children[i]` as the subtree for `values[i]`. A leaf has `values` set
 *   to the predicted class label and no `children`.
 */
const findBestNode = (trainingData, attributeIndices) => {
  const pos = trainingData.filter(atIndexEquals(0, YES)).length
  const neg = trainingData.filter(atIndexEquals(0, NO)).length
  const globalEntropy = entropyFn(pos, neg)

  let bestNode
  for (const attrIndex of attributeIndices) {
    const currAttrValues = uniq(trainingData.map(prop(attrIndex)))
    // Entropy left after splitting on this attribute: per-value entropy
    // weighted by the share of samples that carry that value.
    const avgInfEntropy = currAttrValues.reduce((acc, value) => {
      const samplesWithValue = trainingData.filter(
        atIndexEquals(attrIndex, value)
      )
      const valuePos = samplesWithValue.filter(atIndexEquals(0, YES)).length
      const valueNeg = samplesWithValue.filter(atIndexEquals(0, NO)).length
      const weight = (valuePos + valueNeg) / (pos + neg)
      return acc + weight * entropyFn(valuePos, valueNeg)
    }, 0)
    const gain = globalEntropy - avgInfEntropy
    // Strict "<" keeps the earliest attribute when gains are equal.
    if (!bestNode || bestNode.gain < gain) {
      bestNode = {
        attrIndex,
        attrName: NAMES[attrIndex],
        gain,
        values: currAttrValues
      }
    }
  }

  // No attribute left to split on: fall back to a majority-vote leaf instead
  // of dereferencing an undefined node.
  if (!bestNode) {
    return {
      attrIndex: undefined,
      attrName: undefined,
      gain: 0,
      values: majorityClass(trainingData)
    }
  }

  // Splitting gains nothing (pure subset, or the attributes carry no
  // information): make this node a leaf labelled with the majority class.
  if (bestNode.gain === 0) {
    bestNode.values = majorityClass(trainingData)
    return bestNode
  }

  // Inner node: one subtree per attribute value, built from the matching
  // samples with the chosen attribute removed from the candidates.
  const remainingAttributes = attributeIndices.filter(
    el => el !== bestNode.attrIndex
  )
  bestNode.children = bestNode.values.map(value =>
    findBestNode(
      trainingData.filter(atIndexEquals(bestNode.attrIndex, value)),
      remainingAttributes
    )
  )
  return bestNode
}
/**
 * Classifies `entry` by walking the decision tree from `root` down to a leaf.
 *
 * At an inner node (`values` is an array) the entry's value in column
 * `attrIndex` selects the child at the same position in `children`.
 * At a leaf, `values` holds the class label, which is returned.
 *
 * @param {Object} root - tree node as produced by findBestNode
 * @param {string[]} entry - sample to classify, same column layout as the
 *   training samples
 * @returns {string|undefined} predicted class label, or `undefined` when the
 *   tree is missing or the entry carries an attribute value that has no
 *   matching branch (previously this case threw a TypeError).
 */
const predict = (root, entry) => {
  let node = root
  while (node && Array.isArray(node.values)) {
    const branchIndex = node.values.indexOf(entry[node.attrIndex])
    // indexOf gives -1 for a value never seen in training; stop instead of
    // reading children[-1].
    if (branchIndex === -1 || !node.children) {
      return undefined
    }
    node = node.children[branchIndex]
  }
  return node ? node.values : undefined
}
//
// Script: load the data set, hold out one fold for testing, train a tree on
// the rest, and print the accuracy on the held-out samples.
//

// Each non-empty line is one sample: comma-separated fields, class label first.
// To try the other data set, read "./breast-cancer.data" here and switch the
// YES / NO labels accordingly.
const file = fs.readFileSync("./play-tennis.data", "utf8")
const data = file
  .split(/\r?\n/) // accept CRLF files so the last field keeps no trailing "\r"
  .filter(nonEmpty)
  .map(splitBy(","))

// Fold size. For 10-fold cross-validation on a larger data set use
// ~~(data.length / 10) and repeat the evaluation below for every fold.
const step = 2
// Only the first fold is evaluated.
const foldIndex = 0
const foldStart = foldIndex * step
const foldEnd = foldStart + step
const testData = data.slice(foldStart, foldEnd)
const trainingData = data.slice(0, foldStart).concat(data.slice(foldEnd))
if (trainingData.length === 0) {
  throw new Error(
    "Not enough samples in ./play-tennis.data to hold out a test fold"
  )
}

// Every column except the class label (column 0) is a candidate attribute.
const attributeIndices = Array.from(
  { length: trainingData[0].length - 1 },
  (_, offset) => offset + 1
)
const root = findBestNode(trainingData, attributeIndices)

// A test sample counts as correct when the predicted label equals its column 0.
const correctAnswers = testData.filter(
  currTest => currTest[0] === predict(root, currTest)
).length
const accuracy = correctAnswers / testData.length
console.log(accuracy)
Add Comment
Please, Sign In to add comment