-- Design notes and a prototype Q-learning agent for detecting movement exploits.

-- Data Fields --
--[[
  Movement Direction
  WalkSpeed
  Velocity
  Time since last gathering
  Is platform standing?
  Is freefalling?
  Is falling down?
  Is getting up?
  Is swimming?
  Swimming speed
]]

-- Calculated Fields --
--[[
  Distance traveled given walkspeed / time
  Average acceleration
  Is exceeding max walkspeed?
  Is exceeding max velocity?
  How long freefalling?
]]

-- Outputs (y) --
--[[
  Is speed hacking?
  Is no clipping?
]]

-- Rewards --
--[[
  Catches hacker = 5 points
  Correctly determines not a hacker = 10 points
]]

-- Punishments --
--[[
  Doesn't catch hacker = -10 points
  Wrongly accuses a non-hacker = -25 points
]]

-- Collects this chunk's variadic arguments into a table.
-- NOTE(review): `{...}` may have been meant as a placeholder for the data
-- fields listed in the notes above rather than real varargs -- confirm.
local Data = { ... }

-- Action identifiers; they are used as the first index into the Q-table.
local FORWARD, BACKWARD = 1, 2

--- Tabular Q-learning agent with two actions (FORWARD and BACKWARD).
--
-- The Q-table is indexed as qTable[action][state]. It is created with two
-- action rows of five state columns each, every entry initialised to 0.
local Player = {}

--- Create a new agent.
-- Call with a colon (`Player:new(...)`): the receiver becomes the `__index`
-- target of the instance, so the methods below are inherited from it.
-- @param learningRate    step size for Q-value updates (default 0.1)
-- @param discount        weight applied to the next state's value (default 0.95)
-- @param explorationRate initial probability of a random action (default 1.0)
-- @param iterations      number of updates over which a rate of 1.0 decays
--                        to 0 (default 10000)
-- @return the new agent instance
function Player:new(learningRate, discount, explorationRate, iterations)
    local plr = setmetatable({}, { __index = self })
    plr.qTable = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }
    plr.learningRate = learningRate or 0.1
    plr.discount = discount or 0.95
    plr.explorationRate = explorationRate or 1.0
    -- Apply the default before dividing: arithmetic on nil raises an error,
    -- so an `or` placed after the division could never supply the fallback.
    plr.exploration_delta = 1.0 / (iterations or 10000)
    return plr
end

--- Pick FORWARD or BACKWARD with equal probability.
-- @return FORWARD or BACKWARD
function Player:randomAction()
    if math.random(1, 2) == 1 then
        return FORWARD
    end

    return BACKWARD
end

--- Pick the action with the larger Q-value for `state`.
-- Ties are broken by a random choice so neither action is favoured while
-- the table is still uniform.
-- @param state column index into the Q-table
-- @return FORWARD or BACKWARD
function Player:greedyAction(state)
    local forwardValue = self.qTable[FORWARD][state]
    local backwardValue = self.qTable[BACKWARD][state]

    if forwardValue > backwardValue then
        return FORWARD
    elseif backwardValue > forwardValue then
        return BACKWARD
    end

    return self:randomAction()
end

--- Apply one Q-learning update for an observed transition, then decay the
-- exploration rate by `exploration_delta`.
-- @param oldState state the action was taken in
-- @param newState state reached after the action
-- @param action   action that was taken (FORWARD or BACKWARD)
-- @param reward   reward received for the transition
function Player:update(oldState, newState, action, reward)
    local oldValue = self.qTable[action][oldState]
    local futureAction = self:greedyAction(newState)
    local futureReward = self.qTable[futureAction][newState]

    -- Q(s,a) <- Q(s,a) + alpha * (r + gamma * max Q(s',a') - Q(s,a)).
    -- The discount scales the future value; it is not added to it.
    local newValue = oldValue
        + self.learningRate * (reward + self.discount * futureReward - oldValue)
    self.qTable[action][oldState] = newValue

    if self.explorationRate > 0 then
        -- Clamp so the last decay step cannot push the rate below zero.
        self.explorationRate =
            math.max(0, self.explorationRate - self.exploration_delta)
    end
end

--- Choose the next action: the greedy action when a uniform random draw
-- exceeds the exploration rate, otherwise a random action.
-- @param state current state (column index into the Q-table)
-- @return FORWARD or BACKWARD
function Player:getNextAction(state)
    if math.random() > self.explorationRate then
        return self:greedyAction(state)
    end

    return self:randomAction()
end

-- Instantiate one agent, passing no constructor arguments.
-- NOTE(review): `newObject` is assigned without `local`, so it becomes a
-- global -- confirm whether anything outside this file relies on that.
newObject = Player.new(Player)