// Untitled

/**
 * Regret-matching trainer for a three-action cyclic game (for example
 * rock-paper-scissors).
 *
 * Actions are the integers 0..actions-1. Strategies are arrays of
 * probabilities indexed by action; "regret sums" and "strategy sums" are
 * running totals indexed the same way.
 */
class CFR {
    constructor() {
        // best[a] is the action that wins against action a.
        this.best = [1, 2, 0];
        // Number of available actions.
        this.actions = 3;
    }

    /**
     * Scale a vector of non-negative weights so that it sums to 1.
     *
     * @param {number[]} strategy - Weights, one per action.
     * @returns {number[]} A new array. If the weights do not sum to a
     *   positive number, a uniform distribution of the same length is
     *   returned instead (an empty input yields an empty array).
     */
    normalize(strategy) {
        // The explicit initial value keeps reduce() from throwing on [].
        const normalizingSum = strategy.reduce((total, weight) => total + weight, 0);
        if (normalizingSum > 0) {
            return strategy.map((weight) => weight / normalizingSum);
        }
        const size = strategy.length;
        return Array.from({ length: size }, () => 1 / size);
    }

    /**
     * Turn accumulated regrets into a strategy: negative regrets are clipped
     * to zero and the remainder is normalized.
     *
     * @param {number[]} regretSum - Accumulated regret per action.
     * @returns {number[]} Probability per action.
     */
    getStrategy(regretSum) {
        const positiveRegrets = regretSum.map((regret) => (regret < 0 ? 0 : regret));
        return this.normalize(positiveRegrets);
    }

    /**
     * Sample an action index according to a strategy.
     *
     * @param {number[]} strat - Probability per action.
     * @returns {number} An action index in the range 0..actions-1.
     */
    getAction(strat) {
        const r = Math.random();
        let cumulativeProbability = 0;
        for (let action = 0; action < this.actions; action++) {
            cumulativeProbability += strat[action];
            if (r < cumulativeProbability) {
                return action;
            }
        }
        // The probabilities can add up to slightly less than 1 (floating-point
        // rounding), so the draw may land past the end. Never return an
        // out-of-range index: fall back to the last action that has a
        // positive probability.
        for (let action = this.actions - 1; action > 0; action--) {
            if (strat[action] > 0) {
                return action;
            }
        }
        return 0;
    }

    /**
     * Run one training iteration against a fixed opponent strategy.
     *
     * Both `regretSum` and `stratSum` are updated in place; the same arrays
     * are also returned for convenience.
     *
     * @param {number[]} regretSum - Accumulated regret per action (mutated).
     * @param {number[]} stratSum - Accumulated strategy weights (mutated).
     * @param {number[]} oppStrategy - Opponent's probability per action.
     * @returns {[number[], number[]]} `[regretSum, stratSum]`.
     */
    innerTrain(regretSum, stratSum, oppStrategy) {
        const strategy = this.getStrategy(regretSum);
        strategy.forEach((probability, action) => {
            stratSum[action] += probability;
        });

        // Sample our action first, then the opponent's.
        const myAction = this.getAction(strategy);
        const otherAction = this.getAction(oppStrategy);

        // Utility of each of our actions against the opponent's action:
        // 0 for a tie, 1 for the winning action, -1 otherwise.
        const winningAction = this.best[otherAction];
        const actionUtil = Array.from({ length: this.actions }, (_, action) => {
            if (action === otherAction) {
                return 0;
            }
            return action === winningAction ? 1 : -1;
        });

        // Regret = what an action would have earned minus what we earned.
        const achievedUtil = actionUtil[myAction];
        actionUtil.forEach((util, action) => {
            regretSum[action] += util - achievedUtil;
        });
        return [regretSum, stratSum];
    }

    /**
     * Train against a fixed opponent strategy, logging the current strategy
     * after every iteration, then simulate the averaged strategy against the
     * opponent for the same number of rounds and log the outcome.
     *
     * @param {number[]} oppStrategy - Opponent's probability per action.
     * @param {number} iterations - Number of training iterations.
     * @returns {number[]} The normalized average strategy.
     */
    train(oppStrategy, iterations) {
        let stratSum = Array.from({ length: this.actions }, () => 0);
        let regretSum = Array.from({ length: this.actions }, () => 0);

        for (let i = 0; i < iterations; i++) {
            [regretSum, stratSum] = this.innerTrain(regretSum, stratSum, oppStrategy);
            console.log(this.getStrategy(regretSum));
        }

        // The averaged strategy is reported rather than the final
        // regret-matched one (this.getStrategy(regretSum)).
        const strat = this.normalize(stratSum);
        const results = this.simulate(strat, oppStrategy, iterations);
        console.log(results);
        return strat;
    }

    /**
     * Play two strategies against each other and report win rates.
     *
     * @param {number[]} strategy - First player's probability per action.
     * @param {number[]} oppStrategy - Second player's probability per action.
     * @param {number} iterations - Number of rounds to play.
     * @returns {{strategy: number, oppStrategy: number}} Fraction of rounds
     *   won by each side; ties count for neither.
     */
    simulate(strategy, oppStrategy, iterations) {
        let wins = 0;
        let losses = 0;

        for (let i = 0; i < iterations; i++) {
            const action1 = this.getAction(strategy);
            const action2 = this.getAction(oppStrategy);
            if (action1 === action2) {
                continue;
            }
            if (this.best[action1] === action2) {
                losses += 1;
            } else if (this.best[action2] === action1) {
                wins += 1;
            }
        }

        return {
            strategy: wins / iterations,
            oppStrategy: losses / iterations,
        };
    }

    /**
     * Train two players against each other.
     *
     * Each outer round resets both players' regrets, trains player 1 for
     * `iterations` steps against player 2's current average strategy, then
     * trains player 2 the same way against player 1's updated average, and
     * logs both averages. Strategy sums accumulate across rounds. Finally the
     * two average strategies are simulated for 1000 rounds and the result is
     * logged.
     *
     * @param {number} oiterations - Number of outer rounds.
     * @param {number} iterations - Training iterations per player per round.
     * @returns {[number[], number[]]} Normalized average strategies of
     *   player 1 and player 2.
     */
    train2p(oiterations, iterations) {
        let stratSump1 = Array.from({ length: this.actions }, () => 0);
        let stratSump2 = Array.from({ length: this.actions }, () => 0);

        for (let round = 0; round < oiterations; round++) {
            let regretSump1 = Array.from({ length: this.actions }, () => 0);
            let regretSump2 = Array.from({ length: this.actions }, () => 0);

            // The opponent's sums do not change while the other player is
            // training, so its average strategy is computed once per phase.
            const p2Average = this.normalize(stratSump2);
            for (let i = 0; i < iterations; i++) {
                [regretSump1, stratSump1] = this.innerTrain(regretSump1, stratSump1, p2Average);
            }

            const p1Average = this.normalize(stratSump1);
            for (let i = 0; i < iterations; i++) {
                [regretSump2, stratSump2] = this.innerTrain(regretSump2, stratSump2, p1Average);
            }

            console.log(this.normalize(stratSump1), this.normalize(stratSump2));
        }

        const results = this.simulate(this.normalize(stratSump1), this.normalize(stratSump2), 1000);
        console.log(results);

        return [this.normalize(stratSump1), this.normalize(stratSump2)];
    }
}

// Script entry point: run two-player training with 100000 outer rounds of
// 100 iterations per player, then print the resulting pair of strategies.
const cfr = new CFR();
const learnedStrategies = cfr.train2p(100000, 100);
console.log("Strategy found", learnedStrategies);