Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Regret-matching trainer for a three-action cyclic zero-sum game
 * (rock-paper-scissors style): every action beats exactly one other action
 * and loses to exactly one other action.
 *
 * Strategies, regret sums and strategy sums are plain arrays indexed by
 * action (0 .. actions - 1).
 */
class CFR {
  constructor() {
    // best[a] is the action that beats action a (it earns utility +1 against a).
    this.best = [1, 2, 0];
    this.actions = 3;
  }

  /**
   * Scales a vector of non-negative weights so that it sums to 1.
   *
   * @param {number[]} strategy - Weights, one per action.
   * @returns {number[]} A new array: the weights divided by their sum, or the
   *   uniform distribution when the sum is not positive. An empty input
   *   yields an empty array.
   */
  normalize(strategy) {
    // The explicit initial value keeps reduce() from throwing on an empty array.
    const normalizingSum = strategy.reduce((total, weight) => total + weight, 0);
    if (normalizingSum > 0) {
      return strategy.map((weight) => weight / normalizingSum);
    }
    const uniform = 1 / strategy.length;
    return Array.from({ length: strategy.length }, () => uniform);
  }

  /**
   * Regret matching: plays each action in proportion to its positive
   * cumulative regret.
   *
   * @param {number[]} regretSum - Cumulative regret per action.
   * @returns {number[]} Probability distribution over the actions.
   */
  getStrategy(regretSum) {
    const positiveRegrets = regretSum.map((regret) => (regret < 0 ? 0 : regret));
    return this.normalize(positiveRegrets);
  }

  /**
   * Samples an action index from a probability distribution, consuming one
   * Math.random() value.
   *
   * @param {number[]} strat - Probability per action.
   * @returns {number} An index in [0, this.actions).
   */
  getAction(strat) {
    const roll = Math.random();
    let cumulativeProbability = 0;
    // Used when rounding error (or a distribution summing to less than 1)
    // leaves the roll above the accumulated total: pick the last action that
    // has positive probability instead of returning an out-of-range index.
    let fallback = this.actions - 1;
    for (let action = 0; action < this.actions; action++) {
      cumulativeProbability += strat[action];
      if (roll < cumulativeProbability) {
        return action;
      }
      if (strat[action] > 0) {
        fallback = action;
      }
    }
    return fallback;
  }

  /**
   * Runs one training step: derives the current strategy from the regrets,
   * samples one action for each player, and accumulates regrets and strategy
   * weights. Both sum arrays are updated in place.
   *
   * @param {number[]} regretSum - Cumulative regrets (mutated).
   * @param {number[]} stratSum - Cumulative strategy weights (mutated).
   * @param {number[]} oppStrategy - Opponent's probability distribution.
   * @returns {[number[], number[]]} The same regretSum and stratSum arrays.
   */
  innerTrain(regretSum, stratSum, oppStrategy) {
    const strategy = this.getStrategy(regretSum);
    strategy.forEach((probability, action) => {
      stratSum[action] += probability;
    });

    // Our action is sampled before the opponent's; keep this order so the
    // sequence of Math.random() draws stays the same.
    const myAction = this.getAction(strategy);
    const otherAction = this.getAction(oppStrategy);

    // Utility of each of our actions against the opponent's sampled action:
    // 0 for a tie, +1 for the action that beats it, -1 otherwise.
    const winningAction = this.best[otherAction];
    const actionUtil = Array.from({ length: this.actions }, (_, action) => {
      if (action === otherAction) {
        return 0;
      }
      return action === winningAction ? 1 : -1;
    });

    // Regret = what an action would have earned minus what we actually earned.
    const playedUtil = actionUtil[myAction];
    actionUtil.forEach((util, action) => {
      regretSum[action] += util - playedUtil;
    });
    return [regretSum, stratSum];
  }

  /**
   * Trains against a fixed opponent strategy, logging the current
   * regret-matched strategy after every iteration and the result of a final
   * simulation.
   *
   * @param {number[]} oppStrategy - Opponent's probability distribution.
   * @param {number} iterations - Number of training steps (also used as the
   *   number of simulated games).
   * @returns {number[]} The average strategy over all training steps.
   */
  train(oppStrategy, iterations) {
    let stratSum = Array.from({ length: this.actions }, () => 0);
    let regretSum = Array.from({ length: this.actions }, () => 0);
    for (let i = 0; i < iterations; i++) {
      [regretSum, stratSum] = this.innerTrain(regretSum, stratSum, oppStrategy);
      console.log(this.getStrategy(regretSum));
    }
    // The result is the average strategy, not the final regret-matched one.
    const strat = this.normalize(stratSum);
    const results = this.simulate(strat, oppStrategy, iterations);
    console.log(results);
    return strat;
  }

  /**
   * Plays sampled games between two strategies and reports win rates.
   *
   * @param {number[]} strategy - First player's probability distribution.
   * @param {number[]} oppStrategy - Second player's probability distribution.
   * @param {number} iterations - Number of games to play.
   * @returns {{strategy: number, oppStrategy: number}} Fraction of games won
   *   by each side (ties count for neither); both are 0 when no games are
   *   played.
   */
  simulate(strategy, oppStrategy, iterations) {
    let strategyWins = 0;
    let oppStrategyWins = 0;
    for (let i = 0; i < iterations; i++) {
      const action1 = this.getAction(strategy);
      const action2 = this.getAction(oppStrategy);
      if (action1 === action2) {
        continue;
      }
      if (this.best[action1] === action2) {
        oppStrategyWins += 1;
      } else if (this.best[action2] === action1) {
        strategyWins += 1;
      }
    }
    // Avoid 0/0 (NaN) when no games were requested.
    const games = iterations > 0 ? iterations : 1;
    return {
      strategy: strategyWins / games,
      oppStrategy: oppStrategyWins / games,
    };
  }

  /**
   * Alternating self-play: in every outer round each player in turn trains
   * for `iterations` steps against the other's current average strategy.
   * Regrets are reset every round; strategy sums accumulate across rounds.
   * Logs both average strategies after each round and the result of a
   * 1000-game simulation at the end.
   *
   * @param {number} oiterations - Number of outer rounds.
   * @param {number} iterations - Training steps per player per round.
   * @returns {[number[], number[]]} Average strategies of player 1 and 2.
   */
  train2p(oiterations, iterations) {
    let stratSump1 = Array.from({ length: this.actions }, () => 0);
    let stratSump2 = Array.from({ length: this.actions }, () => 0);
    for (let round = 0; round < oiterations; round++) {
      let regretSump1 = Array.from({ length: this.actions }, () => 0);
      let regretSump2 = Array.from({ length: this.actions }, () => 0);
      for (let i = 0; i < iterations; i++) {
        [regretSump1, stratSump1] = this.innerTrain(
          regretSump1,
          stratSump1,
          this.normalize(stratSump2),
        );
      }
      for (let i = 0; i < iterations; i++) {
        [regretSump2, stratSump2] = this.innerTrain(
          regretSump2,
          stratSump2,
          this.normalize(stratSump1),
        );
      }
      console.log(this.normalize(stratSump1), this.normalize(stratSump2));
    }
    const results = this.simulate(
      this.normalize(stratSump1),
      this.normalize(stratSump2),
      1000,
    );
    console.log(results);
    return [this.normalize(stratSump1), this.normalize(stratSump2)];
  }
}
// Entry point: run alternating two-player training (100000 outer rounds of
// 100 steps per player) and print the two resulting average strategies.
const cfr = new CFR();
const trainedStrategies = cfr.train2p(100000, 100);
console.log("Strategy found", trainedStrategies);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement