Advertisement
Guest User

Untitled

a guest
May 11th, 2019
190
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  /**
   * Regret-matching trainer for a three-action cyclic game
   * (rock-paper-scissors style): every action is beaten by exactly one other
   * action, as described by the `best` table.
   *
   * Strategies are plain arrays of per-action probabilities; regret sums and
   * strategy sums are plain arrays of per-action accumulators.
   */
  class CFR {
      constructor() {
          // best[a] is the action that beats action a (used for scoring below).
          this.best = [1, 2, 0];
          this.actions = 3;
      }

      /**
       * Scale a vector of non-negative weights so that it sums to 1.
       *
       * @param {number[]} strategy - Per-action weights.
       * @returns {number[]} A new array. When the weights do not sum to a
       *   positive number, a uniform distribution of the same length is
       *   returned instead. An empty input yields an empty array.
       */
      normalize(strategy) {
          // Initial value 0 keeps reduce() from throwing on an empty array.
          const normalizingSum = strategy.reduce((total, weight) => total + weight, 0);
          if (normalizingSum > 0) {
              return strategy.map((weight) => weight / normalizingSum);
          }
          const uniform = 1 / strategy.length;
          return Array.from({ length: strategy.length }, () => uniform);
      }

      /**
       * Turn accumulated regrets into a strategy: negative regrets are clipped
       * to zero and the remainder is normalised.
       *
       * @param {number[]} regretSum - Accumulated regret per action.
       * @returns {number[]} Probability per action.
       */
      getStrategy(regretSum) {
          const positiveRegrets = regretSum.map((regret) => (regret < 0 ? 0 : regret));
          return this.normalize(positiveRegrets);
      }

      /**
       * Sample an action index from a probability vector using Math.random().
       *
       * @param {number[]} strat - Probability per action.
       * @returns {number} An index in [0, this.actions - 1].
       */
      getAction(strat) {
          const r = Math.random();
          let cumulativeProbability = 0;
          let lastPositive = -1;
          for (let action = 0; action < this.actions; action++) {
              if (strat[action] > 0) {
                  lastPositive = action;
              }
              cumulativeProbability += strat[action];
              if (r < cumulativeProbability) {
                  return action;
              }
          }
          // Rounding can leave the probabilities summing to slightly less than 1,
          // so r may fall past the end. Fall back to the last action that had any
          // probability mass (or the final action if none did) rather than
          // returning an out-of-range index.
          return lastPositive >= 0 ? lastPositive : this.actions - 1;
      }

      /**
       * Run one training iteration against a fixed opponent strategy.
       *
       * Both accumulators are updated in place and also returned.
       *
       * @param {number[]} regretSum - Accumulated regret per action (mutated).
       * @param {number[]} stratSum - Accumulated strategy per action (mutated).
       * @param {number[]} oppStrategy - Opponent probability per action.
       * @returns {[number[], number[]]} The pair [regretSum, stratSum].
       */
      innerTrain(regretSum, stratSum, oppStrategy) {
          const strategy = this.getStrategy(regretSum);
          strategy.forEach((probability, action) => {
              stratSum[action] += probability;
          });

          // Sample our own action first, then the opponent's.
          const myAction = this.getAction(strategy);
          const otherAction = this.getAction(oppStrategy);

          // Utility of each of our actions against the opponent's sampled action:
          // 0 for a tie, +1 for the action that beats it, -1 otherwise.
          const winningAction = this.best[otherAction];
          const actionUtil = Array.from({ length: this.actions }, (_, action) => {
              if (action === otherAction) {
                  return 0;
              }
              return action === winningAction ? 1 : -1;
          });

          // Regret of an action is how much better it would have done than the
          // action actually played.
          const playedUtil = actionUtil[myAction];
          actionUtil.forEach((util, action) => {
              regretSum[action] += util - playedUtil;
          });
          return [regretSum, stratSum];
      }

      /**
       * Train against a fixed opponent strategy and return the average strategy.
       *
       * Logs the current regret-matching strategy after every iteration and the
       * result of a follow-up simulation (same number of rounds) at the end.
       *
       * @param {number[]} oppStrategy - Opponent probability per action.
       * @param {number} iterations - Number of training iterations.
       * @returns {number[]} The normalised sum of the strategies played.
       */
      train(oppStrategy, iterations) {
          const stratSum = Array.from({ length: this.actions }, () => 0);
          const regretSum = Array.from({ length: this.actions }, () => 0);

          for (let i = 0; i < iterations; i++) {
              // innerTrain updates both accumulators in place.
              this.innerTrain(regretSum, stratSum, oppStrategy);
              console.log(this.getStrategy(regretSum));
          }

          const strat = this.normalize(stratSum);
          const results = this.simulate(strat, oppStrategy, iterations);
          console.log(results);
          return strat;
      }

      /**
       * Play two strategies against each other and report their win rates.
       *
       * @param {number[]} strategy - First player's probability per action.
       * @param {number[]} oppStrategy - Second player's probability per action.
       * @param {number} iterations - Number of rounds to play.
       * @returns {{strategy: number, oppStrategy: number}} Fraction of rounds won
       *   by each side (ties count for neither). Both are 0 when no rounds are
       *   played.
       */
      simulate(strategy, oppStrategy, iterations) {
          let wins = 0;
          let oppWins = 0;

          for (let i = 0; i < iterations; i++) {
              const action1 = this.getAction(strategy);
              const action2 = this.getAction(oppStrategy);
              if (action1 === action2) {
                  continue;
              }
              if (this.best[action1] === action2) {
                  oppWins += 1;
              } else if (this.best[action2] === action1) {
                  wins += 1;
              }
          }

          // Avoid 0/0 = NaN when there were no rounds.
          if (!(iterations > 0)) {
              return { strategy: 0, oppStrategy: 0 };
          }
          return {
              strategy: wins / iterations,
              oppStrategy: oppWins / iterations,
          };
      }

      /**
       * Alternating self-play: in each outer round player 1 trains for
       * `iterations` steps against player 2's current average strategy, then
       * player 2 does the same against player 1's.
       *
       * Regret sums restart from zero every outer round; strategy sums
       * accumulate across all rounds. Logs both average strategies after each
       * outer round and the result of a 1000-round simulation at the end.
       *
       * @param {number} oiterations - Number of outer (alternation) rounds.
       * @param {number} iterations - Training iterations per player per round.
       * @returns {[number[], number[]]} The two players' average strategies.
       */
      train2p(oiterations, iterations) {
          const stratSump1 = Array.from({ length: this.actions }, () => 0);
          const stratSump2 = Array.from({ length: this.actions }, () => 0);

          for (let round = 0; round < oiterations; round++) {
              const regretSump1 = Array.from({ length: this.actions }, () => 0);
              const regretSump2 = Array.from({ length: this.actions }, () => 0);

              // The opponent's sum does not change while the other player
              // trains, so its normalised form is computed once per phase.
              const p2Average = this.normalize(stratSump2);
              for (let i = 0; i < iterations; i++) {
                  this.innerTrain(regretSump1, stratSump1, p2Average);
              }

              const p1Average = this.normalize(stratSump1);
              for (let i = 0; i < iterations; i++) {
                  this.innerTrain(regretSump2, stratSump2, p1Average);
              }

              console.log(this.normalize(stratSump1), this.normalize(stratSump2));
          }

          const results = this.simulate(
              this.normalize(stratSump1),
              this.normalize(stratSump2),
              1000
          );
          console.log(results);

          return [this.normalize(stratSump1), this.normalize(stratSump2)];
      }
  }
  119.  
  // Script entry point: run alternating self-play (100000 outer rounds of 100
  // iterations per player) and print the resulting pair of average strategies.
  const cfr = new CFR();
  {
      const learnedStrategies = cfr.train2p(100000, 100);
      console.log("Strategy found", learnedStrategies);
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement