# Untitled

import numpy as np
import sys

import time
import random
import math

# Maps a board coordinate index (0..15) to a single-letter label; elsewhere in
# this file two labels are concatenated (row label + column label) to name a cell.
position = dict(enumerate('abcdefghijklmnop'))

# Visit counts are raised to the power 1/temperature when a move is sampled.
temperature = 1

print(position[1], position[2])
class MCTS(object):
    """Driver object that owns the search-tree root and the game objects.

    Attributes:
        board_size: value of the ``board`` constructor argument.
        s_per_step: value of ``similation_per_step`` (not read in this class).
        current_node: root ``node`` of the current search tree.
        NN: the object passed as ``netural_network`` (may be ``None``).
        game_process / simulate_game: game objects built from
            ``five_stone_game`` and ``five_stone`` (defined outside this block).
        map / order: a zeroed per-cell dict and the list of cell names in
            row-major order, for a 15 x 15 grid.
    """

    def __init__(self, board=15, similation_per_step=400, netural_network=None):
        """Create the root node, the game objects and the cell-name tables.

        Args:
            board: board size forwarded to the game constructors.
            similation_per_step: stored as ``s_per_step``.
            netural_network: stored as ``NN``.
        """
        super(MCTS, self).__init__()
        self.board_size = board
        self.s_per_step = similation_per_step
        # Fixed: the original used ``==`` here, which compared against an
        # attribute that did not exist yet instead of assigning the root.
        self.current_node = node(None, 1)
        self.NN = netural_network
        # NOTE(review): two different constructor names are used below
        # (five_stone_game vs five_stone); neither is defined in this block —
        # confirm both exist or whether one is a typo.
        self.game_process = five_stone_game(board_size=board)
        self.simulate_game = five_stone(board_size=board)
        self.map = {}
        self.order = []
        # NOTE(review): the grid is hard-coded to 15 x 15 regardless of
        # ``board``; ``position`` only provides labels for indices 0..15.
        for i in range(15):
            for j in range(15):
                name = position[i] + position[j]
                self.order.append(name)
                self.map[name] = 0

    def renew(self):
        """Replace the root with a fresh node and reset ``game_process``."""
        # NOTE(review): the new root is tagged with player 0 while __init__
        # uses player 1 — confirm which is intended.
        self.current_node = node(None, 0)
        self.game_process.renew()

    def MCTS_setp(self, action):
        """Return the child of ``current_node`` reached by ``action``, detached.

        Args:
            action: key into ``current_node.child`` (a two-letter cell name).

        Returns:
            The child node, with its ``parent`` link cleared so it can act as
            a new root.
        """
        # Fixed: ``node`` exposes ``get_action`` (not ``get_child``) for
        # fetching the child node behind an action key.
        next_node = self.current_node.get_action(action)
        next_node.parent = None
        # NOTE(review): ``self.current_node`` is not advanced here; callers
        # presumably assign the returned node themselves — confirm.
        return next_node

    def simulation(self):
        """Placeholder: the body was missing from the source."""
        raise NotImplementedError("MCTS.simulation is not implemented")

    def interact_game_init(self):
        """Placeholder: the body was missing from the source."""
        raise NotImplementedError("MCTS.interact_game_init is not implemented")

    def interact_gamel(self, action):
        """Placeholder: the body was missing from the source."""
        raise NotImplementedError("MCTS.interact_gamel is not implemented")

    def interact_game2(self, action):
        """Placeholder: the body was missing from the source."""
        raise NotImplementedError("MCTS.interact_game2 is not implemented")


class node(object):
    """A search-tree node.

    Attributes:
        parent: the object this node reports to in ``back_up`` (an ``edge``
            when the node is created through ``edge.get_child``), or ``None``.
        counter: number of times ``back_up`` has run on this node.
        child: dict mapping two-letter cell names to outgoing ``edge`` objects.
        node_player: the player tag passed to the constructor.
        map / order: a per-cell scratch dict (kept at 0 between calls to
            ``fenzhi``) and the cell names in row-major order for a 15 x 15 grid.
    """

    def __init__(self, parent, player):
        super(node, self).__init__()
        self.parent = parent
        self.counter = 0
        self.child = {}
        self.node_player = player
        self.map = {}
        self.order = []
        for i in range(15):
            for j in range(15):
                key_1 = position[i] + position[j]
                self.map[key_1] = 0
                self.order.append(key_1)

    def add_child(self, action, next_exp):
        """Attach an edge for ``action`` (a pair of coordinate indices).

        Args:
            action: indexable pair ``(i, j)``; the edge is stored under the
                key ``position[i] + position[j]``.
            next_exp: value forwarded to ``edge`` as ``next_exp``.
        """
        action_name = position[action[0]] + position[action[1]]
        self.child[action_name] = edge(action=action, parent_node=self, next_exp=next_exp)

    def get_action(self, action):
        """Return the node behind the edge stored under key ``action``."""
        child_node, _ = self.child[action].get_child()
        return child_node

    def eval_or_not(self):
        """Return True when this node has no outgoing edges yet."""
        return not self.child

    def back_up(self, v):
        """Count a visit and forward ``v`` to ``parent.backup`` if a parent exists."""
        self.counter += 1
        if self.parent:
            self.parent.backup(v)

    def fenzhi(self, train=True):
        """Convert child visit counts into a move distribution and pick a move.

        Each visited child's ``counter`` is raised to ``1/temperature`` and the
        results are normalised to sum to 1.

        Args:
            train: when true, sample the move from ``0.8 * probs + 0.2 * noise``
                where ``noise`` is a Dirichlet(0.3) draw; otherwise pick the
                move with the highest probability.

        Returns:
            ``(move, result)`` where ``move`` is a cell name and ``result`` is a
            list aligned with ``self.order`` holding each cell's probability
            (0 for unvisited cells).

        Raises:
            ValueError: if no child has a non-zero visit count.
        """
        for key, child_edge in self.child.items():
            self.map[key] = child_edge.counter

        result = []
        choice_pool = []
        choice_weight = []
        for key in self.order:
            visits = self.map[key]
            if visits != 0:
                weight = np.float_power(visits, 1 / temperature)
                choice_pool.append(key)
                choice_weight.append(weight)
                result.append(weight)
                # Reset the scratch slot so the next call starts from zero.
                self.map[key] = 0
            else:
                result.append(0)

        if not choice_pool:
            raise ValueError("fenzhi: no visited children to choose from")

        # Fixed: normalise against a total computed once. The original
        # re-evaluated sum(result) while mutating result, then divided the
        # choice weights by the already-normalised sum, so neither vector
        # summed to 1 and np.random.choice rejected the probabilities.
        total = sum(choice_weight)
        result = [w / total if w else 0 for w in result]
        choice_prob = np.array(choice_weight) / total

        if train:
            noise = np.random.dirichlet(0.3 * np.ones(len(choice_prob)))
            move = np.random.choice(choice_pool, p=0.8 * choice_prob + 0.2 * noise)
        else:
            move = choice_pool[int(np.argmax(choice_prob))]
        return move, result

    def ucb_sim(self):
        """Follow the outgoing edge with the highest ``ucb_value()``.

        Returns:
            ``(child_node, expanded, action)``: the pair returned by the chosen
            edge's ``get_child()`` followed by that edge's ``action``.

        Raises:
            ValueError: if this node has no children.
        """
        if not self.child:
            raise ValueError("ucb_sim: node has no children")
        # Score each edge once. max() returns the first maximal key, matching
        # the original strict '>' scan, but also works when every score is <= 0
        # (the original started from 0 and then found no key at all).
        scores = {key: child_edge.ucb_value() for key, child_edge in self.child.items()}
        best_key = max(scores, key=scores.get)
        best_edge = self.child[best_key]
        # Fixed: the original had a comma instead of '=' here, leaving
        # this_node / expand undefined.
        this_node, expand = best_edge.get_child()
        return this_node, expand, best_edge.action

class edge(object):
    """A link from a parent node to the (lazily created) node behind an action.

    Attributes:
        action: the action value passed to the constructor.
        counter: visit count; incremented by both ``get_child`` and ``backup``.
        parent_node: the node this edge leaves from.
        next_exp: multiplier used in the exploration term of ``ucb_value``.
        child_node: the node behind this edge, or ``None`` until first requested.
        action_value: running sum of values passed to ``backup``.
        cpuct: exploration constant (0.1).
    """

    def __init__(self, action, parent_node, next_exp):
        super(edge, self).__init__()
        self.action = action
        self.counter = 0
        self.parent_node = parent_node
        self.next_exp = next_exp
        self.child_node = None
        # Fixed: the original stored this under the misspelled name
        # ``actine_value`` while ``ucb_value`` read ``action_value``.
        self.action_value = 0
        self.cpuct = 0.1

    @property
    def actine_value(self):
        """Alias for ``action_value`` kept for code using the old misspelling."""
        return self.action_value

    @actine_value.setter
    def actine_value(self, value):
        self.action_value = value

    def backup(self, v):
        """Accumulate ``v``, count a visit, and pass ``-v`` to the parent node.

        Args:
            v: value to add to ``action_value``.
        """
        # Fixed: the original added v to ``self.action`` (the move itself).
        self.action_value += v
        self.counter += 1
        # Fixed: the original called the parent node object directly;
        # ``node`` exposes ``back_up`` for this.
        self.parent_node.back_up(-v)

    def get_child(self):
        """Return the node behind this edge, creating it on first use.

        Returns:
            ``(child_node, created)`` where ``created`` is True only on the
            call that built the node. The new node gets this edge as its
            parent and the negated player tag of ``parent_node``.
        """
        # NOTE(review): counter is bumped here and again in backup(); confirm
        # that counting both is intended.
        self.counter += 1
        created = self.child_node is None
        if created:
            self.child_node = node(self, -self.parent_node.node_player)
        return self.child_node, created

    def ucb_value(self):
        """Return ``q + cpuct * next_exp * sqrt(parent.counter) / (1 + counter)``.

        ``q`` is ``action_value / counter``, or 0 when nothing has been
        accumulated (or ``counter`` is still 0).
        """
        if self.action_value and self.counter:
            q = self.action_value / self.counter
        else:
            q = 0
        exploration = self.cpuct * self.next_exp * np.sqrt(self.parent_node.counter) / (1 + self.counter)
        return q + exploration