# Advertisement
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import math
import random
import sys
import time

import numpy as np

# Letter label for each 0-based board coordinate (0 -> 'a' ... 15 -> 'p').
# Two labels concatenated (row + column) form the string key used for a move.
position = dict(enumerate("abcdefghijklmnop"))

# Divisor of the exponent applied to visit counts in node.fenzhi
# (count ** (1 / temperature)).
temperature = 1

# Import-time output kept from the original script.
print(position[1], position[2])
class MCTS(object):
    """Owns the current search-tree root and the game objects used by the search.

    The methods ``simulation``, ``interact_game_init``, ``interact_gamel`` and
    ``interact_game2`` had no body in the source as received; they are kept as
    explicit stubs that raise ``NotImplementedError``.
    """

    def __init__(self, board=15, similation_per_step=400, netural_network=None):
        """Create the root node and the game objects.

        Args:
            board: Board size, forwarded as ``board_size`` to the game objects.
            similation_per_step: Stored as ``s_per_step``.
            netural_network: Stored as ``NN``; not used by the code in view.
        """
        super(MCTS, self).__init__()
        self.board_size = board
        self.s_per_step = similation_per_step
        # Fixed: this was `==`, a discarded comparison that never assigned the
        # attribute and raised AttributeError.
        self.current_node = node(None, 1)
        self.NN = netural_network
        self.game_process = five_stone_game(board_size=board)
        # NOTE(review): `five_stone` vs `five_stone_game` above -- neither is
        # defined in the visible source; confirm this name is intended.
        self.simulate_game = five_stone(board_size=board)
        # Every two-letter move key of a 15x15 grid, in row-major order, and a
        # zeroed table keyed by them.
        # NOTE(review): the grid is fixed at 15 regardless of `board`.
        self.order = [position[i] + position[j] for i in range(15) for j in range(15)]
        self.map = dict.fromkeys(self.order, 0)

    def renew(self):
        """Start over with a fresh root node and reset the game object."""
        # Fixed: the root player was 0 here but 1 in __init__. Child nodes get
        # the negated player, so 0 would never alternate.
        self.current_node = node(None, 1)
        self.game_process.renew()

    def MCTS_setp(self, action):
        """Return the child reached by ``action``, detached from its parent.

        Args:
            action: Key into the current node's ``child`` table.

        Returns:
            The child node, with ``parent`` set to ``None``.
            ``self.current_node`` is not reassigned here.

        Raises:
            KeyError: If ``action`` is not a child of the current node.
        """
        # Fixed: node exposes this lookup as `get_action`; `get_child` exists
        # only on edge.
        next_node = self.current_node.get_action(action)
        next_node.parent = None
        return next_node

    def simulation(self):
        """Stub: no body was present in the source."""
        raise NotImplementedError("MCTS.simulation has no implementation")

    def interact_game_init(self):
        """Stub: no body was present in the source."""
        raise NotImplementedError("MCTS.interact_game_init has no implementation")

    def interact_gamel(self, action):
        """Stub: no body was present in the source."""
        raise NotImplementedError("MCTS.interact_gamel has no implementation")

    def interact_game2(self, action):
        """Stub: no body was present in the source."""
        raise NotImplementedError("MCTS.interact_game2 has no implementation")
class node(object):
    """A node of the search tree; each outgoing move is held as an ``edge``."""

    def __init__(self, parent, player):
        """Create a node with no children.

        Args:
            parent: The edge that leads to this node, or ``None`` for a root.
            player: Stored as ``node_player``; children get the negated value.
        """
        super(node, self).__init__()
        self.parent = parent
        self.counter = 0            # number of back_up() calls seen
        self.child = {}             # move key (e.g. "ab") -> edge
        self.node_player = player
        # Every two-letter move key of a 15x15 grid in row-major order, plus a
        # scratch table that fenzhi() fills with visit counts and zeroes again.
        self.order = [position[i] + position[j] for i in range(15) for j in range(15)]
        self.map = dict.fromkeys(self.order, 0)

    def add_child(self, action, next_exp):
        """Register the move ``action`` (a pair of indices) as a new edge.

        Args:
            action: Indexable pair; ``action[0]`` and ``action[1]`` are looked
                up in ``position`` to build the child's key.
            next_exp: Forwarded to the edge (multiplies its exploration term).
        """
        action_name = position[action[0]] + position[action[1]]
        self.child[action_name] = edge(action=action, parent_node=self, next_exp=next_exp)

    def get_action(self, action):
        """Return the node reached through the edge stored under key ``action``.

        Raises:
            KeyError: If no edge is stored under ``action``.
        """
        child_node, _ = self.child[action].get_child()
        return child_node

    def eval_or_not(self):
        """Return True when this node has no children yet."""
        return not self.child

    def back_up(self, v):
        """Count one visit and pass ``v`` on to the parent edge, if any."""
        self.counter += 1
        if self.parent is not None:
            self.parent.backup(v)

    def fenzhi(self, train=True):
        """Turn child visit counts into a move distribution and pick a move.

        Each visited child is weighted by ``counter ** (1 / temperature)`` and
        the weights are normalised to sum to 1.

        Args:
            train: When True, sample the move from 0.8 * distribution +
                0.2 * Dirichlet(0.3) noise; otherwise take the most-visited move.

        Returns:
            ``(move, result)``: ``move`` is the chosen key and ``result`` is a
            list aligned with ``self.order`` holding each key's probability
            (0 for unvisited keys).

        Raises:
            ValueError: If no child has a non-zero visit count.
        """
        for key, branch in self.child.items():
            self.map[key] = branch.counter

        weights = []        # one entry per key in self.order
        choice_pool = []    # keys with a non-zero count
        pool_weights = []   # their weights, aligned with choice_pool
        for key in self.order:
            visits = self.map[key]
            if visits != 0:
                weight = np.float_power(visits, 1 / temperature)
                choice_pool.append(key)
                pool_weights.append(weight)
                weights.append(weight)
                self.map[key] = 0  # leave the scratch table clean for next call
            else:
                weights.append(0)

        if not choice_pool:
            raise ValueError("fenzhi() needs at least one visited child")

        # Fixed: the total is taken once, up front. It used to be recomputed
        # while `result` was being overwritten in place, so later entries were
        # divided by a shrinking sum and the probabilities did not sum to 1.
        total = sum(pool_weights)
        result = [w / total if w else 0 for w in weights]
        choice_prob = [w / total for w in pool_weights]

        if train:
            noise = np.random.dirichlet(0.3 * np.ones(len(choice_prob)))
            move = np.random.choice(choice_pool, p=0.8 * np.array(choice_prob) + 0.2 * noise)
        else:
            move = choice_pool[np.argmax(choice_prob)]
        return move, result

    def ucb_sim(self):
        """Follow the child edge with the highest ``ucb_value()``.

        Returns:
            ``(this_node, expand, action)``: the node behind the chosen edge,
            the flag returned by that edge's ``get_child()``, and the edge's
            ``action``.

        Raises:
            ValueError: If this node has no children.
        """
        best_key = None
        # Fixed: starting at 0 with a strict `>` selected nothing when every
        # score was <= 0 (e.g. while this node's counter is still 0).
        best_score = float("-inf")
        for key, branch in self.child.items():
            score = branch.ucb_value()
            if score > best_score:
                best_key, best_score = key, score
        if best_key is None:
            raise ValueError("ucb_sim() called on a node with no children")

        chosen = self.child[best_key]
        # Fixed: this was a bare tuple expression (commas, no `=`), which
        # raised NameError instead of unpacking get_child().
        this_node, expand = chosen.get_child()
        return this_node, expand, chosen.action
class edge(object):
    """A move out of a node: holds its statistics and the node it leads to."""

    def __init__(self, action, parent_node, next_exp):
        """Create an edge with no child node and zeroed statistics.

        Args:
            action: The move this edge represents; returned by node.ucb_sim.
            parent_node: The node this edge leaves from.
            next_exp: Multiplier of the exploration term in ``ucb_value``
                (presumably a prior for this move -- confirm with the caller).
        """
        super(edge, self).__init__()
        self.action = action
        # NOTE(review): counter is incremented by both get_child() and
        # backup(); confirm that counting a visit twice is intended.
        self.counter = 0
        self.parent_node = parent_node
        self.next_exp = next_exp
        self.child_node = None
        # Fixed: was misspelled `actine_value`, while ucb_value() read
        # `action_value`, which was never defined.
        self.action_value = 0       # running sum of backed-up values
        self.cpuct = 0.1            # weight of the exploration term

    def backup(self, v):
        """Add ``v`` to this edge's value sum and pass ``-v`` to the parent node."""
        # Fixed: the value used to be added to `self.action` (the move itself).
        self.action_value += v
        self.counter += 1
        # Fixed: the parent node was being called like a function; the value
        # is handed on through its back_up() method with the sign flipped.
        self.parent_node.back_up(-v)

    def get_child(self):
        """Return the node behind this edge, creating it on first use.

        Returns:
            ``(child_node, created)`` where ``created`` is True only on the
            call that built the node. Every call increments ``counter``.
        """
        created = self.child_node is None
        self.counter += 1
        if created:
            # The child belongs to the opposite player of the parent node.
            self.child_node = node(self, -self.parent_node.node_player)
        return self.child_node, created

    def ucb_value(self):
        """Return mean value plus the exploration bonus for this edge.

        The bonus is ``cpuct * next_exp * sqrt(parent_node.counter) / (1 + counter)``.
        """
        # Guard on the divisor itself so a zero counter can never be divided by.
        q = self.action_value / self.counter if self.counter else 0
        explore = self.cpuct * self.next_exp * np.sqrt(self.parent_node.counter) / (1 + self.counter)
        return q + explore
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement