Advertisement
Guest User

Untitled

a guest
May 21st, 2019
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.23 KB | None | 0 0
  1. import numpy as np
  2. import sys
  3.  
  4. import time
  5. import random
  6. import math
  7.  
  8. position={0:'a',1:'b',2:'c',3:'d',4:'e',5:'f',6:'g',7:'h',8:'i',9:'j',10:'k',11:'l',12:'m',13:'n',14:'o',15:'p'}
  9. temperature=1
  10. print(position[1],position[2])
  11. class MCTS(object):
  12. def __init__(self,board=15,similation_per_step=400,netural_network=None):
  13. super(MCTS, self).__init__()
  14. self.board_size=board
  15. self.s_per_step=similation_per_step
  16. self.current_node==node(None,1)
  17. self.NN=netural_network
  18. self.game_process=five_stone_game(board_size=board)
  19. self.simulate_game=five_stone(board_size=board)
  20. self.map={}
  21. self.order=[]
  22. for i in range(15):
  23. for j in range(15):
  24. name=position[i]+position[j]
  25. self.order.append(name)
  26. self.map[name]=0
  27.  
  28. def renew(self):
  29. self.current_node=node(None,0)
  30. self.game_process.renew()
  31.  
  32. def MCTS_setp(self,action):
  33. next_node=self.current_node.get_child(action)
  34. next_node.parent=None
  35. return next_node
  36.  
  37. def simulation(self):
  38.  
  39. def interact_game_init(self):
  40.  
  41. def interact_gamel(self,action):
  42.  
  43. def interact_game2(self,action):
  44.  
  45.  
  46. class node(object):
  47. def __init__(self,parent,player):
  48. super(node, self).__init__()
  49. self.parent=parent
  50. self.counter=0
  51. self.child={}
  52. self.node_player=player
  53. self.map={}
  54. self.order=[]
  55. for i in range(15):
  56. for j in range(15):
  57. key_1=position[i]+position[j]
  58. self.map[key_1]=0
  59. self.order.append(key_1)
  60.  
  61. def add_child(self,action,next_exp):
  62. action_name=position[action[0]]+position[action[1]]
  63. self.child[action_name]=edge(action=action,parent_node=self,next_exp=next_exp)
  64.  
  65. def get_action(self,action):
  66. child_node,_=self.child[action].get_child()
  67. return child_node
  68.  
  69. def eval_or_not(self):
  70. return len(self.child)==0
  71.  
  72. def back_up(self,v):
  73. self.counter+=1
  74. if self.parent:
  75. self.parent.backup(v)
  76.  
  77. def fenzhi(self,train=True):
  78. for key in self.child.keys():
  79. self.map[key]=self.child[key].counter
  80.  
  81.  
  82. result=[]
  83. choice_pool=[]
  84. choice_prob=[]
  85. for key in self.order:
  86. if self.map[key]!=0:
  87. choice_pool.append(key)
  88. tmp=np.float_power(self.map[key],1/temperature)
  89. choice_prob.append(tmp)
  90. result.append(tmp)
  91. self.map[key]=0
  92. else:
  93. result.append(0)
  94.  
  95. for i in range(len(result)):
  96. if result[i]:
  97. result[i]=result[i]/sum(result)
  98.  
  99. choice_prob=[choice/sum(result) for choice in choice_prob]
  100.  
  101. if train:
  102. move=np.random.choice(choice_pool,p=0.8*np.array(choice_prob)+0.2*np.random.dirichlet(0.3*np.ones(len(choice_prob))))
  103. else:
  104. move=choice_pool[np.argmax(choice_prob)]
  105. return move,result
  106.  
  107. def ucb_sim(self):
  108. ucb_max=0
  109. ucb_max_key=None
  110. for key in self.child.keys():
  111. if self.child[key].ucb_value()>ucb_max:
  112. ucb_max_key=key
  113. ucb_max=self.child[key].ucb_value()
  114. this_node,expand,self.child[ucb_max_key].get_child()
  115. return this_node,expand,self.child[ucb_max_key].action
  116.  
  117. class edge(object):
  118. def __init__(self,action,parent_node,next_exp):
  119. super(edge, self).__init__()
  120. self.action=action
  121. self.counter=0
  122. self.parent_node=parent_node
  123. self.next_exp=next_exp
  124. self.child_node=None
  125. self.actine_value=0
  126. self.cpuct=0.1
  127.  
  128. def backup(self,v):
  129. self.action+=v
  130. self.counter+=1
  131. self.parent_node(-v)
  132.  
  133. def get_child(self):
  134. if self.child_node is None:
  135. self.counter+=1
  136. self.child_node=node(self,-self.parent_node.node_player)
  137. return self.child_node,True
  138. else:
  139. self.counter+=1
  140. return self.child_node,False
  141.  
  142. def ucb_value(self):
  143. if self.actine_value:
  144. q=self.action_value/self.counter
  145. else:
  146. q=0
  147. return q+self.cpuct*self.next_exp*np.sqrt(self.parent_node.counter)/(1+self.counter)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement