Advertisement
Guest User

Untitled

a guest
Apr 22nd, 2019
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.34 KB | None | 0 0
  1. def hitting_time(self, start, goal):
  2. print("------------------------------------------------------------")
  3.  
  4. d = False
  5.  
  6. ordered = []
  7. closed = set()
  8.  
  9. #TODO: consider player id?
  10.  
  11. q = []
  12. q.append(list(start))
  13. cnt = 0
  14.  
  15. goal = list(goal)
  16.  
  17. print("start", start, "goal", goal)
  18.  
  19. def already_hit(s, goal):
  20. return all([s[i] >= goal[i] for i in range(len(goal))])
  21.  
  22. def flatten_state(s, goal):
  23. return [min(s[0], goal[0]), min(s[1], goal[1]), min(s[2], goal[2])]
  24.  
  25. # def custom_resources(player_id=0):
  26. # return [[i+1,i+1,i+1] for i in range(11)]
  27. # get_resources = custom_resources # temporary
  28.  
  29. ### BFS ###
  30.  
  31. # print("resources", list(zip(range(2,13), self.board.get_resources(self.player_id))))
  32. print("resources")
  33. for i in range(11):
  34. print(i+2, self.board.get_resources(self.player_id)[i])
  35.  
  36. # With BFS, Generate an ordering of the possible states leading up the goal state
  37. can_hit_goal = False
  38. while len(q) != 0:
  39. # while not all([already_hit(state, goal) for state in q]) and len(q) != 0:
  40. s = q.pop(0)
  41. closed.add(tuple(s))
  42. ordered.append(s)
  43. print("Explored", s, "in", cnt, "steps")
  44.  
  45. if s == goal:
  46. can_hit_goal = True
  47. print("We've seen the goal", goal, "in", cnt, "steps")
  48.  
  49. if already_hit(s, goal):
  50. continue
  51. all_nxt = [[s[0] + delta[0], s[1] + delta[1], s[2] + delta[2]] for delta in self.board.get_resources(self.player_id)]
  52. for nxt in all_nxt:
  53. nxt = flatten_state(nxt, goal)
  54. if tuple(nxt) not in closed:
  55. q.append(nxt)
  56. closed.add(tuple(nxt))
  57. cnt += 1
  58.  
  59. if not can_hit_goal:
  60. print("Cannot hit the goal, giving up...")
  61. print("------------------------------------------------------------")
  62. return float("inf")
  63.  
  64.  
  65. ### MATRIX ###
  66.  
  67. num_states = len(ordered)
  68.  
  69. # Figure out the indicies (in the list "ordering") for each possible "next" states from each
  70. # idx_nxt[0] = a list of the indices (in "ordering") that each successor state is at
  71. idx_nxt = [[float("inf") for _ in range(11)] for _ in range(len(ordered))]
  72. for i in range(num_states):
  73. current = ordered[i] # TODO: rename
  74.  
  75. # poss_nxt_state[0] = the state we will be in if we roll a 2
  76. # poss_nxt_state[10] = state we will be in if we roll a 12
  77. poss_nxt_states = [flatten_state((current[0] + delta[0], \
  78. current[1] + delta[1], \
  79. current[2] + delta[2]), goal) \
  80. for delta in self.board.get_resources(self.player_id)]
  81.  
  82. print("from", current, "we can reach", list(zip(range(2,13), poss_nxt_states)))
  83.  
  84. # Look for each of the poss_nxt_states
  85. for j in range(0, len(ordered)):
  86. for k in range(len(poss_nxt_states)): # len should be 11 each time
  87. nxt = poss_nxt_states[k]
  88. if ordered[j] == nxt:
  89. idx_nxt[i][k] = j
  90.  
  91. # print(idx_nxt)
  92.  
  93.  
  94.  
  95. A = np.zeros((num_states, num_states))
  96. np.fill_diagonal(A, -1)
  97.  
  98. # probs_trans[0] = probability of rolling a 2
  99. probs_trans = [.0278, .0556, .0833, .1111, .1389, .1667, .1389, .1111, .0833, .056, .0278]
  100.  
  101. # Fill in hitting time recursion based off idx_nxt indices
  102. for s in range(num_states):
  103. for trans in range(11):
  104. idx = idx_nxt[s][trans]
  105. prob = probs_trans[trans]
  106. A[s][idx] += prob
  107.  
  108. b = np.array([-1 for _ in range(num_states)])
  109.  
  110. # Change the hitting time of the goal state to be 0. Sanity check: modify the last rows
  111. for s in range(num_states):
  112. if ordered[s] == goal:
  113. A[s] = np.zeros((1, num_states))
  114. A[s][s] = 1
  115. b[s] = 0
  116.  
  117. # Ax = b
  118.  
  119. print("A", A)
  120. print("b", b)
  121.  
  122. x = np.linalg.solve(A, b)
  123.  
  124. print("x", x)
  125.  
  126. print("------------------------------------------------------------")
  127.  
  128. return x[0] # return the hitting time from the start state
  129.  
  130. # hitting_time([0,0,0],[4,2,8])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement