Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.40 KB | None | 0 0
  1. def hitting_time(self, start, goal):
  2. # print("------------------------------------------------------------")
  3.  
  4. d = False
  5.  
  6. ordered = []
  7. closed = set()
  8.  
  9. #TODO: consider player id?
  10.  
  11. q = []
  12. q.append(list(start))
  13. cnt = 0
  14.  
  15. goal = list(goal)
  16.  
  17. # print("start", start, "goal", goal)
  18.  
  19. def already_hit(s, goal):
  20. return all([s[i] >= goal[i] for i in range(len(goal))])
  21.  
  22. def flatten_state(s, goal):
  23. return [min(s[0], goal[0]), min(s[1], goal[1]), min(s[2], goal[2])]
  24.  
  25. ### BFS ###
  26.  
  27. s_idx = {} # state to index in ordered[]
  28.  
  29. # print("resources", list(zip(range(2,13), self.board.get_resources(self.player_id))))
  30. # print("resources")
  31. # for i in range(11):
  32. # print(i+2, self.board.get_resources(self.player_id)[i])
  33.  
  34. # With BFS, Generate an ordering of the possible states leading up the goal state
  35. can_hit_goal = False
  36. while len(q) != 0:
  37. # while not all([already_hit(state, goal) for state in q]) and len(q) != 0:
  38. s = q.pop(0)
  39. closed.add(tuple(s))
  40. ordered.append(s)
  41.  
  42. # print("Explored", s, "in", cnt, "steps")
  43.  
  44. if s == goal:
  45. can_hit_goal = True
  46. # print("We've seen the goal", goal, "in", cnt, "steps")
  47.  
  48. if already_hit(s, goal):
  49. continue
  50. all_nxt = [[s[0] + delta[0], s[1] + delta[1], s[2] + delta[2]] for delta in self.board.get_resources(self.player_id)]
  51. for nxt in all_nxt:
  52. nxt = flatten_state(nxt, goal)
  53. if tuple(nxt) not in closed:
  54. q.append(nxt)
  55. closed.add(tuple(nxt))
  56. cnt += 1
  57.  
  58. if not can_hit_goal:
  59. # print("Cannot hit the goal, giving up...")
  60. # print("------------------------------------------------------------")
  61. return float("inf")
  62.  
  63. for idx in range(len(ordered)):
  64. s_idx[tuple(ordered[idx])] = idx
  65.  
  66. # print(s_idx)
  67.  
  68. ### MATRIX ###
  69.  
  70. num_states = len(ordered)
  71.  
  72. A = np.zeros((num_states, num_states))
  73. np.fill_diagonal(A, -1)
  74.  
  75. # Figure out the indicies (in the list "ordering") for each possible "next" states from each
  76. # idx_nxt[0] = a list of the indices (in "ordering") that each successor state is at
  77. idx_nxt = [[float("inf") for _ in range(11)] for _ in range(len(ordered))]
  78. for i in range(num_states):
  79. current = ordered[i] # TODO: rename
  80.  
  81. # poss_nxt_state[0] = the state we will be in if we roll a 2
  82. # poss_nxt_state[10] = state we will be in if we roll a 12
  83. poss_nxt_states = [flatten_state((current[0] + delta[0], \
  84. current[1] + delta[1], \
  85. current[2] + delta[2]), goal) \
  86. for delta in self.board.get_resources(self.player_id)]
  87.  
  88. # print(current, poss_nxt_states)
  89.  
  90. # Look for each of the poss_nxt_states
  91. for k in range(len(poss_nxt_states)): # len should be 11 each time
  92. # for j in range(0, len(ordered)):
  93. nxt = poss_nxt_states[k]
  94. j = s_idx[tuple(nxt)]
  95. if ordered[j] == nxt:
  96. idx_nxt[i][k] = j
  97.  
  98. # print(idx_nxt)
  99.  
  100.  
  101. # probs_trans[0] = probability of rolling a 2
  102. probs_trans = [.0278, .0556, .0833, .1111, .1389, .1667, .1389, .1111, .0833, .056, .0278]
  103.  
  104. # Fill in hitting time recursion based off idx_nxt indices
  105. for s in range(num_states):
  106. for trans in range(11):
  107. idx = idx_nxt[s][trans]
  108. prob = probs_trans[trans]
  109. A[s][idx] += prob
  110.  
  111. b = np.array([-1 for _ in range(num_states)])
  112.  
  113. # Change the hitting time of the goal state to be 0. Sanity check: modify the last rows
  114. # for s in range(num_states):
  115. # if ordered[s] == goal:
  116. # A[s] = np.zeros((1, num_states))
  117. # A[s][s] = 1
  118. # b[s] = 0
  119. A[len(A) - 1][len(A) - 1] = 1
  120. b[len(b) - 1] = 0
  121. # ^ REPLACE
  122.  
  123.  
  124. # Ax = b
  125.  
  126. # print("A", A)
  127. # print("b", b)
  128.  
  129. x = np.linalg.solve(A, b)
  130.  
  131. # print("x", x)
  132.  
  133. # print("------------------------------------------------------------")
  134.  
  135. return x[0] # return the hitting time from the start state
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement