Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def hitting_time(self, start, goal):
- d = False
- ordered = []
- closed = set()
- q = []
- q.append(list(start))
- cnt = 0
- goal = list(goal)
- def already_hit(s, goal):
- return all([s[i] >= goal[i] for i in range(len(goal))])
- def flatten_state(s, goal):
- return [min(s[0], goal[0]), min(s[1], goal[1]), min(s[2], goal[2])]
- ### BFS ###
- # With BFS, Generate an ordering of the possible states leading up the goal state
- can_hit_goal = False
- while len(q) != 0:
- # while not all([already_hit(state, goal) for state in q]) and len(q) != 0:
- s = q.pop(0)
- closed.add(tuple(s))
- ordered.append(s)
- if s == goal:
- can_hit_goal = True
- if already_hit(s, goal):
- continue
- all_nxt = [[s[0] + delta[0], s[1] + delta[1], s[2] + delta[2]] for delta in self.board.get_resources(self.player_id)]
- for nxt in all_nxt:
- nxt = flatten_state(nxt, goal)
- if tuple(nxt) not in closed:
- q.append(nxt)
- closed.add(tuple(nxt))
- cnt += 1
- if not can_hit_goal:
- return float("inf")
- ### MATRIX ###
- num_states = len(ordered)
- # Figure out the indicies (in the list "ordering") for each possible "next" states from each
- # idx_nxt[0] = a list of the indices (in "ordering") that each successor state is at
- idx_nxt = [[float("inf") for _ in range(11)] for _ in range(len(ordered))]
- for i in range(num_states):
- current = ordered[i] # TODO: rename
- # poss_nxt_state[0] = the state we will be in if we roll a 2
- # poss_nxt_state[10] = state we will be in if we roll a 12
- poss_nxt_states = [flatten_state((current[0] + delta[0], \
- current[1] + delta[1], \
- current[2] + delta[2]), goal) \
- for delta in self.board.get_resources(self.player_id)]
- # Look for each of the poss_nxt_states
- for j in range(0, len(ordered)):
- for k in range(len(poss_nxt_states)): # len should be 11 each time
- nxt = poss_nxt_states[k]
- if ordered[j] == nxt:
- idx_nxt[i][k] = j
- A = np.zeros((num_states, num_states))
- np.fill_diagonal(A, -1)
- # probs_trans[0] = probability of rolling a 2
- probs_trans = [.0278, .0556, .0833, .1111, .1389, .1667, .1389, .1111, .0833, .056, .0278]
- # Fill in hitting time recursion based off idx_nxt indices
- for s in range(num_states):
- for trans in range(11):
- idx = idx_nxt[s][trans]
- prob = probs_trans[trans]
- A[s][idx] += prob
- b = np.array([-1 for _ in range(num_states)])
- # Change the hitting time of the goal state to be 0. Sanity check: modify the last rows
- for s in range(num_states):
- if ordered[s] == goal:
- A[s] = np.zeros((1, num_states))
- A[s][s] = 1
- b[s] = 0
- # Ax = b
- x = np.linalg.solve(A, b)
- return x[0] # return the hitting time from the start state
- # hitting_time([0,0,0],[4,2,8])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement