Untitled

def hitting_time(self, start, goal):
    """Return the expected number of dice rolls needed to reach ``goal``.

    The resource holdings are modelled as a Markov chain. A state is a tuple
    of resource counts, clamped component-wise at ``goal`` so that every
    holding that already satisfies the goal collapses into the single goal
    state. Each roll of two dice (2..12) adds the corresponding resource
    delta reported by ``self.board.get_resources(self.player_id)``.

    The expected hitting time ``h`` satisfies ``h(goal) = 0`` and, for every
    other state ``s``, ``h(s) = 1 + sum_k P(roll k) * h(next(s, k))``. The
    reachable states are enumerated with a breadth-first search and the
    resulting linear system is solved with ``np.linalg.solve``.

    Args:
        start: Sequence of current resource counts.
        goal: Sequence of required resource counts (same length as
            ``start``; the original code used three components).

    Returns:
        The expected number of rolls from ``start`` until every component is
        at least the corresponding component of ``goal``. This is ``0`` when
        ``start`` already satisfies the goal and ``float("inf")`` when the
        goal cannot be reached with the available deltas.

    Raises:
        ValueError: If the board does not report exactly one delta per
            possible dice total (11 entries, for rolls 2 through 12).
    """
    from collections import deque

    # TODO: consider player id?

    # Number of ways to roll each total 2..12 with two six-sided dice; using
    # exact fractions keeps every row of the transition matrix summing to 1.
    roll_ways = (1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1)
    roll_probs = [ways / 36.0 for ways in roll_ways]

    goal = tuple(goal)

    # Fetched once: the deltas are assumed not to change during this call.
    deltas = list(self.board.get_resources(self.player_id))
    if len(deltas) != len(roll_probs):
        raise ValueError(
            "expected %d resource deltas (one per roll 2..12), got %d"
            % (len(roll_probs), len(deltas))
        )

    def advance(state, delta):
        # Apply one roll's gain and clamp at the goal so that surplus
        # resources never create new states.
        return tuple(
            min(have + gain, need)
            for have, gain, need in zip(state, delta, goal)
        )

    # Clamp the start as well, so a start that already exceeds the goal is
    # recognised as the goal state instead of being reported unreachable.
    origin = tuple(min(have, need) for have, need in zip(start, goal))

    ### BFS ###
    # index_of maps each discovered state to its row/column in the linear
    # system. Indices are handed out in discovery order, which is also the
    # order states leave the FIFO queue, so transitions[i] belongs to the
    # state with index i.
    index_of = {origin: 0}
    transitions = []  # transitions[i][k] = index reached from state i on roll k + 2
    frontier = deque([origin])
    while frontier:
        state = frontier.popleft()
        if state == goal:
            # The goal is absorbing; it has no outgoing transitions to record.
            transitions.append(None)
            continue
        row = []
        for delta in deltas:
            nxt = advance(state, delta)
            if nxt not in index_of:
                index_of[nxt] = len(index_of)
                frontier.append(nxt)
            row.append(index_of[nxt])
        transitions.append(row)

    if goal not in index_of:
        return float("inf")

    ### MATRIX ###
    # Row i encodes  sum_k p_k * h(next_k) - h(i) = -1  for non-goal states
    # and  h(goal) = 0  for the goal, wherever the BFS happened to place it.
    num_states = len(index_of)
    A = np.zeros((num_states, num_states))
    b = np.full(num_states, -1.0)
    for i, row in enumerate(transitions):
        if row is None:
            A[i, i] = 1.0
            b[i] = 0.0
            continue
        A[i, i] -= 1.0
        for j, prob in zip(row, roll_probs):
            A[i, j] += prob

    # Ax = b
    x = np.linalg.solve(A, b)

    return x[0]  # hitting time from the start state (index 0)