Untitled

def hitting_time(self, start, goal):
    print("------------------------------------------------------------")

    d = False

    ordered = []
    closed = set()

    #TODO: consider player id?

    q = []
    q.append(list(start))
    cnt = 0

    goal = list(goal)

    print("start", start, "goal", goal)

    def already_hit(s, goal):
        return all([s[i] >= goal[i] for i in range(len(goal))])

    def flatten_state(s, goal):
        return [min(s[0], goal[0]), min(s[1], goal[1]), min(s[2], goal[2])]

#     def custom_resources(player_id=0):
#         return [[i+1,i+1,i+1] for i in range(11)]
#     get_resources = custom_resources # temporary

    ### BFS ###

#     print("resources", list(zip(range(2,13), self.board.get_resources(self.player_id))))
    print("resources")
    for i in range(11):
        print(i+2, self.board.get_resources(self.player_id)[i])

#     With BFS, Generate an ordering of the possible states leading up the goal state
    can_hit_goal = False
    while len(q) != 0:
#     while not all([already_hit(state, goal) for state in q]) and len(q) != 0:
        s = q.pop(0)
        closed.add(tuple(s))
        ordered.append(s)
        print("Explored", s, "in", cnt, "steps")

        if s == goal:
            can_hit_goal = True
            print("We've seen the goal", goal, "in", cnt, "steps")

        if already_hit(s, goal):
            continue
        all_nxt = [[s[0] + delta[0], s[1] + delta[1], s[2] + delta[2]] for delta in self.board.get_resources(self.player_id)]
        for nxt in all_nxt:
            nxt = flatten_state(nxt, goal)
            if tuple(nxt) not in closed:
                q.append(nxt)
                closed.add(tuple(nxt))
        cnt += 1

    if not can_hit_goal:
        print("Cannot hit the goal, giving up...")
        print("------------------------------------------------------------")
        return float("inf")


    ### MATRIX ###

    num_states = len(ordered)

    # Figure out the indicies (in the list "ordering") for each possible "next" states from each
    # idx_nxt[0] = a list of the indices (in "ordering") that each successor state is at
    idx_nxt = [[float("inf") for _ in range(11)] for _ in range(len(ordered))]
    for i in range(num_states):
        current = ordered[i] # TODO: rename

        # poss_nxt_state[0] = the state we will be in if we roll a 2
        # poss_nxt_state[10] = state we will be in if we roll a 12
        poss_nxt_states = [flatten_state((current[0] + delta[0], \
                                         current[1] + delta[1], \
                                         current[2] + delta[2]), goal) \
                           for delta in self.board.get_resources(self.player_id)]

        print("from", current, "we can reach", list(zip(range(2,13), poss_nxt_states)))

        # Look for each of the poss_nxt_states
        for j in range(0, len(ordered)):
            for k in range(len(poss_nxt_states)): # len should be 11 each time
                nxt = poss_nxt_states[k]
                if ordered[j] == nxt:
                    idx_nxt[i][k] = j

#     print(idx_nxt)


    A = np.zeros((num_states, num_states))
    np.fill_diagonal(A, -1)

    # probs_trans[0] = probability of rolling a 2
    probs_trans = [.0278, .0556, .0833, .1111, .1389, .1667, .1389, .1111, .0833, .056, .0278]

    # Fill in hitting time recursion based off idx_nxt indices
    for s in range(num_states):
        for trans in range(11):
            idx = idx_nxt[s][trans]
            prob = probs_trans[trans]
            A[s][idx] += prob

    b = np.array([-1 for _ in range(num_states)])

    # Change the hitting time of the goal state to be 0. Sanity check: modify the last rows
    for s in range(num_states):
        if ordered[s] == goal:
            A[s] = np.zeros((1, num_states))
            A[s][s] = 1
            b[s] = 0

    # Ax = b

    print("A", A)
    print("b", b)

    x = np.linalg.solve(A, b)

    print("x", x)

    print("------------------------------------------------------------")

    return x[0] # return the hitting time from the start state

# hitting_time([0,0,0],[4,2,8])