Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def evaluatePolicy(self, policy_matrix, gamma):
    """Perform one in-place policy-evaluation sweep over the whole grid.

    For every cell (i, j) the value matrix entry is replaced by the
    one-step Bellman backup (via ``getSum``) of the action the current
    policy prescribes for that state.  Because updates happen in place,
    cells later in the sweep already see updated neighbour values
    (Gauss-Seidel-style evaluation).

    NOTE(review): the ``policy_matrix`` argument is never read — the
    method uses ``self.policy_matrix`` instead.  Confirm callers always
    pass the same object; otherwise this is a latent bug.  The parameter
    is kept to preserve the call signature.

    Args:
        policy_matrix: unused (see NOTE above).
        gamma: discount factor forwarded to ``getSum``.
    """
    # The original contained dead scaffolding for a convergence loop
    # (``while temp > 0.1`` / delta tracking), all commented out or
    # wrapped in ``for x in range(1)`` — it ran exactly one sweep, so
    # the dead code was removed and the single sweep kept.
    for i in range(ROW_NUM):
        for j in range(COL_NUM):
            state = self.state_space.state2Darray[i][j]
            action = self.policy_matrix[state.row][state.col]
            self.value_matrix[i][j] = self.getSum(state, action, gamma)
def getSum(self, s, a, gamma):
    """Return the expected one-step Bellman backup for action ``a`` in state ``s``.

    Accumulates ``T(s, a, s') * (R(s') + gamma * V(s'))`` over every
    candidate successor state s' in the grid, where the transition
    probability comes from ``to_Next_State_Prob`` and V(s') is read
    from ``self.value_matrix``.

    Args:
        s: current state object.
        a: action identifier, converted once via ``robotAction``.
        gamma: discount factor.

    Returns:
        The accumulated expected value (possibly rescaled — see NOTE).
    """
    # Fixes vs. original: removed a leftover debug ``print`` before the
    # return, and hoisted the loop-invariant ``robotAction(a)`` out of
    # the double loop (it was rebuilt on every inner iteration).
    action = robotAction(a)
    total = 0
    for i in range(ROW_NUM):
        for j in range(COL_NUM):
            s_prime = self.state_space.state2Darray[i][j]
            t_prob = to_Next_State_Prob(action, s, self.error, s_prime)
            v = self.value_matrix[i][j]
            reward = self.state_space.getStateReward(i, j)
            total += t_prob * (reward + gamma * v)
    # NOTE(review): arbitrary rescaling of large sums kept from the
    # original; it distorts the Bellman backup — confirm this is
    # intentional (looks like an ad-hoc divergence guard).
    if total > 100:
        total = total / 100
    return total
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement