Advertisement
Guest User

Untitled

a guest
Feb 25th, 2020
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.51 KB | None | 0 0
  def evaluatePolicy(self, policy_matrix, gamma):
      """Run one in-place policy-evaluation sweep over the whole grid.

      Each cell ``(i, j)`` of ``self.value_matrix`` is overwritten with
      ``self.getSum(state, action, gamma)``, where ``state`` is
      ``self.state_space.state2Darray[i][j]`` and ``action`` is the policy
      entry at ``[state.row][state.col]``.  Cells are visited in row-major
      order and written immediately, so later cells in the same sweep see
      the values already updated earlier in that sweep.

      Only a single sweep is performed; this method does not iterate until
      the values stop changing.

      Args:
          policy_matrix: Policy to evaluate, indexed ``[row][col]``.  When
              ``None`` is passed, ``self.policy_matrix`` is used instead.
          gamma: Discount factor, forwarded unchanged to ``getSum``.

      Returns:
          None.  ``self.value_matrix`` is updated in place.
      """
      # NOTE(review): the loop nesting was reconstructed from a paste that
      # had lost its indentation -- confirm against the real file.
      #
      # The argument used to be ignored in favour of self.policy_matrix;
      # honour it, keeping the attribute only as a fallback for None.
      policy = self.policy_matrix if policy_matrix is None else policy_matrix
      states = self.state_space.state2Darray

      for i in range(ROW_NUM):
          for j in range(COL_NUM):
              state = states[i][j]
              # The policy is indexed by the state's own coordinates, not
              # by the loop indices.
              action = policy[state.row][state.col]
              self.value_matrix[i][j] = self.getSum(state, action, gamma)

  def getSum(self, s, a, gamma):
      """Accumulate the transition-weighted return of taking ``a`` in ``s``.

      Every grid cell ``(row, col)`` is treated as a candidate successor
      state.  For each one the term

          to_Next_State_Prob(robotAction(a), s, self.error, successor)
              * (getStateReward(row, col) + gamma * value_matrix[row][col])

      is added to a running total.

      Args:
          s: State the action is taken from.
          a: Action identifier; wrapped with ``robotAction`` before use.
          gamma: Discount factor applied to the successor's stored value.

      Returns:
          The accumulated total, divided by 100 when it exceeds 100.  The
          returned number is also printed to stdout.
      """
      # NOTE(review): the paste this was recovered from had no indentation;
      # the rescale and print are assumed to run once, after both loops.
      grid = self.state_space
      total = 0

      for row in range(ROW_NUM):
          for col in range(COL_NUM):
              successor = grid.state2Darray[row][col]
              # robotAction(a) is rebuilt for every successor, exactly as
              # the original code does.
              transition = to_Next_State_Prob(
                  robotAction(a), s, self.error, successor
              )
              future = gamma * self.value_matrix[row][col]
              immediate = grid.getStateReward(row, col)
              total = total + transition * (immediate + future)

      # NOTE(review): ad-hoc rescale of large totals -- kept as-is because
      # callers may rely on it; confirm whether it is still wanted.
      if total > 100:
          total = total / 100

      print(total)
      return total
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement