Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def get_reward(previous_state, new_state,
               previous_sensor, new_sensor,
               prev_action, action,
               prev_val, new_val):
    """Return the shaped reward for a single agent transition.

    The rules below are evaluated in priority order and the first one that
    matches decides the reward, so their order is significant.

    Args:
        previous_state, new_state: Indexable observations before/after the
            step. Only the elements from index 3 onward are read; their
            maximum is compared against 0, 1 and 2 (presumably a proximity
            level of the prey, 2 being closest -- confirm with the caller).
        previous_sensor, new_sensor: Sensor readings before/after the step;
            only their maximum is used. A larger maximum is treated as
            "something got closer" (assumed -- confirm sensor scaling).
        prev_action, action: Previous and current action ids. Action ``0``
            is treated as "go straight"; any other value as a turn.
        prev_val, new_val: Iterables of per-region colour measurements
            before/after the step (presumably ordered right, mid, left and
            scaled 0..1 -- confirm). Only their sums are compared.

    Returns:
        A number (int or float) in the range -10 .. 30.
    """
    going_straight = action == 0

    # Total amount of target colour seen in the image, before and after.
    colour_before = sum(prev_val)
    colour_after = sum(new_val)

    # Strongest sensor reading, before and after.
    sensor_before = np.max(previous_sensor)
    sensor_after = np.max(new_sensor)

    # Highest level found in the tail of the state vector, before and after.
    level_before = np.max(previous_state[3:])
    level_after = np.max(new_state[3:])

    # Encourages going towards prey.
    if level_before == 0 and level_after == 1:
        return 10 if going_straight else 2

    # Massive payoff if we get super close to prey.
    if level_before == 1 and level_after == 2:
        return 30

    # Nothing happens if prey gets away.
    if level_before == 2 and level_after == 1:
        return 0

    # Good reward if we see more of the target colour than before, but only
    # when it was achieved by driving straight.
    if colour_before < colour_after:
        return 10 if going_straight else 0

    # Sensors report something closer: good payoff if it coincides with the
    # prey being in view, bad payoff otherwise (a wall) to steer clear.
    if sensor_after > sensor_before:
        return 15 if level_after >= 1 else -5

    # Two non-straight actions in a row: penalise heavily so the agent does
    # not keep turning on the spot.
    if prev_action != 0 and action != 0:
        return -10

    # Small payoff to encourage exploring (going straight); minor penalty for
    # turning, but not bad enough to discourage it.
    return 1 if going_straight else -2.5
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement