#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
import matplotlib
matplotlib.use('Agg')
import os
import time
import numpy as np
from numpy import inf, random
import matplotlib.pyplot as plt
import pickle
import json
import robobo
import cv2
import sys
import signal
from pprint import pprint
import prey

import collections

use_simulation = True
run_test = False
speed = 20 if use_simulation else 30
dist = 500 if use_simulation else 400
rewards = [0]
fitness = [0]


def terminate_program(signal_number, frame):
    print("Ctrl-C received, terminating program")
    sys.exit(1)


def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo().connect(address='192.168.1.2', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="10.15.3.48")

    def get_sensor_info(direction):
        # Log-scale the raw IR readings so they fall in a comparable range
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
        # Sensor index map: 0 back_left, 1 back, 2 back_right, 3 front_right_right,
        # 4 front_right, 5 front, 6 front_left, 7 front_left_left
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return all_sensor_info[6]
        elif direction == 'front_left_left':
            return all_sensor_info[7]
        elif direction == 'front_right':
            return all_sensor_info[4]
        elif direction == 'front_right_right':
            return all_sensor_info[3]
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            print(all_sensor_info[3:])
            return all_sensor_info
        elif direction == 'front_3':
            return [all_sensor_info[3]] + [all_sensor_info[5]] + [all_sensor_info[7]]
        else:
            raise Exception('Invalid direction')

    # States are discretized as safe, almost safe, or not safe, and combined with the previous state.
    # Moving from safe to almost safe is good, almost safe to safe is okay, safe to safe is neutral:
    # the transition s -> a -> r -> s'.
    # Left/right actions are small rotations in place; straight drives forward.
    # The controller is the table of Q-values, with a boundary for every sensor.
    # (See the worked example below for what a discretized state and its Q-row look like.)
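    # Worked example (hypothetical values, for illustration only): a discretized state is
    # three sensor bits followed by three colour codes, e.g. [0, 1, 0, 0, 2, 0] means the
    # front sensor has crossed its boundary and the middle image slice is mostly green.
    # Its entry in state_table maps that state to one Q-value per action, e.g.
    #   state_table['[0, 1, 0, 0, 2, 0]'] == [1.4, -0.2, 0.7, 0, 0]
    # where index 0 is STRAIGHT, 1 is LEFT and 2 is RIGHT (the last two slots are never updated).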

    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary_sensor = 0.7 if not use_simulation else 0.95
    boundaries_color = [0.3, 0.7] if not use_simulation else [0.6, 0.98]

    # A static collision-avoidance policy
    def static_policy(s):
        if get_sensor_info('front_left') >= s \
                and get_sensor_info('front_left') > get_sensor_info('front_right'):
            return 2

        elif get_sensor_info('front_right') >= s \
                and get_sensor_info('front_right') > get_sensor_info('front_left'):
            return 1
        else:
            return 0

    state_table = {}
    if os.path.exists('./src/state_table.json'):
        with open('./src/state_table.json') as f:
            state_table = json.load(f)

    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon-greedy action selection
        """
        ACTIONS ARE DEFINED AS FOLLOWS:
          NUM: ACTION
          ------------
          0: STRAIGHT
          1: LEFT
          2: RIGHT
          ------------
        """
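        # With probability epsilon a random action is taken, otherwise the greedy
        # (highest-Q) action for this state; e.g. with epsilon = 0.08 roughly 8% of
        # steps explore at random. During run_test the policy is purely greedy.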
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_color_info():
        image = rob.get_image_front()

        # Mask function: returns the fraction of pixels that look green,
        # blacking out the non-green pixels in a local copy.
        def get_green_pixels(img):
            # Work on a copy: np.hsplit below returns views into `image`, so mutating
            # the full image here would corrupt the per-region counts.
            img = img.copy()
            count = 0
            pix_count = 0
            b = 64
            for i in range(len(img)):
                for j in range(len(img[i])):
                    pixel = img[i][j]
                    pix_count += 1
                    if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
                            or (pixel[0] > b and pixel[1] > b and pixel[2] > b):
                        img[i][j] = [0, 0, 0]
                        count += 1
            return 1 - (count / pix_count)

        left, middle_l, middle_r, right = np.hsplit(image, 4)
        middle = np.concatenate((middle_l, middle_r), axis=1)

        return get_green_pixels(image), get_green_pixels(left), get_green_pixels(middle), get_green_pixels(right)

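    # A vectorized alternative to the pixel loop above; a minimal sketch, assuming
    # rob.get_image_front() returns an H x W x 3 uint8 NumPy array with the same
    # channel order used by get_green_pixels. This hypothetical helper is not called
    # anywhere; it only shows the same masking condition expressed without loops.
    def get_green_fraction_vectorized(img, b=64):
        img = np.asarray(img, dtype=int)
        c0, c1, c2 = img[..., 0], img[..., 1], img[..., 2]
        # A pixel counts as "not green" if it is strong in channel 0 or 2 while weak
        # in channel 1, or bright in all three channels (same test as the loop).
        not_green = ((c0 > b) | (c2 > b)) & (c1 < 2 * b) | ((c0 > b) & (c1 > b) & (c2 > b))
        return 1.0 - np.count_nonzero(not_green) / not_green.size
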
    def get_reward(previous_state, new_state, action, prev_val, new_val):
        # Green fraction of the whole image before and after the action:
        # 0 means no green pixels, 1 means the entire image is green.
        total_c_prev = prev_val
        total_c_new = new_val
        print('previous: ', prev_val, 'new: ', new_val)

        max_new_sensor = np.max(new_state[:3])
        max_prev_sensor = np.max(previous_state[:3])
        # NOTE: max_c_new is computed from previous_state; if that is unintended, it should read new_state[3:].
        max_c_new = np.max(previous_state[3:])
        if max_c_new == 2:
            # TODO: check when actually hitting a green block, because sometimes this gives +30 before touching it.
            #  Changing the boundary doesn't help.
            print("30")
            return 30
        elif total_c_prev < total_c_new:
            # This one is already fixed
            return 10
        elif max_prev_sensor < max_new_sensor:
            # TODO: base this on the non-discretized values, which would be much better!
            print("-20")
            return -20
        # The else branch covers the case where nothing is seen; penalizing left/right turns
        # in the branches above might also help, but this is untested.
        else:
            return 1 if action == 0 else -1
        # TODO: give a negative reward for repetitions

    # Returns the discretized sensor values followed by the discretized color values.
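    # Worked example (values chosen for illustration): with boundary_sensor = 0.95 and
    # boundaries_color = [0.6, 0.98],
    #   make_discrete([0.2, 0.96, 0.1], 0.95, (0.5, 0.99, 0.2), [0.6, 0.98])
    # returns [0, 1, 0, 0, 2, 0]: only the middle sensor exceeds its boundary, and only
    # the middle colour value is above the upper colour boundary.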
    def make_discrete(values_s, boundary_s, values_c, boundaries_c):
        discrete_list_s = []
        discrete_list_c = []
        for x in values_s:
            if boundary_s > x:
                discrete_list_s.append(0)
            else:
                discrete_list_s.append(1)
        for y in values_c:
            if y < boundaries_c[0]:
                discrete_list_c.append(0)
            elif boundaries_c[0] < y < boundaries_c[1]:
                discrete_list_c.append(1)
            else:
                discrete_list_c.append(2)
        return discrete_list_s + discrete_list_c

    """
    REINFORCEMENT LEARNING PROCESS
    INPUT:  alpha    : learning rate
            gamma    : discount factor
            epsilon  : epsilon value for e-greedy
            episodes : no. of episodes
            act_lim  : no. of actions robot takes before ending episode
            qL       : True if you use Q-Learning
    """
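    # The update applied in rl() below is the standard temporal-difference rule
    #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * Q(s', a*) - Q(s, a)),
    # where a* is the greedy action in s' for Q-Learning (qL=True), or the action actually
    # chosen by the epsilon-greedy policy for SARSA (qL=False).
    # Worked example with the values used below (alpha=0.9, gamma=0.9): if Q(s, a) = 0,
    # the reward is 10 and the best Q-value in s' is 5, the new estimate is
    #   0 + 0.9 * (10 + 0.9 * 5 - 0) = 13.05.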
    stat_fitness = [0]
    stat_rewards = [0]

    def run_static(lim):
        for _ in range(lim):
            if use_simulation:
                rob.play_simulation()

            # Unpack the colour info as in rl() so the state has three colour entries
            _, left, mid, right = get_color_info()
            current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, (left, mid, right),
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(5)]

            action = static_policy(0.75)

            take_action(action)

            _, left, mid, right = get_color_info()
            new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, (left, mid, right),
                                      boundaries_color)

            # r = get_reward(current_state, new_state, action, current_color_info, new_color_info)

            # normalized_r = ((r - -10) / (1 - -10)) * (1 - -1) + -1
            # stat_fitness.append(stat_fitness[-1] + (normalized_r * np.max(get_sensor_info("all")[3:])))
            # print(fitness)
            # if stat_rewards:
            #     stat_rewards.append(stat_rewards[-1] + normalized_r)
            # else:
            #     rewards.append(normalized_r)

    def rl(alpha, gamma, epsilon, episodes, act_lim, qL=False):
        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False
            if use_simulation:
                rob.play_simulation()

            img, left, mid, right = get_color_info()
            current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, (left, mid, right),
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(5)]

            action = epsilon_policy(current_state, epsilon)
            # initialise state if it doesn't exist, else retrieve the current q-value
            x = 0
            while not terminate:

                take_action(action)
                # Green fraction of the whole image is used for the reward value;
                # left, mid, right are used to build the state accordingly
                new_img, left, mid, right = get_color_info()
                new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, (left, mid, right),
                                          boundaries_color)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(5)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(current_state, new_state, action, img, new_img)
                print(r)

                # normalized_r = ((r - -10) / (1 - -10)) * (1 - -1) + -1
                # fitness.append(fitness[-1] + normalized_r * np.max(get_sensor_info("front_3")))
                # # print(fitness)
                # if rewards:
                #     rewards.append(rewards[-1] + normalized_r)
                # else:
                #     rewards.append(normalized_r)

                # Update rule

                if not run_test:
                    # print('update')
                    state_table[str(current_state)][action] += \
                        alpha * (r + gamma * state_table[str(new_state)][max_action]
                                 - state_table[str(current_state)][action])

                # Stop episode if we get very close to an obstacle
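                # i.e. a front sensor has crossed its boundary (discretized value 1) while no
                # image region is strongly green (no colour code of 2), or the per-episode
                # action limit act_lim has been reached.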
                if (max(new_state[:3]) == 1 and max(new_state[3:]) != 2 and use_simulation) or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open('./src/state_table.json', 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                        time.sleep(2)

                # update current state and action
                current_state = new_state
                action = new_action
                # current_color_info = new_color_info
                img = new_img

                # increment action limit counter
                x += 1

    # alpha, gamma, epsilon, episodes, actions per episode
    # run_static(200)
    rl(0.9, 0.9, 0.08, 3, 500, qL=True)

    pprint(state_table)

    if run_test:
        all_rewards = []
        all_fits = []
        if os.path.exists('./src/rewards.csv'):
            with open('./src/rewards.csv') as f:
                all_rewards = pickle.load(f)

        if os.path.exists('./src/fitness.csv'):
            with open('./src/fitness.csv') as f:
                all_fits = pickle.load(f)

        all_rewards += rewards
        all_fits += fitness

        # print(all_rewards)
        # print(all_fits)

        # with open('./src/rewards.csv', 'w') as f:
        #     pickle.dump(all_rewards, f)
        #
        # with open('./src/fitness.csv', 'w') as f:
        #     pickle.dump(all_fits, f)
        #
        # with open('./src/stat_rewards.csv', 'w') as f:
        #     pickle.dump(stat_rewards, f)
        #

        # with open('./src/stat_fitness.csv', 'w') as f:
        #     pickle.dump(stat_fitness, f)
        #
        # plt.figure('Rewards')
        # plt.plot(all_rewards, label='Q-Learning Controller')
        # plt.plot(stat_rewards, label='Static Controller')
        # plt.legend()
        # plt.savefig("./src/plot_reward.png")
        # plt.show()
        #
        # plt.figure('Fitness Values')
        # plt.plot(all_fits, label='Q-Learning Controller')
        # plt.plot(stat_fitness, label='Static Controller')
        # plt.legend()
        # plt.savefig("./src/plot_fitness.png")
        # plt.show()


def image_test():
    signal.signal(signal.SIGINT, terminate_program)
    rob = robobo.SimulationRobobo().connect(address='192.168.1.2', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="172.20.10.11")
    if use_simulation:
        rob.play_simulation()
    # rob.set_phone_tilt(109, 90)

    image = rob.get_image_front()

    def get_green_pixels(img):
        # Work on a copy so that counting one region does not black out pixels
        # in the views produced by np.hsplit below.
        img = img.copy()
        count = 0
        pix_count = 0
        b = 64
        for i in range(len(img)):
            for j in range(len(img[i])):
                pixel = img[i][j]
                pix_count += 1
                if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
                        or (pixel[0] > b and pixel[1] > b and pixel[2] > b):
                    img[i][j] = [0, 0, 0]
                    count += 1
        return 1 - (count / pix_count)

        # rob.set_phone_tilt(109, 100)
        # image = rob.get_image_front()
        # print(image)

    left, middle_l, middle_r, right = np.hsplit(image, 4)
    middle = np.concatenate((middle_l, middle_r), axis=1)

    print(np.average([get_green_pixels(left), get_green_pixels(middle), get_green_pixels(right)]))
    print(np.average(get_green_pixels(image)))

    if use_simulation:
        print("stopping the simulation")
        rob.stop_world()
        while not rob.is_sim_stopped():
            print("waiting for the simulation to stop")
        time.sleep(2)


if __name__ == "__main__":
    main()
    # image_test()