#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
import matplotlib

matplotlib.use('Agg')
import os
import time
import numpy as np
from numpy import inf, random
import matplotlib.pyplot as plt
import pickle
import json
import robobo
import cv2
import sys
import signal
from pprint import pprint
import prey

import collections

use_simulation = True
run_test = False
speed = 20 if use_simulation else 50
dist = 500 if use_simulation else 400
rewards = [0]
fitness = list()


def terminate_program(signal_number, frame):
    print("Ctrl-C received, terminating program")
    sys.exit(1)


def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo().connect(address='192.168.1.2', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="10.15.3.48")
    rob.set_phone_tilt(45, 100) if use_simulation else rob.set_phone_tilt(109, 100)

    state_table = {}
    q_table_file = './src/state_table.json'
    if os.path.exists(q_table_file):
        with open(q_table_file) as g:
            state_table = json.load(g)

    def get_sensor_info(direction):
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
        # [0, 1, 2, 3, 4, 5, 6, 7]
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return all_sensor_info[6]
        elif direction == 'front_left_left':
            return all_sensor_info[7]
        elif direction == 'front_right':
            return all_sensor_info[4]
        elif direction == 'front_right_right':
            return all_sensor_info[3]
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            print(all_sensor_info[3:])
            return all_sensor_info
        elif direction == 'front_3':
            return [all_sensor_info[3]] + [all_sensor_info[5]] + [all_sensor_info[7]]
        else:
            raise Exception('Invalid direction')

    # Each sensor reading is discretized as safe, almost safe, or not safe, and combined with the
    # previous state (safe / almost safe / not safe): safe -> almost safe is good, almost safe -> safe
    # is okay, safe -> safe is neutral; transitions follow s -> a -> r -> s'.
    # Left/right actions are small steps (pure rotation); straight drives forward.
    # The controller is the table of Q-values, with a boundary for every sensor.
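    # Illustrative note (added): with the discretization below, a state is a list of six values
    # in {0, 1, 2}, e.g. [0, 1, 2, 0, 0, 1] -- the first three are the discretized 'front_3' IR
    # readings, the last three the discretized red-pixel ratios of the left, middle and right
    # thirds of the camera image.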

    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary_sensor = [0.6, 0.8] if not use_simulation else [0.5, 0.95]
    boundaries_color = [0.1, 0.7] if not use_simulation else [0.05, 0.85]

    # A static policy: steer towards the image third with the largest red (prey) fraction
    def static_policy(color_info):
        max_c = np.max(color_info)
        if max_c == color_info[0]:
            return 1
        elif max_c == color_info[1]:
            return 0
        elif max_c == color_info[2]:
            return 2
        return 0

    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon greedy
        """
        ACTIONS ARE DEFINED AS FOLLOWS:
          NUM: ACTION
          ------------
          0:   STRAIGHT
          1:   LEFT
          2:   RIGHT
          ------------
        """
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_color_info():
        image = rob.get_image_front()

        # Mask function
        def get_red_pixels(img):
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            lower_range = np.array([0, 50, 20])
            upper_range = np.array([5, 255, 255])
            mask = cv2.inRange(hsv, lower_range, upper_range)
            # print(get_green_pixels(image))
            cv2.imwrite('a.png', mask)
            a = b = 0
            for i in mask:
                for j in i:
                    b += 1
                    if j == 255:
                        a += 1
            return a / b
            # count = 0
            # pix_count = 0
            # b = 64
            # for i in range(len(img)):
            #     for j in range(len(img[i])):
            #         pixel = img[i][j]
            #         pix_count += 1
            #         if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
            #                 or (pixel[0] > b*2 and pixel[1] > b*2 and pixel[2] > b*2):
            #             # img[i][j] = [0, 0, 0]
            #             count += 1
            # return 1 - (count / pix_count)

        left, middle_l, middle_r, right = np.hsplit(image, 4)
        middle = np.concatenate((middle_l, middle_r), axis=1)
        return get_red_pixels(left), get_red_pixels(middle), get_red_pixels(right)
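    # Note (added): cv2.inRange returns a mask of 0/255 values, so the ratio computed in
    # get_red_pixels is equivalent to the vectorized form
    #     np.count_nonzero(mask) / float(mask.size)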

    def get_reward(previous_state, new_state,
                   previous_sensor, new_sensor,
                   prev_action, action,
                   prev_val, new_val):

        # Fraction of red in the image thirds, before and after the action
        # 0: no red pixels in the image; 1: the whole image consists of red pixels

        prev_left, prev_mid, prev_right = prev_val
        sum_prev_val = sum(prev_val)
        new_left, new_mid, new_right = new_val
        sum_new_val = sum(new_val)
        max_new_sensor = np.max(new_sensor)
        max_prev_sensor = np.max(previous_sensor)
        max_c_prev = np.max(previous_state[3:])
        max_c_new = np.max(new_state[3:])

        # Encourages going towards the prey
        if max_c_prev == 0 and max_c_new == 1:
            return 10 if action == 0 else 0

        # Massive payoff if we get super close to the prey
        if max_c_prev == 1 and max_c_new == 2:
            return 30
        if max_c_prev == 2 and max_c_new == 2 \
                and max_prev_sensor == 1:
            return 20

        # Nothing happens if the prey gets away
        if max_c_prev == 2 and max_c_new == 1:
            return 0

        # Give a good reward if we see more red than before
        if sum_prev_val < sum_new_val:
            return 10 if action == 0 else 0

        # If the sensors detect the enemy, give a good payoff.
        # If the sensors detect a wall, give a bad payoff to steer clear.
        if max_new_sensor > max_prev_sensor:
            return 30 if max_c_new >= 1 else -5

        # Give a small payoff to encourage exploring (going straight), and a minor
        # penalty for turning around, but not bad enough to discourage it.
        return 1 if action == 0 else -1

    # Returns a list of discretized sensor values and color values.
    def make_discrete(values_s, boundary_s, values_c, boundaries_c):
        discrete_list_s = []
        discrete_list_c = []

        for x in values_s:
            if boundary_s[0] > x:
                discrete_list_s.append(0)
            elif boundary_s[1] > x > boundary_s[0]:
                discrete_list_s.append(1)
            else:
                discrete_list_s.append(2)
        for y in values_c:
            if y < boundaries_c[0]:
                discrete_list_c.append(0)
            elif boundaries_c[0] < y < boundaries_c[1]:
                discrete_list_c.append(1)
            else:
                discrete_list_c.append(2)
        print('real c_values: ', values_c)
        return discrete_list_s + discrete_list_c
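    # Worked example (added, using the simulation boundaries defined above):
    #     make_discrete([0.2, 0.7, 0.99], [0.5, 0.95], (0.02, 0.4, 0.9), [0.05, 0.85])
    # returns [0, 1, 2, 0, 1, 2].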

    """
    REINFORCEMENT LEARNING PROCESS
    INPUT:  alpha    : learning rate
            gamma    : discount factor
            epsilon  : epsilon value for e-greedy
            episodes : no. of episodes
            act_lim  : no. of actions robot takes before ending episode
            qL       : True if you use Q-Learning
    """
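    # Added note (not in the original): the update applied in rl() below is the tabular rule
    #     Q(s, a) <- Q(s, a) + alpha * (r + gamma * Q(s', a*) - Q(s, a)),
    # where a* = argmax_a' Q(s', a') when qL=True (Q-learning), and a* is the action actually
    # chosen by the epsilon-greedy policy when qL=False (SARSA).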
    stat_fitness = list()
    stat_rewards = [0]

    def normalize(reward, old_min, old_max, new_min=-1, new_max=1):
        return ((reward - old_min) / (old_max - old_min)) * (new_max - new_min) + new_min
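    # Worked example (added): normalize(20, -30, 20) == 1.0 and normalize(-30, -30, 20) == -1.0,
    # i.e. values in [old_min, old_max] are rescaled linearly onto [new_min, new_max].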

    # def run_static(lim, no_blocks=0):
    #     for i in range(lim):
    #         if use_simulation:
    #             rob.play_simulation()
    #
    #         a, b, c = get_color_info()
    #         current_color_info = a, b, c
    #         current_sensor_info = get_sensor_info('front_3')
    #
    #         current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, current_color_info,
    #                                       boundaries_color)
    #
    #         if str(current_state) not in state_table.keys():
    #             state_table[str(current_state)] = [0 for _ in range(3)]
    #
    #         a, b, c = get_color_info()
    #         new_color_info = a, b, c
    #         # print(a, b, c, new_color_info)
    #
    #         action = static_policy(new_color_info)
    #
    #         take_action(action)
    #
    #         new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, new_color_info,
    #                                   boundaries_color)
    #         # TODO: make sure that current color info gets initialized the first time.
    #         r = get_reward(current_state, new_state, action, current_color_info, new_color_info, no_blocks)
    #         if r == 20:
    #             no_blocks += 1
    #
    #         norm_r = normalize(r, -30, 20)
    #
    #         if i != 0:
    #             stat_fitness.append(stat_fitness[-1] + (no_blocks / i))
    #         else:
    #             stat_fitness.append(float(0))
    #         print(fitness)
    #         if stat_rewards:
    #             stat_rewards.append(stat_rewards[-1] + norm_r)
    #         else:
    #             rewards.append(norm_r)
    #
    #         current_state = new_state
    #         current_color_info = new_color_info

    def rl(alpha, gamma, epsilon, episodes, act_lim, param_tuples, qL=False, no_blocks=0):

        fitness = list()
        rewards = [0]

        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False
            if use_simulation:
                rob.play_simulation()

            current_color_space = get_color_info()
            current_sensor_info = get_sensor_info('front_3')
            current_state = make_discrete(current_sensor_info, boundary_sensor, current_color_space,
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            action = epsilon_policy(current_state, epsilon)
            # current_collected_food = rob.collected_food() if use_simulation else 0
            # initialise state if it doesn't exist, else retrieve the current q-value
            x = 0
            while not terminate:

                take_action(action)
                # new_collected_food = rob.collected_food() if use_simulation else 0

                # Whole img extracted to get reward value
                # left, mid, right extracted to save state space accordingly

                new_color_space = get_color_info()
                new_sensor_info = get_sensor_info('front_3')
                new_state = make_discrete(new_sensor_info, boundary_sensor, new_color_space,
                                          boundaries_color)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(3)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(current_state, new_state,
                               current_sensor_info, new_sensor_info,
                               action, new_action,
                               current_color_space, new_color_space)
                print("State and obtained Reward: ", new_state, r)

                # norm_r = normalize(r, -30, 20)
                #
                # if i != 0:
                #     fitness.append(no_blocks / i)
                # else:
                #     fitness.append(float(0))
                # # print(fitness)
                # if rewards:
                #     rewards.append(rewards[-1] + norm_r)
                # else:
                #     rewards.append(norm_r)

                # Update rule
                if not run_test:
                    # print('update')
                    state_table[str(current_state)][action] += \
                        alpha * (r + (gamma *
                                      np.array(
                                          state_table[str(new_state)][max_action]))
                                 - np.array(state_table[str(current_state)][action]))

                # Stop episode if we get very close to an obstacle
                if (max(new_state[:3]) == 2 and max(new_state[3:]) != 2 and use_simulation) or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open(q_table_file, 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                            time.sleep(2)

                # update current state and action
                current_state = new_state
                current_sensor_info = new_sensor_info
                action = new_action
                current_color_space = new_color_space

                # increment action limit counter
                x += 1

        return fitness, rewards

    experiments = 2

    epsilons = [0.01, 0.08, 0.22]
    gammas = [0.9]
    param_tuples = [(epsilon, gamma) for epsilon in epsilons for gamma in gammas]
    _, _ = rl(0.9, 0, 0, 30, 500, [()],
              qL=True)  # alpha, gamma, epsilon, episodes, actions per episode
    # RL_fitnesses = {key: np.zeros(eps) for key in param_tuples}
    # for epsilon, gamma in param_tuples:
    #
    #     for run in range(experiments):
    #         print('======= RUNNING FOR epsilon ' + str(epsilon) + ' and gamma ' + str(gamma),
    #               ' , this is run ' + str(run))
    #         fitness, rewards = rl(0.9, gamma, epsilon, eps, timesteps, param_tuples,
    #                               qL=True)  # alpha, gamma, epsilon, episodes, actions per episode
    #
    #         # RL_fitnesses[(epsilon, gamma)][0] = fitness
    #
    #         # if not run_test:
    #         #     if os.path.exists('./src/rewards.csv'):
    #         #         with open('./src/rewards.csv') as f:
    #         #             all_rewards = pickle.load(f)
    #         #
    #         #     if os.path.exists('./src/fitness.csv'):
    #         #         with open('./src/fitness.csv') as f:
    #         #             all_fits = pickle.load(f)
    #         file_name_rewards = './src/rewards_epsilon' + str(epsilon) + '_run' + str(run) + '.csv'
    #         with open(file_name_rewards, 'wb') as f:
    #             pickle.dump(rewards, f)
    #
    #         file_name_fitness = './src/fitnesss_epsilon' + str(epsilon) + '_run' + str(run) + '.csv'
    #         with open(file_name_fitness, 'wb') as f:
    #             pickle.dump(fitness, f)
    #
    #         # with open('./src/stat_rewards.csv', 'w') as f:
    #         #     pickle.dump(stat_rewards, f)
    #         #
    #
    #         # with open('./src/stat_fitness.csv', 'w') as f:
    #         #     pickle.dump(stat_fitness, f)
    #         #
    #
    #         # plt.figure('Fitness Values')
    #


    # def image_test():
    #
    #     def get_green_pixels(img):
    #         count = 0
    #         pix_count = 0
    #         b = 64
    #         for i in range(len(img)):
    #             for j in range(len(img[i])):
    #                 pixel = img[i][j]
    #                 pix_count += 1
    #                 if (pixel[0] > b/4 or pixel[2] > b/4) and pixel[1] < b/2 \
    #                         or (pixel[0] > b/8 and pixel[1] > b/8 and pixel[2] > b/8):
    #                     img[i][j] = [0, 0, 0]
    #                     count += 1
    #         return 1 - (count / pix_count)
    #     s = 15
    #     # image = rob.get_image_front()
    #     image = cv2.imread('img.png')
    #     hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    #     lower_range = np.array([36, 0, 0])
    #     upper_range = np.array([86, 255, 255])
    #     mask = cv2.inRange(hsv, lower_range, upper_range)
    #     pprint(mask)
    #     # print(get_green_pixels(image))
    #     cv2.imwrite('a.png', mask)
    #     a = b = 0
    #     for i in mask:
    #         for j in i:
    #             b += 1
    #             if j == 255:
    #                 a += 1
    #     return a / b

if __name__ == "__main__":
    main()