#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals

import matplotlib
matplotlib.use('Agg')
import os
import time
import numpy as np
from numpy import inf, random
import matplotlib.pyplot as plt
import pickle
import json
import robobo
import cv2
import sys
import signal
from pprint import pprint
import prey

import collections

use_simulation = True
run_test = False
speed = 20 if use_simulation else 30
dist = 500 if use_simulation else 400
rewards = [0]
fitness = [0]

def terminate_program(signal_number, frame):
    print("Ctrl-C received, terminating program")
    sys.exit(1)

def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo().connect(address='145.108.72.23', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="10.15.3.48")
    rob.set_phone_tilt(45, 100)

    def get_sensor_info(direction):
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
        # [0, 1, 2, 3, 4, 5, 6, 7]
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return all_sensor_info[6]
        elif direction == 'front_left_left':
            return all_sensor_info[7]
        elif direction == 'front_right':
            return all_sensor_info[4]
        elif direction == 'front_right_right':
            return all_sensor_info[3]
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            print(all_sensor_info[3:])
            return all_sensor_info
        elif direction == 'front_3':
            return [all_sensor_info[3]] + [all_sensor_info[5]] + [all_sensor_info[7]]
        else:
            raise Exception('Invalid direction')

    # Each sensor reading is discretized as safe, almost safe, or not safe and combined with the
    # previous state's discretization: safe -> almost safe is good, almost safe -> safe is okay,
    # safe -> safe is neutral. Transitions follow s -> a -> r -> s'.
    # Left/right are small rotations in place; straight drives forward.
    # The controller is the table of Q-values, with a boundary for every sensor.

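    # Illustrative note (added for clarity, not in the original paste): a discretized state as
    # produced by make_discrete() below is a list of six values in {0, 1, 2} -- three IR readings
    # (front-right, front, front-left, from 'front_3') followed by three green ratios (left,
    # middle, right image bands). For example, [0, 1, 0, 2, 0, 0] would mean the front sensor is
    # in the "almost safe" band while most of the visible green sits in the left image band.
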
    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary_sensor = [0.6, 0.8] if not use_simulation else [0.5, 0.95]
    boundaries_color = [0.1, 0.7] if not use_simulation else [0.05, 0.85]

    # A static (hand-coded) baseline policy: turn towards the image band with the most green.
    def static_policy(color_info):
        max_c = np.max(color_info)
        if max_c == color_info[0]:
            return 1
        elif max_c == color_info[1]:
            return 0
        elif max_c == color_info[2]:
            return 2
        return 0

    state_table = {}
    if os.path.exists('./src/state_table.json'):
        with open('./src/state_table.json') as f:
            state_table = json.load(f)

    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon greedy
        """
        ACTIONS ARE DEFINED AS FOLLOWS:
          NUM: ACTION
          ------------
            0: STRAIGHT
            1: LEFT
            2: RIGHT
          ------------
        """
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

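    # Note (added for clarity, not in the original paste): with the epsilon of 0.08 used in the
    # rl() call near the bottom of main(), roughly 8% of steps pick a uniformly random action
    # (exploration) and the rest follow np.argmax over the stored Q-values (exploitation).
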
    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_color_info():
        image = rob.get_image_front()

        # Mask function
        def get_green_pixels(img):
            count = 0
            pix_count = 0
            b = 64
            for i in range(len(img)):
                for j in range(len(img[i])):
                    pixel = img[i][j]
                    pix_count += 1
                    if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
                            or (pixel[0] > b and pixel[1] > b and pixel[2] > b):
                        # img[i][j] = [0, 0, 0]
                        count += 1
            return 1 - (count / pix_count)

        left, middle_l, middle_r, right = np.hsplit(image, 4)
        middle = np.concatenate((middle_l, middle_r), axis=1)
        return get_green_pixels(left), get_green_pixels(middle), get_green_pixels(right)
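    # Added note (not in the original paste): np.hsplit(image, 4) cuts the frame into four
    # equal-width strips, so "left" and "right" above are the outer quarters while "middle" is
    # the re-joined central half. Each returned value is 1 minus the fraction of pixels the mask
    # flags as non-green, i.e. the share of the band treated as green (0 = no green, 1 = all green).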

    def get_reward(previous_state, new_state,
                   previous_sensor, new_sensor,
                   prev_action, action,
                   prev_val, new_val,
                   prev_food, new_food):

        # Max no. of green in img, before and after
        # 0: No green pixels in img; 1: All img consists of green pixels

        prev_right, prev_mid, prev_left = prev_val
        new_right, new_mid, new_left = new_val
        max_new_sensor = np.max(new_sensor)
        max_prev_sensor = np.max(previous_sensor)
        max_c_prev = np.max(previous_state[3:])
        max_c_new = np.max(new_state[3:])

        # TODO Check which condition to keep
        if max_c_new == 2:
            print("30")
            return 30
        if prev_mid < new_mid:
            return 10 if action == 0 else -1
        l = prev_left < new_left
        r = prev_right < new_right
        if l or r:
            if l:
                if action == 1:
                    return 10 if not r else -2
            elif r:
                if action == 2:
                    return 10 if not l else -2
            else:
                return 5
        if max_new_sensor > boundary_sensor[1]:
            return -100
        # if max_prev_sensor < max_new_sensor:
        #     print('here')
        #     return -2
        # The else branch covers the case where nothing is seen; we may also want to penalise
        # going left or right in one of the other if statements, but we are not sure.

        if (action == 1 and prev_action == 2) or (action == 2 and prev_action == 1):
            return -5

        return 1 if action == 0 else -1
        # TODO give negative reward for repetitions
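        # Summary of the reward scheme above (added for readability, not in the original paste):
        #   +30  a colour band reaches the highest green bin
        #   +10  green increased in the band matching the chosen action (-1/-2 otherwise)
        #  -100  an IR sensor crossed the upper boundary (about to collide)
        #    -5  immediately reversing the previous turn (left <-> right oscillation)
        #    +1  going straight by default, -1 for turning without progress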

    # Returns a list with discretized sensor values followed by discretized colour values.
    def make_discrete(values_s, boundary_s, values_c, boundaries_c):
        discrete_list_s = []
        discrete_list_c = []

        for x in values_s:
            if boundary_s[0] > x:
                discrete_list_s.append(0)
            elif boundary_s[1] > x > boundary_s[0]:
                discrete_list_s.append(1)
            else:
                discrete_list_s.append(2)
        for y in values_c:
            if y < boundaries_c[0]:
                discrete_list_c.append(0)
            elif boundaries_c[0] < y < boundaries_c[1]:
                discrete_list_c.append(1)
            else:
                discrete_list_c.append(2)
        print('real c_values: ', values_c)
        return discrete_list_s + discrete_list_c

  230. """
  231. REINFORCEMENT LEARNING PROCESS
  232. INPUT: alpha : learning rate
  233. gamma : discount factor
  234. epsilon : epsilon value for e-greedy
  235. episodes : no. of episodes
  236. act_lim : no. of actions robot takes before ending episode
  237. qL : True if you use Q-Learning
  238. """
    stat_fitness = list()
    stat_rewards = [0]

    def normalize(reward, old_min, old_max, new_min=-1, new_max=1):
        return ((reward - old_min) / (old_max - old_min)) * (new_max - new_min) + new_min
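    # Worked example (added for clarity, not in the original paste): with the (-30, 20) range used
    # below, normalize(-30, -30, 20) == -1.0, normalize(-5, -30, 20) == 0.0 and
    # normalize(20, -30, 20) == 1.0, i.e. a plain linear rescaling of the raw reward into [-1, 1].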

    def run_static(lim, no_blocks=0):
        for i in range(lim):
            if use_simulation:
                rob.play_simulation()

            a, b, c = get_color_info()
            current_color_info = a, b, c
            current_sensor_info = get_sensor_info('front_3')

            current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, current_color_info,
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            a, b, c = get_color_info()
            new_color_info = a, b, c
            # print(a, b, c, new_color_info)

            action = static_policy(new_color_info)

            take_action(action)

            new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, new_color_info,
                                      boundaries_color)
            # TODO: make sure that current color info gets initialized the first time.
            r = get_reward(current_state, new_state, action, current_color_info, new_color_info)
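            # Added note (not in the original paste): this call passes only five arguments, while
            # get_reward() as defined above expects ten; run_static() is only referenced from the
            # commented-out call further down, but this call would need updating before use.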
            if r == 20:
                no_blocks += 1

            norm_r = normalize(r, -30, 20)

            if i != 0:
                stat_fitness.append(stat_fitness[-1] + (no_blocks / i))
            else:
                stat_fitness.append(float(0))
            print(fitness)
            if stat_rewards:
                stat_rewards.append(stat_rewards[-1] + norm_r)
            else:
                rewards.append(norm_r)

            current_state = new_state
            current_color_info = new_color_info

    def rl(alpha, gamma, epsilon, episodes, act_lim, qL=False, no_blocks=0):
        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False
            if use_simulation:
                rob.play_simulation()

            current_color_space = get_color_info()
            current_sensor_info = get_sensor_info('front_3')
            current_state = make_discrete(current_sensor_info, boundary_sensor, current_color_space,
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            action = epsilon_policy(current_state, epsilon)
            current_collected_food = rob.collected_food()
            # initialise state if it doesn't exist, else retrieve the current q-value
            x = 0
            while not terminate:

                take_action(action)
                new_collected_food = rob.collected_food()

                # Whole img extracted to get reward value
                # left, mid, right extracted to save state space accordingly

                new_color_space = get_color_info()
                new_sensor_info = get_sensor_info('front_3')
                new_state = make_discrete(new_sensor_info, boundary_sensor, new_color_space,
                                          boundaries_color)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(3)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(current_state, new_state,
                               current_sensor_info, new_sensor_info,
                               action, new_action,
                               current_color_space, new_color_space,
                               current_collected_food, new_collected_food)
                print("State and obtained Reward: ", new_state, r)

                norm_r = normalize(r, -30, 20)

                if i != 0:
                    stat_fitness.append(stat_fitness[-1] + (no_blocks / i))
                else:
                    stat_fitness.append(float(0))
                # print(fitness)
                if rewards:
                    rewards.append(rewards[-1] + norm_r)
                else:
                    rewards.append(norm_r)

                # Update rule
                if not run_test:
                    # print('update')
                    state_table[str(current_state)][action] += alpha * (
                        r + gamma * state_table[str(new_state)][max_action]
                        - state_table[str(current_state)][action])
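                # The update above is the standard tabular temporal-difference rule,
                #     Q(s, a) <- Q(s, a) + alpha * (r + gamma * Q(s', a*) - Q(s, a)),
                # where a* is argmax_a Q(s', a) when qL=True (Q-Learning) and the epsilon-greedy
                # pick new_action otherwise (SARSA). This note is added for clarity and is not
                # part of the original paste.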

                # Stop episode if we get very close to an obstacle
                if (max(new_state[:3]) == 2 and max(new_state[3:]) != 2 and use_simulation) or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open('./src/state_table.json', 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                            time.sleep(2)

                # update current state and action
                current_state = new_state
                current_sensor_info = new_sensor_info
                action = new_action
                current_color_space = new_color_space
                current_collected_food = new_collected_food
                # img = new_img

                # increment action limit counter
                x += 1

    # alpha, gamma, epsilon, episodes, actions per episode
    # run_static(200)
    rl(0.9, 0.9, 0.08, 50, 250, qL=True)

    pprint(state_table)

    if run_test:
        all_rewards = []
        all_fits = []
        if os.path.exists('./src/rewards.csv'):
            with open('./src/rewards.csv') as f:
                all_rewards = pickle.load(f)

        if os.path.exists('./src/fitness.csv'):
            with open('./src/fitness.csv') as f:
                all_fits = pickle.load(f)

        all_rewards += rewards
        all_fits += fitness

        # print(all_rewards)
        # print(all_fits)

        # with open('./src/rewards.csv', 'w') as f:
        #     pickle.dump(all_rewards, f)
        #
        # with open('./src/fitness.csv', 'w') as f:
        #     pickle.dump(all_fits, f)
        #
        # with open('./src/stat_rewards.csv', 'w') as f:
        #     pickle.dump(stat_rewards, f)
        #
        # with open('./src/stat_fitness.csv', 'w') as f:
        #     pickle.dump(stat_fitness, f)
        #
        # plt.figure('Rewards')
        # plt.plot(all_rewards, label='Q-Learning Controller')
        # plt.plot(stat_rewards, label='Static Controller')
        # plt.legend()
        # plt.savefig("./src/plot_reward.png")
        # plt.show()
        #
        # plt.figure('Fitness Values')
        # plt.plot(all_fits, label='Q-Learning Controller')
        # plt.plot(stat_fitness, label='Static Controller')
        # plt.legend()
        # plt.savefig("./src/plot_fitness.png")
        # plt.show()


def image_test():
    signal.signal(signal.SIGINT, terminate_program)
    rob = robobo.SimulationRobobo().connect(address='192.168.1.2', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="172.20.10.11")
    if use_simulation:
        rob.play_simulation()
    # rob.set_phone_tilt(109, 90)
    image = rob.get_image_front()

    rob.move(50, -50, 1000)

    def get_green_pixels(img):
        count = 0
        pix_count = 0
        b = 64
        for i in range(len(img)):
            for j in range(len(img[i])):
                pixel = img[i][j]
                pix_count += 1
                if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
                        or (pixel[0] > b and pixel[1] > b and pixel[2] > b):
                    img[i][j] = [0, 0, 0]
                    count += 1
        return 1 - (count / pix_count)

    # rob.set_phone_tilt(109, 100)
    # image = rob.get_image_front()
    # print(image)

    left, middle_l, middle_r, right = np.hsplit(image, 4)
    middle = np.concatenate((middle_l, middle_r), axis=1)

    x = rob.collected_food()
    pprint(x)
    # print([get_green_pixels(left), get_green_pixels(middle), get_green_pixels(right)])
    print(np.average(get_green_pixels(image)))

    if use_simulation:
        print("stopping the simulation")
        rob.stop_world()
        while not rob.is_sim_stopped():
            print("waiting for the simulation to stop")
            time.sleep(2)


if __name__ == "__main__":
    main()
    # image_test()