New fitness, let's give it a try

a guest · Jan 29th, 2020
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import time
import numpy as np
from numpy import inf, random
import pickle
import json
import robobo
import cv2
import sys
import signal
from pprint import pprint
import prey

import collections

use_simulation = True
run_test = False
speed = 20 if use_simulation else 50
dist = 500 if use_simulation else 400
rewards = [0]
fitness = list()
MIN_REWARD = -2.5
MAX_REWARD = 30
MIN_TIMESTEPS = 0
MAX_TIMESTEPS = 200


def terminate_program(signal_number, frame):
    print("Ctrl-C received, terminating program")
    sys.exit(1)


def main():
    signal.signal(signal.SIGINT, terminate_program)

    virtual_ip = '145.108.234.105'
    robot_ip = '10.15.3.48'

    rob = robobo.SimulationRobobo().connect(address=virtual_ip, port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address=robot_ip)
    if use_simulation:
        rob.set_phone_tilt(45, 100)
    else:
        rob.set_phone_tilt(100, 100)

    state_table = {}
    q_table_file = './src/state_table_trustworthy.json'
    if os.path.exists(q_table_file):
        with open(q_table_file) as g:
            state_table = json.load(g)

    def get_sensor_info(direction):
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
        # Sensor indices: [0, 1, 2, 3, 4, 5, 6, 7]
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return all_sensor_info[6]
        elif direction == 'front_left_left':
            return all_sensor_info[7]
        elif direction == 'front_right':
            return all_sensor_info[4]
        elif direction == 'front_right_right':
            return all_sensor_info[3]
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            print(all_sensor_info[3:])
            return all_sensor_info
        elif direction == 'front_3':
            return [all_sensor_info[3]] + [all_sensor_info[5]] + [all_sensor_info[7]]
        else:
            raise Exception('Invalid direction')

    # Each sensor reading is discretized as safe, almost safe, or not safe, and combined with the
    # previous state (s -> a -> r -> s'): going from safe to almost safe is good, almost safe back
    # to safe is okay, and safe to safe is neutral.
    # Left and right actions are small rotations in place; straight drives forward.
    # The controller is the Q-table: a set of values over the discretized sensor boundaries.
    # An example of the resulting state layout is sketched below.
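    # Sketch of the state representation used below (the numbers here are made up for illustration):
    # a state is six discrete values in {0, 1, 2}, built by make_discrete() from the three front
    # IR sensors (front_right_right, front, front_left_left) followed by the red-pixel fractions
    # of the left, middle and right image thirds, e.g. [0, 1, 2, 0, 1, 2].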

    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary_sensor = [0.6, 0.8] if not use_simulation else [0.5, 0.95]
    boundaries_color = [0.01, 0.2] if not use_simulation else [0.001, 0.4]

    # A static collision-avoidance policy
    def static_policy(color_info):
        max_c = np.max(color_info)
        if max_c == color_info[0]:
            return 1
        elif max_c == color_info[1]:
            return 0
        elif max_c == color_info[2]:
            return 2
        return 0

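    # Note: static_policy steers toward the image third containing the most red, assuming color_info
    # is ordered (left, middle, right) as returned by get_color_info(): most red on the left -> LEFT (1),
    # in the middle -> STRAIGHT (0), on the right -> RIGHT (2). It is only used by the commented-out
    # run_static() baseline further down.
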
    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon greedy
        """
        ACTIONS ARE DEFINED AS FOLLOWS:
          NUM: ACTION
          ------------
          0: STRAIGHT
          1: LEFT
          2: RIGHT
          ------------
        """
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

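    # Usage sketch (illustrative, not from a real run): with epsilon = 0.08 and run_test = False,
    # epsilon_policy picks a uniformly random action from {0, 1, 2} about 8% of the time and the
    # greedy action np.argmax(state_table[s]) otherwise; with run_test = True, e is forced to 0,
    # so the policy is purely greedy.
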
    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_color_info():
        image = rob.get_image_front()

        # Mask function
        def get_red_pixels(img):
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            lower_range = np.array([0, 50, 20])
            upper_range = np.array([5, 255, 255])
            mask = cv2.inRange(hsv, lower_range, upper_range)
            # print(get_green_pixels(image))
            cv2.imwrite('a.png', mask)
            a = b = 0
            for i in mask:
                for j in i:
                    b += 1
                    if j == 255:
                        a += 1
            return a / b
            # count = 0
            # pix_count = 0
            # b = 64
            # for i in range(len(img)):
            #     for j in range(len(img[i])):
            #         pixel = img[i][j]
            #         pix_count += 1
            #         if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
            #                 or (pixel[0] > b*2 and pixel[1] > b*2 and pixel[2] > b*2):
            #             # img[i][j] = [0, 0, 0]
            #             count += 1
            # return 1 - (count / pix_count)

        left, middle_l, middle_r, right = np.hsplit(image, 4)
        middle = np.concatenate((middle_l, middle_r), axis=1)
        return get_red_pixels(left), get_red_pixels(middle), get_red_pixels(right)

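    # Note on the mask above: the HSV range [0, 50, 20]..[5, 255, 255] keeps only low-hue (red-ish)
    # pixels, and each returned value is the fraction of masked pixels in that image strip, so
    # get_color_info() yields three numbers in [0, 1] for the left, middle and right parts of the view.
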
    def get_reward(previous_state, new_state,
                   previous_sensor, new_sensor,
                   prev_action, action,
                   prev_val, new_val):

        # Fraction of red in the image, before and after
        # 0: no red pixels in the image; 1: the whole image consists of red pixels

        prev_left, prev_mid, prev_right = prev_val
        sum_prev_val = sum(prev_val)
        new_left, new_mid, new_right = new_val
        sum_new_val = sum(new_val)
        max_new_sensor = np.max(new_sensor)
        max_prev_sensor = np.max(previous_sensor)
        max_c_prev = np.max(previous_state[3:])
        max_c_new = np.max(new_state[3:])

        # Encourages going towards the prey
        if max_c_prev == 0 and max_c_new == 1:
            return 10 if action == 0 else 2

        # Massive payoff if we get super close to the prey
        if max_c_prev == 1 and max_c_new == 2:
            return 30

        # Nothing happens if the prey gets a little away
        if max_c_prev == 2 and max_c_new == 1:
            return 0

        # A LOT happens when the prey is no longer in sight
        if max_c_prev == 1 and max_c_new == 0:
            return -3

        # Give a good reward if we see more red than before
        if sum_prev_val < sum_new_val:
            return 5 if action == 0 else 0

        # If the sensors detect the enemy, give a good payoff.
        # If they detect a wall, give a bad payoff to steer clear.
        if max_new_sensor > max_prev_sensor:
            return 15 if max_c_new >= 1 else -3

        # Penalize oscillating left-right turns when nothing is sensed
        # (parentheses added: `and` binds tighter than `or`)
        if ((prev_action == 1 and action == 2)
                or (prev_action == 2 and action == 1)) and max_new_sensor == 0:
            return -5

        # if prev_action != 0 or action != 0:
        #     return -5

        # Neutral payoff for exploring (going straight);
        # a minor penalty for turning, but not enough to discourage it.
        return 0 if action == 0 else -1

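    # For reference, the branches above can return: +30 (very close to the prey), +15 (sensors pick
    # the prey up), +10 / +2 (prey comes into view), +5 (more red than before while going straight),
    # 0, -1 (turning), -3 (prey lost or wall detected) and -5 (left-right oscillation). Note that -5
    # lies below MIN_REWARD = -2.5, so normalize() can map a reward slightly below -1.
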
    # Returns a list with the discretized sensor values followed by the discretized color values.
    def make_discrete(values_s, boundary_s, values_c, boundaries_c):
        discrete_list_s = []
        discrete_list_c = []

        for x in values_s:
            if boundary_s[0] > x:
                discrete_list_s.append(0)
            elif boundary_s[1] > x > boundary_s[0]:
                discrete_list_s.append(1)
            else:
                discrete_list_s.append(2)
        for y in values_c:
            if y < boundaries_c[0]:
                discrete_list_c.append(0)
            elif boundaries_c[0] < y < boundaries_c[1]:
                discrete_list_c.append(1)
            else:
                discrete_list_c.append(2)
        print('real c_values: ', values_c)
        return discrete_list_s + discrete_list_c
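    # Worked example (hypothetical readings, simulation boundaries [0.5, 0.95] and [0.001, 0.4]):
    # make_discrete([0.3, 0.7, 1.0], boundary_sensor, [0.0005, 0.1, 0.5], boundaries_color)
    # -> sensors [0, 1, 2] + colors [0, 1, 2] = [0, 1, 2, 0, 1, 2]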

    """
    REINFORCEMENT LEARNING PROCESS
    INPUT:  alpha    : learning rate
            gamma    : discount factor
            epsilon  : epsilon value for e-greedy
            episodes : no. of episodes
            act_lim  : no. of actions the robot takes before ending an episode
            qL       : True if you use Q-Learning
    """
    stat_fitness = list()
    stat_rewards = [0]

    def normalize(reward, old_min, old_max, new_min=-1, new_max=1):
        return ((reward - old_min) / (old_max - old_min)) * (new_max - new_min) + new_min
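    # Worked example: normalize(15, MIN_REWARD, MAX_REWARD) = ((15 + 2.5) / 32.5) * 2 - 1 ~ 0.077,
    # i.e. rewards are mapped linearly from [-2.5, 30] onto [-1, 1].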

    # def run_static(lim, no_blocks=0):
    #     for i in range(lim):
    #         if use_simulation:
    #             rob.play_simulation()
    #
    #         a, b, c = get_color_info()
    #         current_color_info = a, b, c
    #         current_sensor_info = get_sensor_info('front_3')
    #
    #         current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, current_color_info,
    #                                       boundaries_color)
    #
    #         if str(current_state) not in state_table.keys():
    #             state_table[str(current_state)] = [0 for _ in range(3)]
    #
    #         a, b, c = get_color_info()
    #         new_color_info = a, b, c
    #         # print(a, b, c, new_color_info)
    #
    #         action = static_policy(new_color_info)
    #
    #         take_action(action)
    #
    #         new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, new_color_info,
    #                                   boundaries_color)
    #         # TODO: make sure that current color info gets initialized the first time.
    #         r = get_reward(current_state, new_state, action, current_color_info, new_color_info, no_blocks)
    #         if r == 20:
    #             no_blocks += 1
    #
    #         norm_r = normalize(r, -30, 20)
    #
    #         if i != 0:
    #             stat_fitness.append(stat_fitness[-1] + (no_blocks / i))
    #         else:
    #             stat_fitness.append(float(0))
    #         print(fitness)
    #         if stat_rewards:
    #             stat_rewards.append(stat_rewards[-1] + norm_r)
    #         else:
    #             rewards.append(norm_r)
    #
    #         current_state = new_state
    #         current_color_info = new_color_info

    def rl(alpha, gamma, epsilon, episodes, act_lim, qL=False):

        fitness = list()
        rewards = [0]

        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False
            if use_simulation:
                rob.play_simulation()
                prey_robot = robobo.SimulationRoboboPrey().connect(address=virtual_ip, port=19989)
                prey_controller = prey.Prey(robot=prey_robot, level=2)
                prey_controller.start()
            current_color_space = get_color_info()
            current_sensor_info = get_sensor_info('front_3')
            current_state = make_discrete(current_sensor_info, boundary_sensor, current_color_space,
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            action = epsilon_policy(current_state, epsilon)
            x = 0
            while not terminate:

                take_action(action)
                # new_collected_food = rob.collected_food() if use_simulation else 0

                # The whole image is used for the reward value;
                # left, mid and right are kept separately for the state.

                new_color_space = get_color_info()
                new_sensor_info = get_sensor_info('front_3')
                new_state = make_discrete(new_sensor_info, boundary_sensor, new_color_space,
                                          boundaries_color)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(3)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(current_state, new_state,
                               current_sensor_info, new_sensor_info,
                               action, new_action,
                               current_color_space, new_color_space)
                print("State and obtained Reward: ", new_state, r)

                norm_r = normalize(r, MIN_REWARD, MAX_REWARD)
                norm_steps = normalize(x, MIN_TIMESTEPS, MAX_TIMESTEPS)
                # Note: this evaluates as (norm_r / norm_steps) + 1, and norm_steps crosses 0
                # at x == 100, which would raise ZeroDivisionError.
                fitness.append(norm_r / norm_steps + 1)

                # Update rule
                if not run_test:
                    # print('update')
                    state_table[str(current_state)][action] += \
                        alpha * (r + gamma * state_table[str(new_state)][max_action]
                                 - state_table[str(current_state)][action])

                # Stop the episode if we get very close to an obstacle or run out of actions
                if (max(new_state[:3]) == 2 and max(new_state[3:]) != 2 and use_simulation) or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open(q_table_file, 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        prey_controller.stop()
                        prey_controller.join()
                        prey_robot.disconnect()
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                        time.sleep(2)

                # update current state and action
                current_state = new_state
                current_sensor_info = new_sensor_info
                action = new_action
                current_color_space = new_color_space

                # increment action limit counter
                x += 1

        return fitness, rewards

    # epsilons = [0.01, 0.08, 0.22]
    # gammas = [0.9]
    # param_tuples = [(epsilon, gamma) for epsilon in epsilons for gamma in gammas]
    experiments = 1 if not run_test else 1
    actions = MAX_TIMESTEPS if not run_test else 10000
    eps = 2 if not run_test else 1
    epsilons = [0.08]
    for epsilon in epsilons:

        for run in range(experiments):
            print('======= RUNNING FOR epsilon ', epsilon, ' , run ', run)
            fitness, rewards = rl(0.9, 0.9, epsilon, eps, actions,
                                  qL=True)  # alpha, gamma, epsilon, episodes, actions per episode
            if not run_test:
                # Despite the .csv extension, these files are pickled Python lists.
                file_name_rewards = './src/rewards_epsilon' + str(epsilon) + '_run' + str(run) + '.csv'
                with open(file_name_rewards, 'wb') as f:
                    pickle.dump(rewards, f)

                file_name_fitness = './src/fitness_epsilon' + str(epsilon) + '_run' + str(run) + '.csv'
                with open(file_name_fitness, 'wb') as f:
                    pickle.dump(fitness, f)


if __name__ == "__main__":
    main()