# -*- coding: utf-8 -*-
#
# ARL_remote_api_v10.1.py
#
# Copyright 2015 Can Aykin, Max Hinteregger, Baris Can Durak
# <canaykin@hotmail.com>, <max.hinterregger@mytum.de>, <bariscandurak@hotmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

from ePuckVRep import ePuck
import time
import os
import sys
import math
import random
import numpy as np
import matplotlib.pyplot as plt

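# Overview: this script trains an ePuck robot in a V-REP simulation (the
# "Canyon Rider") to drive centred between two walls. The controller is
# tabular Q-learning with eligibility traces: the front-left/front-right
# proximity readings are binned into 9 orientation states, each wheel takes
# one of 7 speed levels, and one weight is kept per state-action pair
# (9 * 7 * 7 = 441). Exploration is epsilon-greedy with an exponentially
# decaying epsilon, every transition is additionally mirrored through the
# track's left/right symmetry, and the weights are saved to a '#'-separated
# text file so a run can be resumed later.
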
def log(text):
    blue = '\033[1;34m'
    off = '\033[1;m'

    print(''.join((blue, '[Log] ', off, str(text))))

def error(text):
    red = '\033[1;31m'
    off = '\033[1;m'

    print(''.join((red, '[Error] ', off, str(text))))

maxVel = 1000
random.seed()
dim=441                     # 9 states * 7 * 7 wheel-speed pairs, one weight each
Lambda=0.6                  # eligibility-trace decay
gamma=0.8                   # discount factor
alpha=(1.0/(5.0*dim))       # learning rate
epsilon=0.0
epsilon0=0.5                # initial exploration rate
epsilon_min=0.000
epsilon_decay = 1.0/2500
velStep=maxVel/7
steps=0
episodes=1
bestSteps=0
matrixAux=[0]*2
matrixAux[0]=dim/9          # stride of one state block (49)
matrixAux[1]=matrixAux[0]/7 # stride of one left-wheel speed level (7)
w=[0.0]*dim                 # learned weights (one Q-value per state-action pair)
e=[0.0]*dim                 # eligibility traces
action=[0]*2
currentState=0
nextState=0
prev_action=[0]*2
prev_currentState=0
prev_nextState=0
sym_action=[0]*2            # mirrored action for the symmetric update
sym_currentState=0
sym_nextState=0
resetState=0


total_step = 0
result = np.zeros((1,2),dtype=float)
temp_r = np.zeros((1,2),dtype=float)
result_rmse = np.zeros((1,2),dtype=float)
temp_rmse = np.zeros((1,2),dtype=float)
result_error = np.zeros((1,2),dtype=float)
temp_error = np.zeros((1,2),dtype=float)
result_r2 = np.zeros((1,2),dtype=float)
temp_r2 = np.zeros((1,2),dtype=float)

debugSave = False

def dot_product(v1, v2):
    # Plain dense dot product (helper, currently unused).
    ret=0
    for i in range(0, dim):
        ret+=v1[i]*v2[i]
    return ret

def aux_get_prox():
    # Step the simulation until a proximity reading actually arrives.
    robot.step()
    prox=robot.get_proximity()
    while not(prox):
        robot.step()
        prox=robot.get_proximity()
    return prox

def get_state(prox_sensors):
    # Bin the difference between the front-left and front-right proximity
    # readings into nine orientation states; 0 and 8 are the terminal
    # (wall-hit) states and 4 is centred.
    sensLeft_front=prox_sensors[1]
    sensRight_front=prox_sensors[4]
    orientation=sensLeft_front-sensRight_front
    if(orientation<-50):
        state=0
    elif(orientation<-20):
        state=1
    elif(orientation<-8):
        state=2
    elif(orientation<-1):
        state=3
    elif(orientation<=1):
        state=4
    elif(orientation<=8):
        state=5
    elif(orientation<=20):
        state=6
    elif(orientation<=50):
        state=7
    else:
        state=8
    return state

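# Example (get_state): a front-left reading of 120 and a front-right reading
# of 150 give orientation = -30, which falls in state 1; equal readings give
# the centred state 4.
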
def reward(state,action,nextstate):
    # +10 for staying centred at full speed (action indices 6+6==12), -100
    # for reaching a wall state, -1 per step otherwise.
    if(state==4 and nextstate==4 and (action[1]+action[0]==12)):
        ret=10
    elif(nextstate==0 or nextstate==8):
        ret=-100
    else:
        ret=-1
    return ret

def track_reward(state):
    # 1 while the robot occupies one of the three centre states, 0 otherwise.
    if(state>=3 and state<=5):
        return 1
    return 0

def get_features(state, action):
    # Index of the single active feature: matrixAux[0] (=49) is the state
    # stride and matrixAux[1] (=7) the left-wheel stride, matching the
    # indexing used in get_best_action.
    return ((state*matrixAux[0])+(action[0]*matrixAux[1])+action[1])

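# Example (get_features): state 4 with action [6, 6] (both wheels at full
# speed) maps to feature index 4*49 + 6*7 + 6 = 244.
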
def get_best_action(state):
    # Greedy action: scan all 7x7 wheel-speed pairs for this state and return
    # the pair with the largest weight.
    bestAction = [0]*2
    maximum=-9999
    for i in range(0, matrixAux[1]):
        for j in range(0,matrixAux[1]):
            temp=w[(state*matrixAux[0])+(i*matrixAux[1])+j]
            if(temp>=maximum):
                bestAction[0]=i
                bestAction[1]=j
                maximum=temp
    return bestAction

def read_file(path):
    global total_step
    global w
    my_file = open(path, 'r')
    temp_str = my_file.readline()
    w_str = temp_str.split('#')
    for i in range(0, dim):
        w[i] = float(w_str[i+1])
    total_step = float(w_str[dim+1])
    if(debugSave):
        print(str(w))

    my_file.close()


def save_file(path):
    # Serialise the weight vector and the step counter to a single line.
    global total_step
    global w
    my_file = open(path, 'w')
    temp_str = ''
    for i in range(0, dim):
        temp_str = temp_str + '#' + str(w[i])
    temp_str = temp_str + '#' + str(total_step)
    my_file.write(temp_str)
    my_file.close()

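# The resulting file is one line of the form '#w[0]#w[1]# ... #w[440]#total_step';
# read_file splits it on '#' (entry 0 of the split is the empty string before
# the first separator, so the weights start at entry 1).
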
##################################################

def RMSE(prox):
    # Log the raw left/right sensor error and a running root-mean-square
    # error, each updated incrementally from its previous value.
    global total_step
    global result_rmse
    global temp_rmse
    global result_error
    global temp_error

    temp_error[0][0] = total_step
    temp_error[0][1] = (prox[1]-prox[4])
    result_error = np.concatenate((result_error,temp_error),axis=0)

    temp_rmse[0][0] = total_step
    temp_rmse[0][1] = math.sqrt(((((temp_rmse[0][1])*(temp_rmse[0][1]))*(total_step-1))+((prox[1]-prox[4])*(prox[1]-prox[4])))/total_step)
    result_rmse = np.concatenate((result_rmse,temp_rmse),axis=0)

    return

def track_success(a_reward,b_reward):
    # Maintain two running percentages: how often the step earned the optimal
    # reward (b_reward>=1) and how often the robot sat in a centre state.
    global total_step
    global result
    global temp_r
    global result_r2
    global temp_r2

    if(b_reward>=1):
        foo=1
    else:
        foo=0

    temp_r2[0][0] = total_step
    temp_r2[0][1] = ((((temp_r2[0][1]/100.0)*(total_step-1))+foo)/total_step)*100
    result_r2 = np.concatenate((result_r2,temp_r2),axis=0)

    temp_r[0][0] = total_step
    temp_r[0][1] = ((((temp_r[0][1]/100.0)*(total_step-1))+a_reward)/total_step)*100
    result = np.concatenate((result,temp_r),axis=0)

    return

def show_result():

    global result
    global result_error
    global result_r2
    global result_rmse

    # label axis, adjust axis length
    plt.ylabel('mm')
    plt.xlabel('Steps')
    plt.title('Error')
    plt.axis([0, max(result_error[:,0])+1, min(result_error[:,1])-1, max(result_error[:,1])+1])

    # plot and show
    plt.plot(result_error[:,0], result_error[:,1], 'b', result_error[:,0], result_error[:,1], 'ro')
    plt.show()

    # label axis, adjust axis length
    plt.ylabel('mm')
    plt.xlabel('Steps')
    plt.title('RMSE')
    plt.axis([0, max(result_rmse[:,0])+1, 0, max(result_rmse[:,1])+1])

    # plot and show
    plt.plot(result_rmse[:,0], result_rmse[:,1], 'b', result_rmse[:,0], result_rmse[:,1], 'ro')
    plt.show()

    plt.ylabel('%')
    plt.xlabel('Steps')
    plt.title('Percentage of optimal reward')
    plt.axis([0, max(result_r2[:,0])+1, 0, 100])

    # plot and show
    plt.plot(result_r2[:,0], result_r2[:,1], 'b', result_r2[:,0], result_r2[:,1], 'ro')
    plt.show()

    plt.ylabel('%')
    plt.xlabel('Steps')
    plt.title('Percentage of optimal position')
    plt.axis([0, max(result[:,0])+1, 0, 100])

    # plot and show
    plt.plot(result[:,0], result[:,1], 'b', result[:,0], result[:,1], 'ro')
    plt.show()

    return

##########################################################################

print("---------------------------------------------------------------")
print("---------------------------------------------------------------")
print("----------------- Welcome to the Canyon Rider -----------------")
print("---------------------------------------------------------------")
print("---------------------------------------------------------------\n")
print("-- Please specify the file name for the learning weights ------")
filename = str(raw_input("-- File name : "))
path = os.path.dirname(os.path.abspath(sys.argv[0]))
path = os.path.join(path,filename)
ans = raw_input("-- Do you wish to start learning from the beginning? (y/n) --------\n-- WARNING : This will erase the contents of any existing file! --------\n")
if(ans=='y'):
    save_file(path)       # overwrite with the zero-initialised weights
read_file(path)

log('Connecting to the ePuck.')
try:
    robot = ePuck(host='127.0.0.1',port=19999,debug=False)
    robot.connect()
    robot.enable('proximity')
    log('Connection established.')
    log('Library version: ' + robot.version)
    log('Starting learning. Press \'CTRL+C\' to stop.')
    temp_prox_sens=aux_get_prox()

except Exception as e:
    error(e)
    sys.exit(1)

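# Main learning loop. Each iteration: decay epsilon, read the current state,
# pick an action epsilon-greedily, drive the motors, then apply the TD update
# for the *previous* transition (plus its mirrored copy). An episode ends when
# a wall state (0 or 8) is reached; the robot then turns back towards the
# centre and the eligibility traces are cleared.
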
try:
    while True:
        terminal=0
        # Exponentially decaying epsilon for epsilon-greedy exploration.
        epsilon=epsilon0*pow(2, (-epsilon_decay*total_step))
        if(epsilon<=epsilon_min):
            epsilon=epsilon_min

        if(total_step%1000==0):
            print(total_step)
        if(steps==0):
            # First step of an episode: wait for a fresh sensor sample.
            prox_sens=aux_get_prox()
            while(prox_sens[3]==temp_prox_sens[3]):
                prox_sens=aux_get_prox()
            temp_prox_sens=prox_sens
            currentState=get_state(prox_sens)
        else:
            currentState = nextState

        if (random.randint(0,1000)<=(epsilon*1000)):
            # Explore: rebind (rather than mutate) the action list so that
            # prev_action still holds the previous step's choice.
            action=[random.randint(0,6), random.randint(0,6)]
        else:
            action=get_best_action(currentState)

        velLeft=(velStep*(action[0]+1))
        velRight=(velStep*(action[1]+1))
        robot.set_motors_speed(velLeft, velRight)
        robot.step()
        if(steps==0):
            time.sleep(0.01)
        else:
            # Mirror the previous transition: swapping the wheel speeds and
            # reflecting the state about the centre gives a second, symmetric
            # training sample for free.
            sym_action[0]=prev_action[1]
            sym_action[1]=prev_action[0]
            sym_currentState=8-prev_currentState

            phi = get_features(prev_currentState,prev_action)
            phi2 = get_features(sym_currentState,sym_action)
            # TD error so far: reward minus the current estimate; the
            # discounted next-state value is added below for non-terminal
            # transitions.
            delta = reward(prev_currentState,prev_action,prev_nextState)-w[phi]
            e[phi] = 1
            if(phi==phi2):
                e[phi2] = 2
            else:
                e[phi2] = 1

            if (reward(prev_currentState,prev_action,prev_nextState)<-10):
                # Terminal transition: the robot reached a wall state.
                terminal = 1
                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]

                log('Number of last episode was :...................................' + str(episodes))
                log('Last episode continued for :...................................' + str(steps) + ' steps.')
                log('Last episode was terminated by reaching the terminal state :...' + str(prev_nextState))

                save_file(path)

                episodes=episodes+1
                if(steps>bestSteps):
                    bestSteps=steps
                steps=0

                log('Best episode continued for :...................................' + str(bestSteps) + ' steps.')
                print(' ')

                # Recovery manoeuvre: turn in place until the robot no longer
                # faces a wall, then clear the eligibility traces for the new
                # episode.
                prox_sens=aux_get_prox()
                while(prox_sens[3]==temp_prox_sens[3]):
                    prox_sens=aux_get_prox()
                temp_prox_sens=prox_sens
                resetState=get_state(prox_sens)
                while(resetState==0 or resetState==8):
                    if(resetState<4):
                        robot.set_motors_speed(0.1*maxVel, -0.1*maxVel)
                    elif(resetState>4):
                        robot.set_motors_speed(-0.1*maxVel, 0.1*maxVel)
                    robot.step()
                    time.sleep(0.001)
                    prox_sens=aux_get_prox()
                    while(prox_sens[3]==temp_prox_sens[3]):
                        prox_sens=aux_get_prox()
                    temp_prox_sens=prox_sens
                    resetState=get_state(prox_sens)

                for i in range(0,dim):
                    e[i]=0.0

            else:
                terminal = 0

                # Non-terminal: complete the TD error with the discounted
                # value of the greedy action in the next state, then update
                # the weights and decay the traces.
                delta += gamma * w[get_features(prev_nextState,get_best_action(prev_nextState))]

                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]
                    e[i] = gamma * Lambda * e[i]

        if(terminal==0):

            ###########################################

            total_step = total_step + 1

            RMSE(prox_sens)
            track_success(track_reward(nextState), reward(currentState,action,nextState))

            ##########################################

            steps=steps+1

            # Read the next state and shift the transition window.
            prox_sens=aux_get_prox()
            while(prox_sens[3]==temp_prox_sens[3]):
                prox_sens=aux_get_prox()
            temp_prox_sens=prox_sens
            nextState=get_state(prox_sens)

            prev_currentState=currentState
            prev_action=action
            prev_nextState=nextState

except KeyboardInterrupt:

    print("\n")

    save_file(path)

    ###################
    show_result()
    ##################

    log('Learned values have been saved!!!')
    log('Stopping the robot.')
    robot.close()
    log('Quitting program.')
    sys.exit()

except Exception as e:
    error(e)