Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- #
- # ARL_remote_api_v10.1.py
- #
- # Copyright 2015 Can Aykin, Max Hinteregger, Baris Can Durak
- # <canaykin@hotmail.com>, <max.hinterregger@mytum.de>, <bariscandurak@hotmail.com>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- # MA 02110-1301, USA.
- from ePuckVRep import ePuck
- import time
- import os
- import sys
- import math
- import random
- import numpy as np
- import matplotlib.pyplot as plt
def log(text):
    """Print *text* to stdout with a blue ANSI '[Log]' prefix."""
    blue_prefix = '\033[1;34m' + '[Log] ' + '\033[1;m'
    print(blue_prefix + str(text))
def error(text):
    """Print *text* to stdout with a red ANSI '[Error]' prefix."""
    red_prefix = '\033[1;31m' + '[Error] ' + '\033[1;m'
    print(red_prefix + str(text))
# --- Learning hyper-parameters and shared module state ------------------
maxVel = 1000                     # maximum wheel velocity
random.seed()
dim = 441                         # |w| = 9 states * 7 left * 7 right actions
Lambda = 0.6                      # eligibility-trace decay
gamma = 0.8                       # discount factor
alpha = (1.0/(5.0*dim))           # learning rate
epsilon = 0.0                     # current exploration rate
epsilon0 = 0.5                    # initial exploration rate
epsilon_min = 0.000               # exploration floor
epsilon_decay = 1.0/2500          # exponential epsilon decay constant
# Integer division (//) keeps these strides ints: they are used as list
# indices.  Identical to / under Python 2 ints, and also correct on Python 3.
velStep = maxVel//7               # velocity quantum per action index
steps = 0                         # steps in the current episode
episodes = 1                      # episode counter
bestSteps = 0                     # longest episode seen so far
matrixAux = [0]*2                 # index strides into w
matrixAux[0] = dim//9             # 49 weights per state
matrixAux[1] = matrixAux[0]//7    # 7 weights per left-action index
w = [0.0]*dim                     # learned weight vector
e = [0.0]*dim                     # eligibility traces
action = [0]*2                    # current [left, right] action indices
currentState = 0
nextState = 0
prev_action = 0                   # rebound to the action list each iteration
prev_currentState = 0
prev_nextState = 0
# Fixed: sym_action was initialised to the int 0 but the learning loop
# item-assigns it (sym_action[0]=...), which raised TypeError at runtime.
sym_action = [0]*2
sym_currentState = 0
sym_nextState = 0
resetState = 0
total_step = 0                    # steps across all episodes
# Statistics buffers: each temp_* is a 1x2 scratch row whose column 1 also
# carries the previous running value between calls; result_* accumulate rows.
result = np.zeros((1,2),dtype=float)
temp_r = np.zeros((1,2),dtype=float)
result_rmse = np.zeros((1,2),dtype=float)
temp_rmse = np.zeros((1,2),dtype=float)
result_error = np.zeros((1,2),dtype=float)
temp_error = np.zeros((1,2),dtype=float)
result_r2 = np.zeros((1,2),dtype=float)
temp_r2 = np.zeros((1,2),dtype=float)
debugSave = False                 # dump w after loading when True
def dot_product(v1, v2):
    """Return the dot product of two equal-length vectors.

    Generalized from the original, which summed exactly the first `dim`
    entries and so depended on a module-level global and crashed on
    shorter inputs; zip() pairs elements for any common length.
    """
    return sum(a * b for a, b in zip(v1, v2))
def aux_get_prox():
    """Advance the simulation until a non-empty proximity reading arrives.

    Steps the global `robot` at least once and keeps stepping while
    get_proximity() returns a falsy value; returns the first truthy one.
    """
    while True:
        robot.step()
        reading = robot.get_proximity()
        if reading:
            return reading
def get_state(prox_sensors):
    """Discretise the front-sensor difference into a state index 0..8.

    Uses sensors 1 (front-left) and 4 (front-right); state 4 is centered,
    0 and 8 are the extreme (terminal) orientations.  Negative differences
    use strict lower bounds, non-negative ones inclusive upper bounds,
    exactly as in the original if/elif ladder.
    """
    diff = prox_sensors[1] - prox_sensors[4]
    for index, bound in enumerate((-50, -20, -8, -1)):
        if diff < bound:
            return index
    for index, bound in enumerate((1, 8, 20, 50)):
        if diff <= bound:
            return index + 4
    return 8
def reward(state, action, nextstate):
    """Immediate reward for a (state, action, nextstate) transition.

    +10 when the robot stays centered (state 4 -> 4) at maximum combined
    wheel speed (action indices summing to 12); -100 on entering a
    terminal orientation (0 or 8); -1 otherwise.
    """
    stayed_centered_full_speed = (
        state == 4 and nextstate == 4 and action[0] + action[1] == 12
    )
    if stayed_centered_full_speed:
        return 10
    if nextstate in (0, 8):
        return -100
    return -1
def track_reward(state):
    """Return 1 when *state* is in the near-centered band 3..5, else 0.

    Simplified: the original's elif (state 0/8) and else branches both
    returned 0, so the ladder collapses to a single range test.
    """
    return 1 if 3 <= state <= 5 else 0
def get_features(state, action):
    """Flat index of the (state, action) pair in the weight vector w.

    Fixed: the original used state*matrixAux[1] and action[0]*matrixAux[2];
    matrixAux has only two entries, so matrixAux[2] always raised
    IndexError, and the strides disagreed with get_best_action's indexing
    w[state*matrixAux[0] + i*matrixAux[1] + j].  Correct strides are
    matrixAux[0] (=49) per state and matrixAux[1] (=7) per left action.
    """
    return (state * matrixAux[0]) + (action[0] * matrixAux[1]) + action[1]
def get_best_action(state):
    """Return the greedy [left, right] action indices for *state*.

    Scans all 7x7 wheel-speed pairs and keeps the one with the largest
    weight in w; ties resolve to the pair scanned last (>= comparison),
    matching the original behaviour.
    """
    best = [0]*2
    best_value = -9999
    base = state*matrixAux[0]
    for left in range(0, matrixAux[1]):
        row = base + left*matrixAux[1]
        for right in range(0, matrixAux[1]):
            value = w[row + right]
            if value >= best_value:
                best[0] = left
                best[1] = right
                best_value = value
    return best
def read_file(path):
    """Load the weight vector w and total_step from a '#'-separated file.

    File format (written by save_file): '#w0#w1#...#w{dim-1}#total_step',
    so split('#') yields an empty first element and values at 1..dim+1.
    Fixed: uses a context manager so the file is closed even if parsing
    raises (the original leaked the handle on error).
    """
    global total_step
    global w
    with open(path, 'r') as my_file:
        temp_str = my_file.readline()
    w_str = temp_str.split('#')
    for i in range(0, dim):
        w[i] = float(w_str[i+1])
    total_step = float(w_str[dim+1])
    if debugSave:
        print(str(w))
def save_file(path):
    """Write w and total_step to *path* as '#w0#w1#...#total_step'.

    Fixed: uses a context manager (the original leaked the handle if
    write raised) and a single join instead of quadratic string
    concatenation over the 441 weights.  Output bytes are identical.
    """
    global total_step
    global w
    parts = [str(w[i]) for i in range(0, dim)]
    parts.append(str(total_step))
    with open(path, 'w') as my_file:
        my_file.write('#' + '#'.join(parts))
##################################################
def RMSE(prox):
    """Record the instantaneous centering error and the running RMSE.

    prox: proximity-sensor sequence; indices 1 and 4 are the front-left
    and front-right sensors (same convention as get_state).  Appends one
    (total_step, value) row to result_error and result_rmse.
    NOTE(review): divides by total_step, so this assumes total_step >= 1
    when called -- the main loop increments total_step first; verify.
    """
    global total_step
    global result_rmse
    global temp_rmse
    global result_error
    global temp_error
    # Raw signed error between the two front sensors at this step.
    temp_error[0][0] = total_step
    temp_error[0][1] = (prox[1]-prox[4])
    result_error = np.concatenate((result_error,temp_error),axis=0)
    temp_rmse[0][0] = total_step
    # Incremental RMSE: temp_rmse[0][1] still holds the previous RMSE on
    # the right-hand side, so (prev^2*(n-1) + err^2)/n re-averages the
    # squared error online before taking the root.  Statement order
    # matters here; do not reorder.
    temp_rmse[0][1] = math.sqrt(((((temp_rmse[0][1])*(temp_rmse[0][1]))*(total_step-1))+((prox[1]-prox[4])*(prox[1]-prox[4])))/total_step)
    result_rmse = np.concatenate((result_rmse,temp_rmse),axis=0)
    return
def track_success(a_reward, b_reward):
    """Accumulate two running-percentage success statistics.

    a_reward: 1/0 flag for being near-centered (from track_reward).
    b_reward: the raw reward of the step; counts as optimal when >= 1.
    Each temp_* scratch row keeps the previous percentage in column 1,
    which is folded into the new running average before appending a
    (total_step, percentage) row to the matching result array.
    """
    global total_step
    global result
    global temp_r
    global result_r2
    global temp_r2
    optimal_hit = 1 if b_reward >= 1 else 0
    temp_r2[0][0] = total_step
    temp_r2[0][1] = ((temp_r2[0][1] / 100.0) * (total_step - 1) + optimal_hit) / total_step * 100
    result_r2 = np.concatenate((result_r2, temp_r2), axis=0)
    temp_r[0][0] = total_step
    temp_r[0][1] = ((temp_r[0][1] / 100.0) * (total_step - 1) + a_reward) / total_step * 100
    result = np.concatenate((result, temp_r), axis=0)
def show_result():
    """Display the four collected learning curves, one figure at a time.

    Plots error, running RMSE, optimal-reward percentage, and
    optimal-position percentage, in that order, each as a blue line with
    red markers over step number.
    """
    global result
    global result_error
    global result_r2
    global result_rmse

    def _plot(data, ylabel, title, axis_limits):
        # One figure: label, fix axes, draw line + point markers, show.
        plt.ylabel(ylabel)
        plt.xlabel('Steps')
        plt.title(title)
        plt.axis(axis_limits)
        plt.plot(data[:,0], data[:,1], 'b', data[:,0], data[:,1], 'ro')
        plt.show()

    _plot(result_error, 'mm', 'Error',
          [0, max(result_error[:,0])+1, min(result_error[:,1])-1, max(result_error[:,1])+1])
    _plot(result_rmse, 'mm', 'RMSE',
          [0, max(result_rmse[:,0])+1, 0, max(result_rmse[:,1])+1])
    _plot(result_r2, '%', 'Percentage of optimal reward',
          [0, max(result_r2[:,0])+1, 0, 100])
    _plot(result, '%', 'Percentage of optimal position',
          [0, max(result[:,0])+1, 0, 100])
    return
##########################################################################
# --- Interactive startup: choose the weight file and connect to the
# --- simulated e-puck robot over the V-REP remote API.
# --- NOTE(review): raw_input and the "except Exception, e" syntax make
# --- this a Python 2 script.
print("---------------------------------------------------------------")
print("---------------------------------------------------------------")
print("----------------- Welcome to the Canyon Rider -----------------")
print("---------------------------------------------------------------")
print("---------------------------------------------------------------\n")
print("-- Please specify the file name for learning weights ----------")
filename = str(raw_input("-- File name : "))
# Resolve the weight file relative to this script's own directory.
path = os.path.dirname(os.path.abspath(sys.argv[0]))
path = os.path.join(path,filename)
ans = raw_input("-- Do you wish to start learning from beginning? (y/n) --------\n-- WARNING : This will erase the contents of any existing file! --------\n")
# 'y' overwrites the file with the current (all-zero) weights before the
# read below, effectively restarting learning from scratch.
if(ans=='y'):
    save_file(path)
read_file(path)
log('Conecting with ePuck')
try:
    # 19999 is the V-REP remote-API port on localhost.
    robot = ePuck(host='127.0.0.1',port=19999,debug=False)
    robot.connect()
    robot.enable('proximity')
    log('Connection established.')
    log('Library version: ' + robot.version)
    log('Starting learning. Press \'CTRL+C\' to stop.')
    # Prime the "last sensor sample" used to detect fresh readings later.
    temp_prox_sens=aux_get_prox()
except Exception, e:
    error(e)
    sys.exit(1)
try:
    # Main learning loop: TD(lambda)-style updates with a linear weight
    # vector w over 441 (state, action) features.  Runs until CTRL+C;
    # each pass performs one action and learns from the PREVIOUS
    # transition (delayed one step so its outcome state is known).
    while True:
        terminal=0
        # Exponentially decaying epsilon-greedy exploration rate,
        # clamped below at epsilon_min.
        epsilon=epsilon0*pow(2, (-epsilon_decay*total_step))
        if(epsilon<=epsilon_min):
            epsilon=epsilon_min
        # Progress heartbeat every 1000 total steps.
        if(total_step%1000==0):
            print (total_step)
        # At episode start, poll until a fresh sensor sample arrives
        # (prox_sens[3] changing marks a new reading); mid-episode,
        # reuse the state observed at the end of the previous pass.
        if(steps==0):
            prox_sens=aux_get_prox()
            while(prox_sens[3]==temp_prox_sens[3]):
                prox_sens=aux_get_prox()
            temp_prox_sens=prox_sens
            currentState=get_state(prox_sens)
        else:
            currentState = nextState
        # Epsilon-greedy action selection: random wheel-speed pair with
        # probability ~epsilon, otherwise the greedy action under w.
        if (random.randint(0,1000)<=(epsilon*1000)):
            action[0]=random.randint(0,6)
            action[1]=random.randint(0,6)
        else:
            action=get_best_action(currentState)
        # Map discrete action indices 0..6 to wheel velocities and act.
        velLeft=(velStep*(action[0]+1))
        velRight=(velStep*(action[1]+1))
        robot.set_motors_speed(velLeft, velRight)
        robot.step()
        if(steps==0):
            # First step of an episode: no previous transition to learn
            # from yet.
            time.sleep(0.01)
        else:
            # Exploit left/right symmetry: the mirrored transition
            # (wheels swapped, state reflected as 8-s) is updated
            # together with the actual one via a second feature index.
            # NOTE(review): sym_action is initialised to the int 0 at
            # module level; these item assignments require it to be a
            # two-element list -- verify the initialisation.
            sym_action[0]=prev_action[1]
            sym_action[1]=prev_action[0]
            sym_currentState=8-prev_currentState
            phi = get_features(prev_currentState,prev_action)
            phi2 = get_features(sym_currentState,sym_action)
            # TD error starts as reward minus the current estimate; the
            # bootstrapped next-state value is added in the non-terminal
            # branch below.
            delta = reward(prev_currentState,prev_action,prev_nextState)-w[phi]
            # Replacing eligibility traces; a self-symmetric feature
            # (phi == phi2) gets double weight.
            e[phi] = 1
            if(phi==phi2):
                e[phi2] = 2
            else:
                e[phi2] = 1
            # Reward < -10 only occurs for the -100 terminal penalty.
            if (reward(prev_currentState,prev_action,prev_nextState)<-10):
                # Episode ended in a terminal state: final weight update
                # without bootstrapping, log and persist, then reposition
                # the robot and reset the traces.
                terminal = 1
                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]
                log('Number of last episode was :...................................' + str(episodes))
                log('Last episode continued for :...................................' + str(steps) + ' steps.')
                log('Last episode was teminated by reaching the terminal state :....' + str(prev_nextState))
                save_file(path)
                episodes=episodes+1
                if(steps>bestSteps):
                    bestSteps=steps
                steps=0
                log('Best episode continued for :...................................' + str(bestSteps) + ' steps.')
                print(' ')
                # Wait for a fresh sensor sample before repositioning.
                prox_sens=aux_get_prox()
                while(prox_sens[3]==temp_prox_sens[3]):
                    prox_sens=aux_get_prox()
                temp_prox_sens=prox_sens
                resetState=get_state(prox_sens)
                # Rotate in place (opposite wheel speeds) until the robot
                # leaves the terminal orientations 0 and 8.
                while(resetState==0 or resetState==8):
                    if(resetState<4):
                        robot.set_motors_speed(0.1*maxVel, -0.1*maxVel)
                    elif(resetState>4):
                        robot.set_motors_speed(-0.1*maxVel, 0.1*maxVel)
                    robot.step()
                    time.sleep(0.001)
                    prox_sens=aux_get_prox()
                    while(prox_sens[3]==temp_prox_sens[3]):
                        prox_sens=aux_get_prox()
                    temp_prox_sens=prox_sens
                    resetState=get_state(prox_sens)
                # Clear eligibility traces for the new episode.
                for i in range(0,dim):
                    e[i]=0.0
            else:
                terminal = 0
                # Non-terminal: complete the TD error with the discounted
                # greedy value of the next state, then update all weights
                # and decay the traces by gamma*Lambda.
                delta += gamma * w[get_features(prev_nextState,get_best_action(prev_nextState))]
                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]
                    e[i] = gamma * Lambda * e[i]
        # Record statistics only for non-terminal passes.
        if(terminal==0):
            ###########################################
            total_step = total_step + 1
            RMSE(prox_sens)
            track_success(track_reward(nextState), reward(currentState,action,nextState))
            ##########################################
            steps=steps+1
        # Read a fresh sensor sample for the next state and remember
        # this transition for the next pass's learning update.
        prox_sens=aux_get_prox()
        while(prox_sens[3]==temp_prox_sens[3]):
            prox_sens=aux_get_prox()
        temp_prox_sens=prox_sens
        nextState=get_state(prox_sens)
        prev_currentState=currentState
        prev_action=action
        prev_nextState=nextState
except KeyboardInterrupt:
    # Graceful shutdown on CTRL+C: persist weights, show the learning
    # curves, close the robot connection, and exit.
    print("\n")
    save_file(path)
    ###################
    show_result()
    ##################
    log('Learned values have been saved!!!')
    log('Stopping the robot.')
    robot.close()
    log('Quitting program.')
    sys.exit()
except Exception, e:
    # Python 2 except syntax; any other error is reported and the
    # script falls off the end.
    error(e)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement