Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- #
- # ARL_remote_api_v10.1.py
- #
- # Copyright 2015 Can Aykin, Max Hinteregger, Baris Can Durak
- # <canaykin@hotmail.com>, <max.hinterregger@mytum.de>, <bariscandurak@hotmail.com>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- # MA 02110-1301, USA.
- from ePuckVRep import ePuck
- import time
- import os
- import sys
- import math
- import random
- import numpy as np
- import matplotlib.pyplot as plt
def log(text):
    """Print *text* to stdout with a blue ANSI '[Log]' prefix."""
    blue_prefix = '\033[1;34m' + '[Log] ' + '\033[1;m'
    print(blue_prefix + str(text))
def error(text):
    """Print *text* to stdout with a red ANSI '[Error]' prefix."""
    red_prefix = '\033[1;31m' + '[Error] ' + '\033[1;m'
    print(red_prefix + str(text))
# --- Learning hyper-parameters and shared module state ------------------
maxVel = 1000                     # maximum wheel velocity
random.seed()
dim = 441                         # |w| = 9 states * 7 left * 7 right actions
Lambda = 0.6                      # eligibility-trace decay
gamma = 0.8                       # discount factor
alpha = (1.0/(5.0*dim))           # learning rate
epsilon = 0.0                     # current exploration rate
epsilon0 = 0.5                    # initial exploration rate
epsilon_min = 0.000               # exploration floor
epsilon_decay = 1.0/2500          # exponential epsilon decay constant
# Integer division (//) keeps these strides ints: they are used as list
# indices.  Identical to / under Python 2 ints, and also correct on Python 3.
velStep = maxVel//7               # velocity quantum per action index
steps = 0                         # steps in the current episode
episodes = 1                      # episode counter
bestSteps = 0                     # longest episode seen so far
matrixAux = [0]*2                 # index strides into w
matrixAux[0] = dim//9             # 49 weights per state
matrixAux[1] = matrixAux[0]//7    # 7 weights per left-action index
w = [0.0]*dim                     # learned weight vector
e = [0.0]*dim                     # eligibility traces
action = [0]*2                    # current [left, right] action indices
currentState = 0
nextState = 0
prev_action = 0                   # rebound to the action list each iteration
prev_currentState = 0
prev_nextState = 0
# Fixed: sym_action was initialised to the int 0 but the learning loop
# item-assigns it (sym_action[0]=...), which raised TypeError at runtime.
sym_action = [0]*2
sym_currentState = 0
sym_nextState = 0
resetState = 0
total_step = 0                    # steps across all episodes
# Statistics buffers: each temp_* is a 1x2 scratch row whose column 1 also
# carries the previous running value between calls; result_* accumulate rows.
result = np.zeros((1,2),dtype=float)
temp_r = np.zeros((1,2),dtype=float)
result_rmse = np.zeros((1,2),dtype=float)
temp_rmse = np.zeros((1,2),dtype=float)
result_error = np.zeros((1,2),dtype=float)
temp_error = np.zeros((1,2),dtype=float)
result_r2 = np.zeros((1,2),dtype=float)
temp_r2 = np.zeros((1,2),dtype=float)
debugSave = False                 # dump w after loading when True
def dot_product(v1, v2):
    """Return the dot product of two equal-length vectors.

    Generalized from the original, which summed exactly the first `dim`
    entries and so depended on a module-level global and crashed on
    shorter inputs; zip() pairs elements for any common length.
    """
    return sum(a * b for a, b in zip(v1, v2))
def aux_get_prox():
    """Advance the simulation until a non-empty proximity reading arrives.

    Steps the global `robot` at least once and keeps stepping while
    get_proximity() returns a falsy value; returns the first truthy one.
    """
    while True:
        robot.step()
        reading = robot.get_proximity()
        if reading:
            return reading
def get_state(prox_sensors):
    """Discretise the front-sensor difference into a state index 0..8.

    Uses sensors 1 (front-left) and 4 (front-right); state 4 is centered,
    0 and 8 are the extreme (terminal) orientations.  Negative differences
    use strict lower bounds, non-negative ones inclusive upper bounds,
    exactly as in the original if/elif ladder.
    """
    diff = prox_sensors[1] - prox_sensors[4]
    for index, bound in enumerate((-50, -20, -8, -1)):
        if diff < bound:
            return index
    for index, bound in enumerate((1, 8, 20, 50)):
        if diff <= bound:
            return index + 4
    return 8
def reward(state, action, nextstate):
    """Immediate reward for a (state, action, nextstate) transition.

    +10 when the robot stays centered (state 4 -> 4) at maximum combined
    wheel speed (action indices summing to 12); -100 on entering a
    terminal orientation (0 or 8); -1 otherwise.
    """
    stayed_centered_full_speed = (
        state == 4 and nextstate == 4 and action[0] + action[1] == 12
    )
    if stayed_centered_full_speed:
        return 10
    if nextstate in (0, 8):
        return -100
    return -1
def track_reward(state):
    """Return 1 when *state* is in the near-centered band 3..5, else 0.

    Simplified: the original's elif (state 0/8) and else branches both
    returned 0, so the ladder collapses to a single range test.
    """
    return 1 if 3 <= state <= 5 else 0
def get_features(state, action):
    """Flat index of the (state, action) pair in the weight vector w.

    Fixed: the original used state*matrixAux[1] and action[0]*matrixAux[2];
    matrixAux has only two entries, so matrixAux[2] always raised
    IndexError, and the strides disagreed with get_best_action's indexing
    w[state*matrixAux[0] + i*matrixAux[1] + j].  Correct strides are
    matrixAux[0] (=49) per state and matrixAux[1] (=7) per left action.
    """
    return (state * matrixAux[0]) + (action[0] * matrixAux[1]) + action[1]
def get_best_action(state):
    """Return the greedy [left, right] action indices for *state*.

    Scans all 7x7 wheel-speed pairs and keeps the one with the largest
    weight in w; ties resolve to the pair scanned last (>= comparison),
    matching the original behaviour.
    """
    best = [0]*2
    best_value = -9999
    base = state*matrixAux[0]
    for left in range(0, matrixAux[1]):
        row = base + left*matrixAux[1]
        for right in range(0, matrixAux[1]):
            value = w[row + right]
            if value >= best_value:
                best[0] = left
                best[1] = right
                best_value = value
    return best
def read_file(path):
    """Load the weight vector w and total_step from a '#'-separated file.

    File format (written by save_file): '#w0#w1#...#w{dim-1}#total_step',
    so split('#') yields an empty first element and values at 1..dim+1.
    Fixed: uses a context manager so the file is closed even if parsing
    raises (the original leaked the handle on error).
    """
    global total_step
    global w
    with open(path, 'r') as my_file:
        temp_str = my_file.readline()
    w_str = temp_str.split('#')
    for i in range(0, dim):
        w[i] = float(w_str[i+1])
    total_step = float(w_str[dim+1])
    if debugSave:
        print(str(w))
def save_file(path):
    """Write w and total_step to *path* as '#w0#w1#...#total_step'.

    Fixed: uses a context manager (the original leaked the handle if
    write raised) and a single join instead of quadratic string
    concatenation over the 441 weights.  Output bytes are identical.
    """
    global total_step
    global w
    parts = [str(w[i]) for i in range(0, dim)]
    parts.append(str(total_step))
    with open(path, 'w') as my_file:
        my_file.write('#' + '#'.join(parts))
##################################################
def RMSE(prox):
    """Record the instantaneous centering error and the running RMSE.

    prox: proximity-sensor sequence; indices 1 and 4 are the front-left
    and front-right sensors (same convention as get_state).  Appends one
    (total_step, value) row to result_error and result_rmse.
    NOTE(review): divides by total_step, so this assumes total_step >= 1
    when called -- the main loop increments total_step first; verify.
    """
    global total_step
    global result_rmse
    global temp_rmse
    global result_error
    global temp_error
    # Raw signed error between the two front sensors at this step.
    temp_error[0][0] = total_step
    temp_error[0][1] = (prox[1]-prox[4])
    result_error = np.concatenate((result_error,temp_error),axis=0)
    temp_rmse[0][0] = total_step
    # Incremental RMSE: temp_rmse[0][1] still holds the previous RMSE on
    # the right-hand side, so (prev^2*(n-1) + err^2)/n re-averages the
    # squared error online before taking the root.  Statement order
    # matters here; do not reorder.
    temp_rmse[0][1] = math.sqrt(((((temp_rmse[0][1])*(temp_rmse[0][1]))*(total_step-1))+((prox[1]-prox[4])*(prox[1]-prox[4])))/total_step)
    result_rmse = np.concatenate((result_rmse,temp_rmse),axis=0)
    return
def track_success(a_reward, b_reward):
    """Accumulate two running-percentage success statistics.

    a_reward: 1/0 flag for being near-centered (from track_reward).
    b_reward: the raw reward of the step; counts as optimal when >= 1.
    Each temp_* scratch row keeps the previous percentage in column 1,
    which is folded into the new running average before appending a
    (total_step, percentage) row to the matching result array.
    """
    global total_step
    global result
    global temp_r
    global result_r2
    global temp_r2
    optimal_hit = 1 if b_reward >= 1 else 0
    temp_r2[0][0] = total_step
    temp_r2[0][1] = ((temp_r2[0][1] / 100.0) * (total_step - 1) + optimal_hit) / total_step * 100
    result_r2 = np.concatenate((result_r2, temp_r2), axis=0)
    temp_r[0][0] = total_step
    temp_r[0][1] = ((temp_r[0][1] / 100.0) * (total_step - 1) + a_reward) / total_step * 100
    result = np.concatenate((result, temp_r), axis=0)
def show_result():
    """Display the four collected learning curves, one figure at a time.

    Plots error, running RMSE, optimal-reward percentage, and
    optimal-position percentage, in that order, each as a blue line with
    red markers over step number.
    """
    global result
    global result_error
    global result_r2
    global result_rmse

    def _plot(data, ylabel, title, axis_limits):
        # One figure: label, fix axes, draw line + point markers, show.
        plt.ylabel(ylabel)
        plt.xlabel('Steps')
        plt.title(title)
        plt.axis(axis_limits)
        plt.plot(data[:,0], data[:,1], 'b', data[:,0], data[:,1], 'ro')
        plt.show()

    _plot(result_error, 'mm', 'Error',
          [0, max(result_error[:,0])+1, min(result_error[:,1])-1, max(result_error[:,1])+1])
    _plot(result_rmse, 'mm', 'RMSE',
          [0, max(result_rmse[:,0])+1, 0, max(result_rmse[:,1])+1])
    _plot(result_r2, '%', 'Percentage of optimal reward',
          [0, max(result_r2[:,0])+1, 0, 100])
    _plot(result, '%', 'Percentage of optimal position',
          [0, max(result[:,0])+1, 0, 100])
    return
##########################################################################
# --- Interactive startup: choose the weight file and connect to the
# --- simulated e-puck robot over the V-REP remote API.
# --- NOTE(review): raw_input and the "except Exception, e" syntax make
# --- this a Python 2 script.
print("---------------------------------------------------------------")
print("---------------------------------------------------------------")
print("----------------- Welcome to the Canyon Rider -----------------")
print("---------------------------------------------------------------")
print("---------------------------------------------------------------\n")
print("-- Please specify the file name for learning weights ----------")
filename = str(raw_input("-- File name : "))
# Resolve the weight file relative to this script's own directory.
path = os.path.dirname(os.path.abspath(sys.argv[0]))
path = os.path.join(path,filename)
ans = raw_input("-- Do you wish to start learning from beginning? (y/n) --------\n-- WARNING : This will erase the contents of any existing file! --------\n")
# 'y' overwrites the file with the current (all-zero) weights before the
# read below, effectively restarting learning from scratch.
if(ans=='y'):
    save_file(path)
read_file(path)
log('Conecting with ePuck')
try:
    # 19999 is the V-REP remote-API port on localhost.
    robot = ePuck(host='127.0.0.1',port=19999,debug=False)
    robot.connect()
    robot.enable('proximity')
    log('Connection established.')
    log('Library version: ' + robot.version)
    log('Starting learning. Press \'CTRL+C\' to stop.')
    # Prime the "last sensor sample" used to detect fresh readings later.
    temp_prox_sens=aux_get_prox()
except Exception, e:
    error(e)
    sys.exit(1)
try:
    # Main learning loop: TD(lambda)-style updates with a linear weight
    # vector w over 441 (state, action) features.  Runs until CTRL+C;
    # each pass performs one action and learns from the PREVIOUS
    # transition (delayed one step so its outcome state is known).
    while True:
        terminal=0
        # Exponentially decaying epsilon-greedy exploration rate,
        # clamped below at epsilon_min.
        epsilon=epsilon0*pow(2, (-epsilon_decay*total_step))
        if(epsilon<=epsilon_min):
            epsilon=epsilon_min
        # Progress heartbeat every 1000 total steps.
        if(total_step%1000==0):
            print (total_step)
        # At episode start, poll until a fresh sensor sample arrives
        # (prox_sens[3] changing marks a new reading); mid-episode,
        # reuse the state observed at the end of the previous pass.
        if(steps==0):
            prox_sens=aux_get_prox()
            while(prox_sens[3]==temp_prox_sens[3]):
                prox_sens=aux_get_prox()
            temp_prox_sens=prox_sens
            currentState=get_state(prox_sens)
        else:
            currentState = nextState
        # Epsilon-greedy action selection: random wheel-speed pair with
        # probability ~epsilon, otherwise the greedy action under w.
        if (random.randint(0,1000)<=(epsilon*1000)):
            action[0]=random.randint(0,6)
            action[1]=random.randint(0,6)
        else:
            action=get_best_action(currentState)
        # Map discrete action indices 0..6 to wheel velocities and act.
        velLeft=(velStep*(action[0]+1))
        velRight=(velStep*(action[1]+1))
        robot.set_motors_speed(velLeft, velRight)
        robot.step()
        if(steps==0):
            # First step of an episode: no previous transition to learn
            # from yet.
            time.sleep(0.01)
        else:
            # Exploit left/right symmetry: the mirrored transition
            # (wheels swapped, state reflected as 8-s) is updated
            # together with the actual one via a second feature index.
            # NOTE(review): sym_action is initialised to the int 0 at
            # module level; these item assignments require it to be a
            # two-element list -- verify the initialisation.
            sym_action[0]=prev_action[1]
            sym_action[1]=prev_action[0]
            sym_currentState=8-prev_currentState
            phi = get_features(prev_currentState,prev_action)
            phi2 = get_features(sym_currentState,sym_action)
            # TD error starts as reward minus the current estimate; the
            # bootstrapped next-state value is added in the non-terminal
            # branch below.
            delta = reward(prev_currentState,prev_action,prev_nextState)-w[phi]
            # Replacing eligibility traces; a self-symmetric feature
            # (phi == phi2) gets double weight.
            e[phi] = 1
            if(phi==phi2):
                e[phi2] = 2
            else:
                e[phi2] = 1
            # Reward < -10 only occurs for the -100 terminal penalty.
            if (reward(prev_currentState,prev_action,prev_nextState)<-10):
                # Episode ended in a terminal state: final weight update
                # without bootstrapping, log and persist, then reposition
                # the robot and reset the traces.
                terminal = 1
                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]
                log('Number of last episode was :...................................' + str(episodes))
                log('Last episode continued for :...................................' + str(steps) + ' steps.')
                log('Last episode was teminated by reaching the terminal state :....' + str(prev_nextState))
                save_file(path)
                episodes=episodes+1
                if(steps>bestSteps):
                    bestSteps=steps
                steps=0
                log('Best episode continued for :...................................' + str(bestSteps) + ' steps.')
                print(' ')
                # Wait for a fresh sensor sample before repositioning.
                prox_sens=aux_get_prox()
                while(prox_sens[3]==temp_prox_sens[3]):
                    prox_sens=aux_get_prox()
                temp_prox_sens=prox_sens
                resetState=get_state(prox_sens)
                # Rotate in place (opposite wheel speeds) until the robot
                # leaves the terminal orientations 0 and 8.
                while(resetState==0 or resetState==8):
                    if(resetState<4):
                        robot.set_motors_speed(0.1*maxVel, -0.1*maxVel)
                    elif(resetState>4):
                        robot.set_motors_speed(-0.1*maxVel, 0.1*maxVel)
                    robot.step()
                    time.sleep(0.001)
                    prox_sens=aux_get_prox()
                    while(prox_sens[3]==temp_prox_sens[3]):
                        prox_sens=aux_get_prox()
                    temp_prox_sens=prox_sens
                    resetState=get_state(prox_sens)
                # Clear eligibility traces for the new episode.
                for i in range(0,dim):
                    e[i]=0.0
            else:
                terminal = 0
                # Non-terminal: complete the TD error with the discounted
                # greedy value of the next state, then update all weights
                # and decay the traces by gamma*Lambda.
                delta += gamma * w[get_features(prev_nextState,get_best_action(prev_nextState))]
                for i in range(0,dim):
                    w[i] += alpha * delta * e[i]
                    e[i] = gamma * Lambda * e[i]
        # Record statistics only for non-terminal passes.
        if(terminal==0):
            ###########################################
            total_step = total_step + 1
            RMSE(prox_sens)
            track_success(track_reward(nextState), reward(currentState,action,nextState))
            ##########################################
            steps=steps+1
        # Read a fresh sensor sample for the next state and remember
        # this transition for the next pass's learning update.
        prox_sens=aux_get_prox()
        while(prox_sens[3]==temp_prox_sens[3]):
            prox_sens=aux_get_prox()
        temp_prox_sens=prox_sens
        nextState=get_state(prox_sens)
        prev_currentState=currentState
        prev_action=action
        prev_nextState=nextState
except KeyboardInterrupt:
    # Graceful shutdown on CTRL+C: persist weights, show the learning
    # curves, close the robot connection, and exit.
    print("\n")
    save_file(path)
    ###################
    show_result()
    ##################
    log('Learned values have been saved!!!')
    log('Stopping the robot.')
    robot.close()
    log('Quitting program.')
    sys.exit()
except Exception, e:
    # Python 2 except syntax; any other error is reported and the
    # script falls off the end.
    error(e)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement