Untitled

/**
 * Copyright (c) 2001-2018 Mathew A. Nelson and Robocode contributors
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://robocode.sourceforge.net/license/epl-v10.html
 */


import LearningRobots.*;
import robocode.*;
import robocode.HitByBulletEvent;
import robocode.HitRobotEvent;
import robocode.HitWallEvent;

import java.awt.Color;
import java.awt.geom.Point2D;

//import Enemy;
//import LearningRobots.LUT;
//import LearningRobots.Qlearning;
//import LearningRobots.RobotActions;
//import LearningRobots.RobotStates;
//import robocode.Robot;
import robocode.AdvancedRobot;
import robocode.BulletHitEvent;
import robocode.BulletMissedEvent;
import robocode.Rules;
import robocode.ScannedRobotEvent;
//import robocode.control.snapshot.RobotState;
import robocode.util.Utils;


/**
 * MyFirstRobot - a sample robot by Mathew Nelson.
 * <p>
 * Moves in a seesaw motion, and spins the gun around at each end.
 *
 * @author Mathew A. Nelson (original)
 */
public class NNBot extends AdvancedRobot {
	//Reinforcement learning objects
    Qlearning learning;
    LUT table;

    Enemy e;


    //reward variable
    double reward;


    //status variables
    int hitWall = 0;
    int hitByEnemy = 0;


    //###################
    //main robot function
    //###################
    public void run(){
        //set colors of robot
        setColors(Color.red, Color.yellow, Color.pink);

        //independent moving of gun and radar
        setAdjustGunForRobotTurn(true);
        setAdjustRadarForGunTurn(true);


        e = new Enemy();
        table = new LUT(10, 6, 3200);
        load("LUtable.txt");
        learning = new Qlearning(table);
        e.distance = 100000; //fix


        //play game
        while(true){
        	robotMovement();
        	radarMovement();


        }
    }

	public void robotMovement(){

        int state = getState();
        int action = learning.selectActionEGreedy(state); //choose between greedy and softmax
        learning.Qlearn(state, action, reward);
        out.println(action);
        hitByEnemy = 0;
        hitWall = 0;
        out.println("State " + state);
        switch(action){
            case RobotActions.moveAhead:
                ahead(RobotActions.moveDistance);
                break;
            case (RobotActions.moveBack):
                back(RobotActions.moveDistance);
                break;
            case (RobotActions.aheadTurnLeft):
                turnLeft(90);
                ahead(RobotActions.moveDistance);
                break;
            case (RobotActions.aheadTurnRight):
                turnRight(90);
                ahead(RobotActions.moveDistance);
                break;
            case (RobotActions.backTurnLeft):
                turnLeft(90);
                back(RobotActions.moveDistance);
                break;
            case (RobotActions.backTurnRight):
                turnRight(90);
                back(RobotActions.moveDistance);
                break;}
        }

    public void radarMovement() {

        if (getRadarTurnRemaining() == 0.0)
            setTurnRadarRightRadians(Double.POSITIVE_INFINITY);


    }
    int getState(){

        int direction = RobotStates.convertDirection(getHeading());
        int enemyDist = RobotStates.convertEnemyDistance(e.distance);
        int enemyEnergy = (int)e.energy;
        int enemyBearing = RobotStates.convertEnemyBearing(e.bearing);
        int hitW = hitWall;
        int hitByE = hitByEnemy;
        int currState = RobotStates.state[direction][enemyDist][enemyEnergy][enemyBearing][hitW][hitByE]; //CHEKCK THIS

        System.out.println(direction + ", " + enemyDist + ", " + enemyEnergy + ", " + enemyBearing);
        return currState;
}


    public void onScannedRobot(ScannedRobotEvent e) {

        // Absolute angle towards target
        double angleToEnemy = getHeadingRadians() + e.getBearingRadians();

        // Subtract current radar heading to get the turn required to face the enemy, be sure it is normalized
        double radarTurn = Utils.normalRelativeAngle( angleToEnemy - getRadarHeadingRadians() );

        // Distance we want to scan from middle of enemy to either side
        // The 36.0 is how many units from the center of the enemy robot it scans.
        double extraTurn = Math.min( Math.atan( 36.0 / e.getDistance() ), Rules.RADAR_TURN_RATE_RADIANS );

        // Adjust the radar turn so it goes that much further in the direction it is going to turn
        // Basically if we were going to turn it left, turn it even more left, if right, turn more right.
        // This allows us to overshoot our enemy so that we get a good sweep that will not slip.
        if (radarTurn < 0)
            radarTurn -= extraTurn;
        else
            radarTurn += extraTurn;

        //Turn the radar
        setTurnRadarRightRadians(radarTurn);

        //Fire operations -- CircularTargeting method from robowiki:
        circularTargeting(e);

    }
	/**
	 * Fire when we see a robot
	 */
    public void circularTargeting(ScannedRobotEvent e){
        double bulletPower = Math.min(3.0, getEnergy());
        double myX = getX();
        double myY = getY();
        double absoluteBearing = getHeadingRadians() + e.getBearingRadians();
        double enemyX = getX() + e.getDistance() * Math.sin(absoluteBearing);
        double enemyY = getY() + e.getDistance() * Math.cos(absoluteBearing);
        double oldEnemyHeading = 0; //correct this
        double enemyHeading = e.getHeadingRadians();
        double enemyHeadingChange = enemyHeading - oldEnemyHeading;
        double enemyVelocity = e.getVelocity();
        oldEnemyHeading = enemyHeading;

        double deltaTime = 0;
        double battleFieldHeight = getBattleFieldHeight(),
                battleFieldWidth = getBattleFieldWidth();
        double predictedX = enemyX, predictedY = enemyY;
        while((++deltaTime) * (20.0 - 3.0 * bulletPower) <
                Point2D.Double.distance(myX, myY, predictedX, predictedY)){
            predictedX += Math.sin(enemyHeading) * enemyVelocity;
            predictedY += Math.cos(enemyHeading) * enemyVelocity;
            enemyHeading += enemyHeadingChange;
            if(	predictedX < 18.0
                    || predictedY < 18.0
                    || predictedX > battleFieldWidth - 18.0
                    || predictedY > battleFieldHeight - 18.0){

                predictedX = Math.min(Math.max(18.0, predictedX),
                        battleFieldWidth - 18.0);
                predictedY = Math.min(Math.max(18.0, predictedY),
                        battleFieldHeight - 18.0);
                break;
            }
        }
        double theta = Utils.normalAbsoluteAngle(Math.atan2(
                predictedX - getX(), predictedY - getY()));

        setTurnRadarRightRadians(Utils.normalRelativeAngle(
                absoluteBearing - getRadarHeadingRadians()));
        setTurnGunRightRadians(Utils.normalRelativeAngle(
                theta - getGunHeadingRadians()));
        fire(3); //alter firepower with distance?
    }

	/**
	 * We were hit!  Turn perpendicular to the bullet,
	 * so our seesaw might avoid a future shot.
	 */
    public void onHitWall(HitWallEvent e){
        reward += -2; //decrement reward by some value
        hitWall = 1;
    }

    public void onBulletHit(BulletHitEvent e) {

        //out.println("Bullet Hit: " + change);
        reward += e.getBullet().getPower() * 9;
        }
    public void onBulletMissed(BulletMissedEvent e) {
        //out.println("Bullet Missed: " + change);
        reward += -e.getBullet().getPower();
    }
    public void onHitByBullet(HitByBulletEvent e) {
        double power = e.getBullet().getPower();
        hitByEnemy  = 1;
        //out.println("Hit By Bullet: " + -(4 * power + 2 * (power - 1)));
        reward += -(4 * power + 2 * (power - 1));
        }

    public void onHitRobot(HitRobotEvent e) {
            out.println("Hit Robot: " + -6.0);
            reward += -6.0;
        }


    public void onWin(WinEvent event)
    {
    	//System.out.println(table.getMaxValue(5));

            table.save(getDataFile("LUtable.txt"));

    }

    public void onDeath(DeathEvent event)
    {
    	//System.out.println(table.getMaxValue(5));

            table.save(getDataFile("LUtable.txt"));


    }
    public void load(String file) {
    	try {
    		table.load(getDataFile(file));
    	}catch(Exception e) {
    		out.println("Caught exception trying to write to file: " + e);
    	}

    }

}


/**
 * Discretises the robot's situation and maps every combination of the
 * discrete components to a unique state id.
 * <p>
 * {@link #state} is indexed as
 * {@code [direction][enemyDistance][enemyBearing][energy][hitWall][hitByBullet]}
 * and holds the ids {@code 0 .. numOfStates - 1}.
 */
public class RobotStates {

    /** Width, in distance units, of one enemy-distance bucket. */
    private static final double DISTANCE_BUCKET_WIDTH = 30.0;

    static int numDirections = 4;      //number of direction states
    static int numEnemyDistance = 10;   //number of enemy distance states
    static int numEnemyBearing = 4;     //number of enemy bearing states
    static int numEnergy = 5;          //energy levels of robot
    static int hitWall = 2;            //hit wall or not
    static int hitByBullet = 2;        //hit by bullet or not

    //total num of states: the product of ALL six dimensions of the state table
    static int numOfStates =
            numDirections * numEnemyDistance * numEnemyBearing * numEnergy * hitWall * hitByBullet;

    public static int[][][][][][] state =
            new int[numDirections][numEnemyDistance][numEnemyBearing][numEnergy][hitWall][hitByBullet];

    // Give every cell its own id; without this every lookup would yield 0
    // and all situations would collapse into a single state.
    static {
        int id = 0;
        for (int dir = 0; dir < numDirections; dir++) {
            for (int dist = 0; dist < numEnemyDistance; dist++) {
                for (int bearing = 0; bearing < numEnemyBearing; bearing++) {
                    for (int energy = 0; energy < numEnergy; energy++) {
                        for (int wall = 0; wall < hitWall; wall++) {
                            for (int bullet = 0; bullet < hitByBullet; bullet++) {
                                state[dir][dist][bearing][energy][wall][bullet] = id++;
                            }
                        }
                    }
                }
            }
        }
    }

    //converting functions

    /**
     * Maps a heading in degrees to one of {@code numDirections} sectors.
     * Sector 0 is centred on 0 degrees, so headings just below 360 wrap
     * around to sector 0.
     *
     * @param dir heading in degrees
     * @return sector index in {@code [0, numDirections)}
     */
    public static int convertDirection(double dir) {
        double sector = 360.0 / numDirections;
        double shifted = (dir + sector / 2) % 360.0;
        if (shifted < 0) {
            shifted += 360.0;
        }
        // The final modulo guards against rounding that lands exactly on 360.
        return (int) (shifted / sector) % numDirections;
    }

    /**
     * Maps a distance to a bucket of width 30, clamped to the valid range.
     *
     * @param dist distance to the enemy
     * @return bucket index in {@code [0, numEnemyDistance)}
     */
    public static int convertEnemyDistance(double dist) {
        int distance = (int) (dist / DISTANCE_BUCKET_WIDTH); //look this up
        return Math.max(0, Math.min(distance, numEnemyDistance - 1));
    }

    /**
     * Maps a bearing in radians (negative values allowed) to one of
     * {@code numEnemyBearing} sectors. Sector 0 is centred on bearing 0.
     *
     * @param bearing bearing in radians
     * @return sector index in {@code [0, numEnemyBearing)}
     */
    public static int convertEnemyBearing(double bearing) {
        double fullCircle = Math.PI * 2;
        double sector = fullCircle / numEnemyBearing;
        double shifted = (bearing + sector / 2) % fullCircle;
        if (shifted < 0) {
            shifted += fullCircle;
        }
        // The final modulo guards against rounding that lands exactly on 2*PI.
        return (int) (shifted / sector) % numEnemyBearing;
    }

}


import java.io.*;
import robocode.*;

/**
 * Look-up table of Q-values indexed as {@code table[state][action]}, with
 * plain-text persistence (first line: number of states, then one value per
 * line in state-major order).
 */
public class LUT {

    int argNumInputs;
    int[] argVariableFloor;    // never assigned in this class
    int[] argVariableCeiling;  // never assigned in this class

    int numActions;
    int numStates;

    public double[][] table;

    /**
     * Creates a zero-filled table.
     *
     * @param argNumInputs stored but not otherwise used by this class
     * @param numActions   number of columns (actions)
     * @param numStates    number of rows (states)
     */
    public LUT(int argNumInputs,  int numActions, int numStates) {
        this.argNumInputs = argNumInputs;
        this.numActions = numActions;
        this.numStates = numStates;
        //initializing table when constructor is called
        initialiseLUT();
    }

    /** Replaces the table with a fresh one whose entries are all zero. */
    void initialiseLUT() {
        // Java zero-fills newly allocated double arrays.
        table = new double[numStates][numActions];
    }

    /** Returns the value stored for the given state and action. */
    public double getValue(int state, int action){
        return table[state][action];
    }

    /** Stores {@code QVal} for the given state and action. */
    public void setValue(int state, int action, double QVal){
        table[state][action] = QVal;
    }

    /**
     * Returns the largest value stored for {@code state}.
     * Starts from negative infinity so arbitrarily negative values are handled.
     */
    double getMaxValue(int state){
        double maxValue = Double.NEGATIVE_INFINITY;
        for (int j = 0; j < numActions; j++) {
            if (table[state][j] > maxValue) {
                maxValue = table[state][j];
            }
        }
        return maxValue;
    }

    /**
     * Returns the action with the largest value for {@code state}; the
     * lowest index wins ties, and 0 is returned when there are no actions.
     */
    int getBestAction(int state){
        double maxValue = Double.NEGATIVE_INFINITY;
        int bestAction = 0;             //return this action by default
        for (int i = 0; i < table[state].length; i++) {
            double candidate = getValue(state, i);
            if (candidate > maxValue) {
                maxValue = candidate;
                bestAction = i;
            }
        }
        return bestAction;
    }

    /**
     * Writes the table to {@code file}. If the output stream cannot be
     * created the failure is reported on standard output and nothing is
     * written.
     *
     * @param file destination file
     */
    public void save( File file ) {
        System.out.println("hei");

        PrintStream saveFile;
        try {
            saveFile = new PrintStream( new RobocodeFileOutputStream( file ));
        }
        catch (IOException e) {
            System.out.println( "*** Could not create output stream for LUT save file.");
            // Without a stream there is nothing to write to.
            return;
        }

        int numEntriesSaved = 0;
        try {
            saveFile.println( numStates );
            for (int i = 0; i < numStates; i++) {
                for (int j = 0; j < numActions; j++) {
                    // Progress trace: echo the rows of every 100th state.
                    if ((i % 100) == 0) System.out.println(table[i][j]);
                    saveFile.println(table[i][j]);
                    numEntriesSaved++;
                }
            }
        } finally {
            saveFile.close();
        }
        System.out.println ( "--+ Number of LUT table entries saved is " + numEntriesSaved );
    }

    /**
     * Loads the LUT from file.
     * Expects the first line to equal the number of states; otherwise the
     * table is left untouched and the method returns. The table is only
     * overwritten once every entry has been read successfully.
     *
     * @param argFileName file to read
     * @throws IOException if the file cannot be read or ends before all entries were read
     */
    public void load( File argFileName ) throws IOException {
        BufferedReader inputReader =
                new BufferedReader(new InputStreamReader(new FileInputStream(argFileName)));
        try {
            // Check that the maxIndex matches
            int maxIndexFromFile = Integer.parseInt(readRequiredLine(inputReader).trim());
            if (maxIndexFromFile != numStates) {
                System.out.println("*** MaxIndex for file does not match LUT.");
                return;
            }

            // Read into a scratch array first so a corrupt file cannot leave
            // the live table half overwritten.
            double[][] loaded = new double[numStates][numActions];
            int numEntriesLoaded = 0;
            for (int i = 0; i < numStates; i++) {
                for (int j = 0; j < numActions; j++) {
                    loaded[i][j] = Double.parseDouble(readRequiredLine(inputReader));
                    numEntriesLoaded++;
                }
            }

            for (int i = 0; i < numStates; i++) {
                System.arraycopy(loaded[i], 0, table[i], 0, numActions);
            }
            System.out.println ( "--+ Number of LUT entries loaded was " + numEntriesLoaded );
        } finally {
            inputReader.close();
        }
    }

    /** Reads one line, failing with an {@link EOFException} when the input has ended. */
    private static String readRequiredLine(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            throw new EOFException("LUT file ended before all entries were read");
        }
        return line;
    }

}


import java.io.*;


/**
 * Tabular Q-learning agent on top of a {@code LUT}: performs the value
 * update and offers epsilon-greedy and softmax action selection.
 */
public class Qlearning {

    //hyperparameters
    private double learningRate = 0.1;
    private double discountFactor = 0.9;

    private double epsilon = 0.7; //exploration rate: probability of picking a random action
    private double tau = 0.3;     //softmax temperature: lower values favour the best action more

    //Look-up table
    LUT table;

    //State and action chosen at the previous learning step
    int prevState = 0;
    int prevAction = 0;

    /** False until the first call to {@link #Qlearn}, i.e. while no previous step exists. */
    private boolean hasPrevious = false;

    //FUNCTIONS
    public Qlearning(LUT table){
        this.table = table;
    }

    /**
     * Applies the Q-learning update to the PREVIOUS state/action pair using
     * the reward observed since then and the best value reachable from the
     * current state, then remembers the current pair for the next call.
     * The very first call only records the pair, because there is no earlier
     * decision to credit.
     *
     * @param state  the state the agent is in now
     * @param action the action chosen for {@code state}
     * @param reward reward received since the previous call
     */
    public void Qlearn(int state, int action, double reward){
        if (hasPrevious) {
            double oldQVal = table.getValue(prevState, prevAction);
            double learnedValue = reward + discountFactor * table.getMaxValue(state);
            double newQVal = (1 - learningRate) * oldQVal + learningRate * learnedValue;

            // The entry being improved is the one the old value was read from.
            table.setValue(prevState, prevAction, newQVal);
        }

        //update state and action
        prevAction = action;
        prevState = state;
        hasPrevious = true;
    }

    /**
     * Epsilon-greedy selection: with probability {@code epsilon} a uniformly
     * random action, otherwise the action with the highest table value.
     *
     * @param state current state
     * @return an action index in {@code [0, table.numActions)}
     */
    public int selectActionEGreedy(int state){
        double rand = Math.random();
        System.out.println(rand);
        if (rand < epsilon) {
            System.out.println("random");
            // Scale before truncating; casting Math.random() alone always yields 0.
            return (int) (Math.random() * table.numActions);
        }
        return table.getBestAction(state);
    }

    /**
     * Softmax (Boltzmann) selection: samples an action with probability
     * proportional to {@code exp(Q / tau)}. Falls back to the best action
     * when the weights cannot be normalised.
     *
     * @param state current state
     * @return an action index in {@code [0, table.numActions)}
     */
    int selectActionSoftMax(int state){
        int count = table.numActions;

        // Subtracting the largest value keeps exp() from overflowing and
        // does not change the resulting probabilities.
        double maxQ = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < count; i++) {
            maxQ = Math.max(maxQ, table.getValue(state, i));
        }

        double[] weights = new double[count];
        double weightSum = 0;
        for (int i = 0; i < count; i++) {
            weights[i] = Math.exp((table.getValue(state, i) - maxQ) / tau);
            weightSum += weights[i];
        }

        // Also catches NaN, for which the comparison is false.
        if (!(weightSum > 0) || Double.isInfinite(weightSum)) {
            return table.getBestAction(state);
        }

        // Walk the cumulative weights until they pass the random threshold.
        double threshold = Math.random() * weightSum;
        double cumulative = 0.0;
        for (int i = 0; i < count; i++) {
            cumulative += weights[i];
            if (threshold < cumulative) {
                return i;
            }
        }
        // Only reachable through floating-point rounding of the running sum.
        return table.getBestAction(state);
    }

}


import java.awt.geom.*;

/**
 * Mutable record of an opponent's last observed motion, with helpers that
 * extrapolate its position to a later time. Headings are used with
 * {@code x += sin(head)} and {@code y += cos(head)}.
 */
public class Enemy
{
  String name;
  public double bearing;
  public double head;        // heading in radians, as fed to Math.sin/Math.cos
  public long ctime;         // time of the observation the other fields refer to
  public double speed;
  public double x;
  public double y;
  public double distance;
  public double changehead;  // heading change per time unit
  public double energy = 3;

  /**
   * Predicts the position at time {@code when}: along a circular arc when
   * the heading is changing noticeably, along a straight line otherwise.
   */
  public Point2D.Double guessPosition(long when)
  {
    double elapsed = when - ctime;

    // Insignificant heading change: plain linear extrapolation.
    if (!(Math.abs(changehead) > 0.00001))
    {
      double straightY = y + Math.cos(head) * speed * elapsed;
      double straightX = x + Math.sin(head) * speed * elapsed;
      return new Point2D.Double(straightX, straightY);
    }

    // Significant heading change: follow the turning circle.
    double turnRadius = speed / changehead;
    double headingDelta = elapsed * changehead;
    double arcY = y + (Math.sin(head + headingDelta) * turnRadius) - (Math.sin(head) * turnRadius);
    double arcX = x + (Math.cos(head) * turnRadius) - (Math.cos(head + headingDelta) * turnRadius);
    return new Point2D.Double(arcX, arcY);
  }

  /** Linear prediction of the x coordinate at {@code when}; prints the elapsed time. */
  public double guessX(long when)
  {
    long elapsed = when - ctime;
    System.out.println(elapsed);
    double xPerTick = Math.sin(head) * speed;
    return x + xPerTick * elapsed;
  }

  /** Linear prediction of the y coordinate at {@code when}. */
  public double guessY(long when)
  {
    long elapsed = when - ctime;
    double yPerTick = Math.cos(head) * speed;
    return y + yPerTick * elapsed;
  }
}


/**
 * Constants describing the robot's discrete movement actions: one integer
 * id per action (contiguous, starting at 0) and the distance used when an
 * action moves the robot.
 */
public class RobotActions {

    /** Distance covered by a move action; set manually for now. */
    public static final double moveDistance = 100;

    // Straight moves
    public static final int moveAhead = 0;
    public static final int moveBack = 1;

    // Moves combined with a left or right turn
    public static final int aheadTurnLeft = 2;
    public static final int aheadTurnRight = 3;
    public static final int backTurnLeft = 4;
    public static final int backTurnRight = 5;

    // The total number of actions is not tracked in this class.

}