Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Copyright (c) 2001-2018 Mathew A. Nelson and Robocode contributors
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://robocode.sourceforge.net/license/epl-v10.html
- */
- import LearningRobots.*;
- import robocode.*;
- import robocode.HitByBulletEvent;
- import robocode.HitRobotEvent;
- import robocode.HitWallEvent;
- import java.awt.Color;
- import java.awt.geom.Point2D;
- //import Enemy;
- //import LearningRobots.LUT;
- //import LearningRobots.Qlearning;
- //import LearningRobots.RobotActions;
- //import LearningRobots.RobotStates;
- //import robocode.Robot;
- import robocode.AdvancedRobot;
- import robocode.BulletHitEvent;
- import robocode.BulletMissedEvent;
- import robocode.Rules;
- import robocode.ScannedRobotEvent;
- //import robocode.control.snapshot.RobotState;
- import robocode.util.Utils;
- /**
- * MyFirstRobot - a sample robot by Mathew Nelson.
- * <p>
- * Moves in a seesaw motion, and spins the gun around at each end.
- *
- * @author Mathew A. Nelson (original)
- */
- public class NNBot extends AdvancedRobot {
- //Reinforcement learning objects
- Qlearning learning;
- LUT table;
- Enemy e;
- //reward variable
- double reward;
- //status variables
- int hitWall = 0;
- int hitByEnemy = 0;
- //###################
- //main robot function
- //###################
- public void run(){
- //set colors of robot
- setColors(Color.red, Color.yellow, Color.pink);
- //independent moving of gun and radar
- setAdjustGunForRobotTurn(true);
- setAdjustRadarForGunTurn(true);
- e = new Enemy();
- table = new LUT(10, 6, 3200);
- load("LUtable.txt");
- learning = new Qlearning(table);
- e.distance = 100000; //fix
- //play game
- while(true){
- robotMovement();
- radarMovement();
- }
- }
- public void robotMovement(){
- int state = getState();
- int action = learning.selectActionEGreedy(state); //choose between greedy and softmax
- learning.Qlearn(state, action, reward);
- out.println(action);
- hitByEnemy = 0;
- hitWall = 0;
- out.println("State " + state);
- switch(action){
- case RobotActions.moveAhead:
- ahead(RobotActions.moveDistance);
- break;
- case (RobotActions.moveBack):
- back(RobotActions.moveDistance);
- break;
- case (RobotActions.aheadTurnLeft):
- turnLeft(90);
- ahead(RobotActions.moveDistance);
- break;
- case (RobotActions.aheadTurnRight):
- turnRight(90);
- ahead(RobotActions.moveDistance);
- break;
- case (RobotActions.backTurnLeft):
- turnLeft(90);
- back(RobotActions.moveDistance);
- break;
- case (RobotActions.backTurnRight):
- turnRight(90);
- back(RobotActions.moveDistance);
- break;}
- }
- public void radarMovement() {
- if (getRadarTurnRemaining() == 0.0)
- setTurnRadarRightRadians(Double.POSITIVE_INFINITY);
- }
- int getState(){
- int direction = RobotStates.convertDirection(getHeading());
- int enemyDist = RobotStates.convertEnemyDistance(e.distance);
- int enemyEnergy = (int)e.energy;
- int enemyBearing = RobotStates.convertEnemyBearing(e.bearing);
- int hitW = hitWall;
- int hitByE = hitByEnemy;
- int currState = RobotStates.state[direction][enemyDist][enemyEnergy][enemyBearing][hitW][hitByE]; //CHEKCK THIS
- System.out.println(direction + ", " + enemyDist + ", " + enemyEnergy + ", " + enemyBearing);
- return currState;
- }
- public void onScannedRobot(ScannedRobotEvent e) {
- // Absolute angle towards target
- double angleToEnemy = getHeadingRadians() + e.getBearingRadians();
- // Subtract current radar heading to get the turn required to face the enemy, be sure it is normalized
- double radarTurn = Utils.normalRelativeAngle( angleToEnemy - getRadarHeadingRadians() );
- // Distance we want to scan from middle of enemy to either side
- // The 36.0 is how many units from the center of the enemy robot it scans.
- double extraTurn = Math.min( Math.atan( 36.0 / e.getDistance() ), Rules.RADAR_TURN_RATE_RADIANS );
- // Adjust the radar turn so it goes that much further in the direction it is going to turn
- // Basically if we were going to turn it left, turn it even more left, if right, turn more right.
- // This allows us to overshoot our enemy so that we get a good sweep that will not slip.
- if (radarTurn < 0)
- radarTurn -= extraTurn;
- else
- radarTurn += extraTurn;
- //Turn the radar
- setTurnRadarRightRadians(radarTurn);
- //Fire operations -- CircularTargeting method from robowiki:
- circularTargeting(e);
- }
- /**
- * Fire when we see a robot
- */
- public void circularTargeting(ScannedRobotEvent e){
- double bulletPower = Math.min(3.0, getEnergy());
- double myX = getX();
- double myY = getY();
- double absoluteBearing = getHeadingRadians() + e.getBearingRadians();
- double enemyX = getX() + e.getDistance() * Math.sin(absoluteBearing);
- double enemyY = getY() + e.getDistance() * Math.cos(absoluteBearing);
- double oldEnemyHeading = 0; //correct this
- double enemyHeading = e.getHeadingRadians();
- double enemyHeadingChange = enemyHeading - oldEnemyHeading;
- double enemyVelocity = e.getVelocity();
- oldEnemyHeading = enemyHeading;
- double deltaTime = 0;
- double battleFieldHeight = getBattleFieldHeight(),
- battleFieldWidth = getBattleFieldWidth();
- double predictedX = enemyX, predictedY = enemyY;
- while((++deltaTime) * (20.0 - 3.0 * bulletPower) <
- Point2D.Double.distance(myX, myY, predictedX, predictedY)){
- predictedX += Math.sin(enemyHeading) * enemyVelocity;
- predictedY += Math.cos(enemyHeading) * enemyVelocity;
- enemyHeading += enemyHeadingChange;
- if( predictedX < 18.0
- || predictedY < 18.0
- || predictedX > battleFieldWidth - 18.0
- || predictedY > battleFieldHeight - 18.0){
- predictedX = Math.min(Math.max(18.0, predictedX),
- battleFieldWidth - 18.0);
- predictedY = Math.min(Math.max(18.0, predictedY),
- battleFieldHeight - 18.0);
- break;
- }
- }
- double theta = Utils.normalAbsoluteAngle(Math.atan2(
- predictedX - getX(), predictedY - getY()));
- setTurnRadarRightRadians(Utils.normalRelativeAngle(
- absoluteBearing - getRadarHeadingRadians()));
- setTurnGunRightRadians(Utils.normalRelativeAngle(
- theta - getGunHeadingRadians()));
- fire(3); //alter firepower with distance?
- }
- /**
- * We were hit! Turn perpendicular to the bullet,
- * so our seesaw might avoid a future shot.
- */
- public void onHitWall(HitWallEvent e){
- reward += -2; //decrement reward by some value
- hitWall = 1;
- }
- public void onBulletHit(BulletHitEvent e) {
- //out.println("Bullet Hit: " + change);
- reward += e.getBullet().getPower() * 9;
- }
- public void onBulletMissed(BulletMissedEvent e) {
- //out.println("Bullet Missed: " + change);
- reward += -e.getBullet().getPower();
- }
- public void onHitByBullet(HitByBulletEvent e) {
- double power = e.getBullet().getPower();
- hitByEnemy = 1;
- //out.println("Hit By Bullet: " + -(4 * power + 2 * (power - 1)));
- reward += -(4 * power + 2 * (power - 1));
- }
- public void onHitRobot(HitRobotEvent e) {
- out.println("Hit Robot: " + -6.0);
- reward += -6.0;
- }
- public void onWin(WinEvent event)
- {
- //System.out.println(table.getMaxValue(5));
- table.save(getDataFile("LUtable.txt"));
- }
- public void onDeath(DeathEvent event)
- {
- //System.out.println(table.getMaxValue(5));
- table.save(getDataFile("LUtable.txt"));
- }
- public void load(String file) {
- try {
- table.load(getDataFile(file));
- }catch(Exception e) {
- out.println("Caught exception trying to write to file: " + e);
- }
- }
- }
/**
 * Discretisation of the robot's situation into a single state index.
 * <p>
 * {@link #state} maps one bucket per dimension (direction, enemy distance,
 * enemy bearing, energy, hit-wall flag, hit-by-bullet flag) to a unique index
 * in {@code 0 .. numOfStates - 1}. The {@code convert*} methods turn raw
 * values into bucket numbers for the first three dimensions.
 */
public class RobotStates {
    static int numDirections = 4; //number of direction states
    static int numEnemyDistance = 10; //number of enemy distance states
    static int numEnemyBearing = 4; //number of enemy bearing states
    static int numEnergy = 5; //energy levels of robot
    static int hitWall = 2; //hit wall or not
    static int hitByBullet = 2; //hit by bullet or not

    //total num of states: the product of ALL six dimensions
    static int numOfStates = numDirections * numEnemyDistance * numEnemyBearing
            * numEnergy * hitWall * hitByBullet;

    public static int state[][][][][][] =
            new int[numDirections][numEnemyDistance][numEnemyBearing][numEnergy][hitWall][hitByBullet];

    //Give every combination of buckets its own index. Without this the array
    //holds only zeros and every situation collapses into state 0.
    static {
        int index = 0;
        for (int dir = 0; dir < numDirections; dir++) {
            for (int dist = 0; dist < numEnemyDistance; dist++) {
                for (int bearing = 0; bearing < numEnemyBearing; bearing++) {
                    for (int energy = 0; energy < numEnergy; energy++) {
                        for (int wall = 0; wall < hitWall; wall++) {
                            for (int bullet = 0; bullet < hitByBullet; bullet++) {
                                state[dir][dist][bearing][energy][wall][bullet] = index++;
                            }
                        }
                    }
                }
            }
        }
    }

    //converting functions

    /**
     * Maps a heading in degrees to a direction bucket. Buckets are centred on
     * multiples of {@code 360 / numDirections}, so bucket 0 straddles 0 degrees.
     *
     * @param dir heading in degrees; values outside 0..360 are wrapped
     * @return bucket in {@code 0 .. numDirections - 1}
     */
    public static int convertDirection(double dir) {
        double sector = 360.0 / numDirections;
        //shift by half a sector so each bucket is centred, then wrap into [0, 360)
        double shifted = (dir + sector / 2) % 360.0;
        if (shifted < 0) {
            shifted += 360.0;
        }
        //min() guards the case where rounding pushes the value onto 360 itself
        return Math.min((int) (shifted / sector), numDirections - 1);
    }

    /**
     * Maps a distance to a distance bucket of width 30, clamped to the
     * available buckets.
     *
     * @param dist distance to the enemy
     * @return bucket in {@code 0 .. numEnemyDistance - 1}
     */
    public static int convertEnemyDistance(double dist) {
        int distance = (int) (dist / 30.0); //look this up
        return Math.max(0, Math.min(distance, numEnemyDistance - 1));
    }

    /**
     * Maps a bearing in radians to a bearing bucket. Buckets are centred on
     * multiples of {@code 2*PI / numEnemyBearing}, so bucket 0 straddles 0.
     *
     * @param bearing bearing in radians; negative values are wrapped
     * @return bucket in {@code 0 .. numEnemyBearing - 1}
     */
    public static int convertEnemyBearing(double bearing) {
        double fullTurn = Math.PI * 2;
        double sector = fullTurn / numEnemyBearing;
        //shift by half a sector so each bucket is centred, then wrap into [0, 2*PI)
        double shifted = (bearing + sector / 2) % fullTurn;
        if (shifted < 0) {
            shifted += fullTurn;
        }
        //min() guards the case where rounding pushes the value onto 2*PI itself
        return Math.min((int) (shifted / sector), numEnemyBearing - 1);
    }
}
- import java.io.*;
- import robocode.*;
- public class LUT {
- int argNumInputs;
- int[] argVariableFloor;
- int[] argVariableCeiling;
- int numActions;
- int numStates;
- public double[][] table;
- public LUT(int argNumInputs, int numActions, int numStates) {
- this.argNumInputs = argNumInputs;
- //this.argVariableFloor = argVariableFloor;
- //this.argVariableCeiling = argVariableCeiling;
- this.numActions = numActions;
- this.numStates = numStates;
- table = new double[numStates][numActions];
- //initializing table when constructor is called
- initialiseLUT();
- }
- //initialize entries to zero
- void initialiseLUT() {
- table = new double[numStates][numActions];
- for (int i = 0; i < numStates; i++) {
- for (int j = 0; j < numActions; j++) {
- table[i][j] = 0.0;
- }
- }
- }
- //return specified table value
- public double getValue(int state, int action){
- return table[state][action];
- }
- //set specified table value
- public void setValue(int state, int action, double QVal){
- table[state][action] = QVal;
- }
- double getMaxValue(int state){
- double maxValue = -100000;
- for (int j = 0; j < numActions; j++){
- if (table[state][j] > maxValue){
- maxValue = table[state][j];
- }
- }
- return maxValue;
- }
- int getBestAction(int state){
- double maxValue = -100000;
- int bestAction = 0; //return this action by default
- for (int i = 0; i < table[state].length; i ++){
- if (getValue(state, i) > maxValue){
- maxValue = getValue(state, i);
- bestAction = i;
- }
- }
- return bestAction;
- }
- public void save( File file ) {
- PrintStream saveFile = null;
- System.out.println("hei");
- try {
- saveFile = new PrintStream( new RobocodeFileOutputStream( file ));
- }
- catch (IOException e) {
- System.out.println( "*** Could not create output stream for LUT save file.");
- }
- saveFile.println( numStates );
- int numEntriesSaved = 0;
- for (int i=0; i<numStates; i++) {
- for (int j = 0; j < numActions; j++) {
- if ((i % 100)==0) System.out.println(table[i][j]);
- saveFile.println(table[i][j]);
- numEntriesSaved++;
- }
- }
- saveFile.close();
- System.out.println ( "--+ Number of LUT table entries saved is " + numEntriesSaved );
- }
- /**
- * Loads the LUT from file
- * Expects that the 1st line match the maxIndex otherwise returns
- */
- public void load( File argFileName ) throws IOException {
- FileInputStream inputFile = new FileInputStream(argFileName);
- BufferedReader inputReader = new BufferedReader(new InputStreamReader(inputFile));
- // Check that the maxIndex matches
- int maxIndexFromFile = Integer.valueOf(inputReader.readLine());
- if (maxIndexFromFile != numStates) {
- System.out.println("*** MaxIndex for file does not match LUT.");
- inputReader.close();
- return;
- }
- // Now load the LUT
- int numEntriesLoaded = 0;
- for (int i = 0; i < numStates; i++) {
- for (int j = 0; j < numActions; j++){
- table[i][j] = Double.parseDouble(inputReader.readLine());
- numEntriesLoaded++;
- }
- }
- inputReader.close();
- System.out.println ( "--+ Number of LUT entries loaded was " + numEntriesLoaded );
- }
- }
- import java.io.*;
- public class Qlearning {
- //hyperparameters
- private double learningRate = 0.1;
- private double discountFactor = 0.9;
- private double epsilon = 0.7; //exploration rate
- private double tau = 0.3; //exploitation rate
- //reward variable
- //double reward;
- //Look-up table
- LUT table;
- //Updated action and state variables
- int prevState = 0;
- int prevAction = 0;
- //FUNCTIONS
- public Qlearning(LUT table){
- this.table = table;
- }
- public void Qlearn(int state, int action, double reward){
- double oldQVal = table.getValue(prevState, prevAction);
- double learnedValue = reward + discountFactor * table.getMaxValue(state);
- double newQVal = (1 - learningRate) * oldQVal + learningRate * learnedValue;
- table.setValue(state, action, newQVal);
- //update state and action
- prevAction = action;
- prevState = state;
- }
- //implement both Softmax and epsilon-greedy action selection
- public int selectActionEGreedy(int state){
- int action;
- double rand = Math.random();
- System.out.println(rand);
- if (epsilon < rand){
- System.out.println("random");
- action = (int) Math.random() * table.numActions; //verify this
- }
- else{
- action = table.getBestAction(state);
- }
- return action;
- }
- int selectActionSoftMax(int state){
- int action = 0;
- double Qsum = 0;
- double[] Qprob = new double[table.numActions];
- for (int i = 0; i < table.numActions; i ++){
- Qprob[i] = Math.exp(table.getValue(state, i) / tau);
- Qsum += Qprob[i];
- }
- if (Qsum != 0){
- for (int i = 0; i < table.numActions; i ++) {
- Qprob[i] /= Qsum;
- }
- }else{
- action = table.getBestAction(state);
- return action;
- }
- //Look into this
- double cumulativeProb = 0.0;
- double randomNum = Math.random();
- while (randomNum > cumulativeProb && action < table.numActions)
- {
- cumulativeProb += Qprob[action];
- action ++;
- }
- return action - 1;
- }
- }
- import java.awt.geom.*;
/**
 * Last known data about an enemy robot plus simple position prediction.
 * <p>
 * The prediction methods treat {@code head} as an angle in radians measured
 * from the positive y axis towards the positive x axis (x grows with
 * {@code sin(head)}, y with {@code cos(head)}), {@code speed} as distance per
 * time unit and {@code changehead} as heading change per time unit.
 */
public class Enemy {
    String name;
    public double bearing;
    public double head;
    public long ctime; //time at which x, y, head and speed were recorded
    public double speed;
    public double x, y;
    public double distance;
    public double changehead;
    public double energy = 3;

    /**
     * Predicts the enemy position at time {@code when}.
     *
     * @param when time to predict for, on the same clock as {@code ctime}
     * @return the predicted position
     */
    public Point2D.Double guessPosition(long when) {
        double diff = when - ctime;
        double newY;
        double newX;

        if (Math.abs(changehead) > 0.00001) {
            // significant change in heading: move along a circular arc
            double radius = speed / changehead;
            double tothead = diff * changehead;
            newY = y + (Math.sin(head + tothead) * radius) - (Math.sin(head) * radius);
            newX = x + (Math.cos(head) * radius) - (Math.cos(head + tothead) * radius);
        } else {
            // insignificant change in heading: move along a straight line
            newY = y + Math.cos(head) * speed * diff;
            newX = x + Math.sin(head) * speed * diff;
        }
        return new Point2D.Double(newX, newY);
    }

    /**
     * Predicts the x coordinate at time {@code when}, assuming straight-line
     * movement.
     */
    public double guessX(long when) {
        long diff = when - ctime;
        return x + Math.sin(head) * speed * diff;
    }

    /**
     * Predicts the y coordinate at time {@code when}, assuming straight-line
     * movement.
     */
    public double guessY(long when) {
        long diff = when - ctime;
        return y + Math.cos(head) * speed * diff;
    }
}
/**
 * Constants naming the movement actions and the distance moved per action.
 */
public class RobotActions {
    // Distance travelled by a move action; set manually for now.
    public static final double moveDistance = 100;

    // Action identifiers, numbered consecutively from 0.
    // Straight moves.
    public static final int moveAhead = 0, moveBack = 1;
    // Turn, then move ahead.
    public static final int aheadTurnLeft = 2, aheadTurnRight = 3;
    // Turn, then move back.
    public static final int backTurnLeft = 4, backTurnRight = 5;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement