Advertisement
Guest User

Untitled

a guest
Nov 20th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 18.56 KB | None | 0 0
  1. /**
  2. * Copyright (c) 2001-2018 Mathew A. Nelson and Robocode contributors
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the Eclipse Public License v1.0
  5. * which accompanies this distribution, and is available at
  6. * http://robocode.sourceforge.net/license/epl-v10.html
  7. */
  8.  
  9.  
  10. import LearningRobots.*;
  11. import robocode.*;
  12. import robocode.HitByBulletEvent;
  13. import robocode.HitRobotEvent;
  14. import robocode.HitWallEvent;
  15.  
  16. import java.awt.Color;
  17. import java.awt.geom.Point2D;
  18.  
  19. //import Enemy;
  20. //import LearningRobots.LUT;
  21. //import LearningRobots.Qlearning;
  22. //import LearningRobots.RobotActions;
  23. //import LearningRobots.RobotStates;
  24. //import robocode.Robot;
  25. import robocode.AdvancedRobot;
  26. import robocode.BulletHitEvent;
  27. import robocode.BulletMissedEvent;
  28. import robocode.Rules;
  29. import robocode.ScannedRobotEvent;
  30. //import robocode.control.snapshot.RobotState;
  31. import robocode.util.Utils;
  32.  
  33.  
  34. /**
  35. * MyFirstRobot - a sample robot by Mathew Nelson.
  36. * <p>
  37. * Moves in a seesaw motion, and spins the gun around at each end.
  38. *
  39. * @author Mathew A. Nelson (original)
  40. */
  41. public class NNBot extends AdvancedRobot {
  42. //Reinforcement learning objects
  43. Qlearning learning;
  44. LUT table;
  45.  
  46. Enemy e;
  47.  
  48.  
  49. //reward variable
  50. double reward;
  51.  
  52.  
  53. //status variables
  54. int hitWall = 0;
  55. int hitByEnemy = 0;
  56.  
  57.  
  58. //###################
  59. //main robot function
  60. //###################
  61. public void run(){
  62. //set colors of robot
  63. setColors(Color.red, Color.yellow, Color.pink);
  64.  
  65. //independent moving of gun and radar
  66. setAdjustGunForRobotTurn(true);
  67. setAdjustRadarForGunTurn(true);
  68.  
  69.  
  70.  
  71.  
  72. e = new Enemy();
  73. table = new LUT(10, 6, 3200);
  74. load("LUtable.txt");
  75. learning = new Qlearning(table);
  76. e.distance = 100000; //fix
  77.  
  78.  
  79. //play game
  80. while(true){
  81. robotMovement();
  82. radarMovement();
  83.  
  84.  
  85.  
  86.  
  87.  
  88. }
  89. }
  90.  
  91. public void robotMovement(){
  92.  
  93. int state = getState();
  94. int action = learning.selectActionEGreedy(state); //choose between greedy and softmax
  95. learning.Qlearn(state, action, reward);
  96. out.println(action);
  97. hitByEnemy = 0;
  98. hitWall = 0;
  99. out.println("State " + state);
  100. switch(action){
  101. case RobotActions.moveAhead:
  102. ahead(RobotActions.moveDistance);
  103. break;
  104. case (RobotActions.moveBack):
  105. back(RobotActions.moveDistance);
  106. break;
  107. case (RobotActions.aheadTurnLeft):
  108. turnLeft(90);
  109. ahead(RobotActions.moveDistance);
  110. break;
  111. case (RobotActions.aheadTurnRight):
  112. turnRight(90);
  113. ahead(RobotActions.moveDistance);
  114. break;
  115. case (RobotActions.backTurnLeft):
  116. turnLeft(90);
  117. back(RobotActions.moveDistance);
  118. break;
  119. case (RobotActions.backTurnRight):
  120. turnRight(90);
  121. back(RobotActions.moveDistance);
  122. break;}
  123. }
  124.  
  125. public void radarMovement() {
  126.  
  127. if (getRadarTurnRemaining() == 0.0)
  128. setTurnRadarRightRadians(Double.POSITIVE_INFINITY);
  129.  
  130.  
  131.  
  132. }
  133. int getState(){
  134.  
  135. int direction = RobotStates.convertDirection(getHeading());
  136. int enemyDist = RobotStates.convertEnemyDistance(e.distance);
  137. int enemyEnergy = (int)e.energy;
  138. int enemyBearing = RobotStates.convertEnemyBearing(e.bearing);
  139. int hitW = hitWall;
  140. int hitByE = hitByEnemy;
  141. int currState = RobotStates.state[direction][enemyDist][enemyEnergy][enemyBearing][hitW][hitByE]; //CHEKCK THIS
  142.  
  143. System.out.println(direction + ", " + enemyDist + ", " + enemyEnergy + ", " + enemyBearing);
  144. return currState;
  145. }
  146.  
  147.  
  148. public void onScannedRobot(ScannedRobotEvent e) {
  149.  
  150. // Absolute angle towards target
  151. double angleToEnemy = getHeadingRadians() + e.getBearingRadians();
  152.  
  153. // Subtract current radar heading to get the turn required to face the enemy, be sure it is normalized
  154. double radarTurn = Utils.normalRelativeAngle( angleToEnemy - getRadarHeadingRadians() );
  155.  
  156. // Distance we want to scan from middle of enemy to either side
  157. // The 36.0 is how many units from the center of the enemy robot it scans.
  158. double extraTurn = Math.min( Math.atan( 36.0 / e.getDistance() ), Rules.RADAR_TURN_RATE_RADIANS );
  159.  
  160. // Adjust the radar turn so it goes that much further in the direction it is going to turn
  161. // Basically if we were going to turn it left, turn it even more left, if right, turn more right.
  162. // This allows us to overshoot our enemy so that we get a good sweep that will not slip.
  163. if (radarTurn < 0)
  164. radarTurn -= extraTurn;
  165. else
  166. radarTurn += extraTurn;
  167.  
  168. //Turn the radar
  169. setTurnRadarRightRadians(radarTurn);
  170.  
  171. //Fire operations -- CircularTargeting method from robowiki:
  172. circularTargeting(e);
  173.  
  174. }
  175. /**
  176. * Fire when we see a robot
  177. */
  178. public void circularTargeting(ScannedRobotEvent e){
  179. double bulletPower = Math.min(3.0, getEnergy());
  180. double myX = getX();
  181. double myY = getY();
  182. double absoluteBearing = getHeadingRadians() + e.getBearingRadians();
  183. double enemyX = getX() + e.getDistance() * Math.sin(absoluteBearing);
  184. double enemyY = getY() + e.getDistance() * Math.cos(absoluteBearing);
  185. double oldEnemyHeading = 0; //correct this
  186. double enemyHeading = e.getHeadingRadians();
  187. double enemyHeadingChange = enemyHeading - oldEnemyHeading;
  188. double enemyVelocity = e.getVelocity();
  189. oldEnemyHeading = enemyHeading;
  190.  
  191. double deltaTime = 0;
  192. double battleFieldHeight = getBattleFieldHeight(),
  193. battleFieldWidth = getBattleFieldWidth();
  194. double predictedX = enemyX, predictedY = enemyY;
  195. while((++deltaTime) * (20.0 - 3.0 * bulletPower) <
  196. Point2D.Double.distance(myX, myY, predictedX, predictedY)){
  197. predictedX += Math.sin(enemyHeading) * enemyVelocity;
  198. predictedY += Math.cos(enemyHeading) * enemyVelocity;
  199. enemyHeading += enemyHeadingChange;
  200. if( predictedX < 18.0
  201. || predictedY < 18.0
  202. || predictedX > battleFieldWidth - 18.0
  203. || predictedY > battleFieldHeight - 18.0){
  204.  
  205. predictedX = Math.min(Math.max(18.0, predictedX),
  206. battleFieldWidth - 18.0);
  207. predictedY = Math.min(Math.max(18.0, predictedY),
  208. battleFieldHeight - 18.0);
  209. break;
  210. }
  211. }
  212. double theta = Utils.normalAbsoluteAngle(Math.atan2(
  213. predictedX - getX(), predictedY - getY()));
  214.  
  215. setTurnRadarRightRadians(Utils.normalRelativeAngle(
  216. absoluteBearing - getRadarHeadingRadians()));
  217. setTurnGunRightRadians(Utils.normalRelativeAngle(
  218. theta - getGunHeadingRadians()));
  219. fire(3); //alter firepower with distance?
  220. }
  221.  
  222. /**
  223. * We were hit! Turn perpendicular to the bullet,
  224. * so our seesaw might avoid a future shot.
  225. */
  226. public void onHitWall(HitWallEvent e){
  227. reward += -2; //decrement reward by some value
  228. hitWall = 1;
  229. }
  230.  
  231. public void onBulletHit(BulletHitEvent e) {
  232.  
  233. //out.println("Bullet Hit: " + change);
  234. reward += e.getBullet().getPower() * 9;
  235. }
  236. public void onBulletMissed(BulletMissedEvent e) {
  237. //out.println("Bullet Missed: " + change);
  238. reward += -e.getBullet().getPower();
  239. }
  240. public void onHitByBullet(HitByBulletEvent e) {
  241. double power = e.getBullet().getPower();
  242. hitByEnemy = 1;
  243. //out.println("Hit By Bullet: " + -(4 * power + 2 * (power - 1)));
  244. reward += -(4 * power + 2 * (power - 1));
  245. }
  246.  
  247. public void onHitRobot(HitRobotEvent e) {
  248. out.println("Hit Robot: " + -6.0);
  249. reward += -6.0;
  250. }
  251.  
  252.  
  253. public void onWin(WinEvent event)
  254. {
  255. //System.out.println(table.getMaxValue(5));
  256.  
  257. table.save(getDataFile("LUtable.txt"));
  258.  
  259. }
  260.  
  261. public void onDeath(DeathEvent event)
  262. {
  263. //System.out.println(table.getMaxValue(5));
  264.  
  265. table.save(getDataFile("LUtable.txt"));
  266.  
  267.  
  268. }
  269. public void load(String file) {
  270. try {
  271. table.load(getDataFile(file));
  272. }catch(Exception e) {
  273. out.println("Caught exception trying to write to file: " + e);
  274. }
  275.  
  276. }
  277.  
  278. }
  279.  
  280.  
  /**
   * Quantisation of the robot's situation into a discrete state space.
   * <p>
   * {@link #state} maps one bucket index per dimension (direction, enemy distance, enemy
   * bearing, energy, hit-wall flag, hit-by-bullet flag) to a unique state number in the
   * range 0..{@link #numOfStates}-1. The {@code convert...} methods turn raw values into
   * bucket indices.
   */
  public class RobotStates {


      static int numDirections = 4; //number of direction states
      static int numEnemyDistance = 10; //number of enemy distance states
      static int numEnemyBearing = 4; //number of enemy bearing states
      static int numEnergy = 5; //energy levels of robot
      static int hitWall = 2; //hit wall or not
      static int hitByBullet = 2; //hit by bullet or not

      //total num of states: the product of ALL six dimensions of the state table
      static int numOfStates = numDirections * numEnemyDistance * numEnemyBearing * numEnergy * hitWall * hitByBullet;

      public static int state[][][][][][] = new int[numDirections][numEnemyDistance][numEnemyBearing][numEnergy][hitWall][hitByBullet];

      //give every combination of bucket indices its own state number; without this every
      //lookup would return 0 and all situations would share a single state
      static {
          int id = 0;
          for (int dir = 0; dir < numDirections; dir++)
              for (int dist = 0; dist < numEnemyDistance; dist++)
                  for (int bear = 0; bear < numEnemyBearing; bear++)
                      for (int en = 0; en < numEnergy; en++)
                          for (int wall = 0; wall < hitWall; wall++)
                              for (int hit = 0; hit < hitByBullet; hit++)
                                  state[dir][dist][bear][en][wall][hit] = id++;
      }


      //converting functions

      /**
       * Converts a heading in degrees into a direction sector.
       * Sectors are centred on 0, 360/n, 2*360/n, ... so sector 0 spans the wrap-around.
       *
       * @param dir heading in degrees
       * @return sector index in 0..numDirections-1
       */
      public static int convertDirection(double dir){
          double angles = 360.0 / numDirections;
          return sectorOf(dir + angles / 2, angles, numDirections);
      }

      /**
       * Converts a distance into a distance bucket of 30 units each; everything beyond the
       * last bucket is put into the last bucket and negative values into the first.
       *
       * @param dist distance to the enemy
       * @return bucket index in 0..numEnemyDistance-1
       */
      public static int convertEnemyDistance(double dist){
          int distance = (int)(dist / 30.0); //look this up
          if (distance > numEnemyDistance -1) distance = numEnemyDistance -1;
          if (distance < 0) distance = 0;
          return distance;
      }


      /**
       * Converts a bearing in radians (negative values allowed) into a bearing sector.
       * Sectors are centred on 0, 2*PI/n, ... so sector 0 spans the wrap-around.
       *
       * @param bearing bearing in radians
       * @return sector index in 0..numEnemyBearing-1
       */
      public static int convertEnemyBearing(double bearing)
      {
          double angle = Math.PI * 2 / numEnemyBearing;
          return sectorOf(bearing + angle / 2, angle, numEnemyBearing);
      }

      /**
       * Returns the index of the sector of width {@code width} that contains
       * {@code shifted}, wrapped into 0..count-1 so that a value lying exactly on the
       * wrap boundary maps to sector 0 instead of the out-of-range index {@code count}.
       */
      private static int sectorOf(double shifted, double width, int count){
          int index = (int) Math.floor(shifted / width) % count;
          if (index < 0) index += count;
          return index;
      }

  }
  329.  
  330.  
  331.  
  332.  
  333. import java.io.*;
  334. import robocode.*;
  335.  
  336. public class LUT {
  337.  
  338. int argNumInputs;
  339. int[] argVariableFloor;
  340. int[] argVariableCeiling;
  341.  
  342. int numActions;
  343. int numStates;
  344.  
  345. public double[][] table;
  346.  
  347. public LUT(int argNumInputs, int numActions, int numStates) {
  348.  
  349. this.argNumInputs = argNumInputs;
  350. //this.argVariableFloor = argVariableFloor;
  351. //this.argVariableCeiling = argVariableCeiling;
  352. this.numActions = numActions;
  353. this.numStates = numStates;
  354. table = new double[numStates][numActions];
  355. //initializing table when constructor is called
  356. initialiseLUT();
  357.  
  358. }
  359.  
  360.  
  361. //initialize entries to zero
  362. void initialiseLUT() {
  363. table = new double[numStates][numActions];
  364. for (int i = 0; i < numStates; i++) {
  365. for (int j = 0; j < numActions; j++) {
  366. table[i][j] = 0.0;
  367. }
  368. }
  369. }
  370.  
  371. //return specified table value
  372. public double getValue(int state, int action){
  373. return table[state][action];
  374. }
  375. //set specified table value
  376. public void setValue(int state, int action, double QVal){
  377. table[state][action] = QVal;
  378. }
  379.  
  380.  
  381. double getMaxValue(int state){
  382.  
  383. double maxValue = -100000;
  384. for (int j = 0; j < numActions; j++){
  385. if (table[state][j] > maxValue){
  386. maxValue = table[state][j];
  387. }
  388. }
  389. return maxValue;
  390. }
  391.  
  392. int getBestAction(int state){
  393. double maxValue = -100000;
  394. int bestAction = 0; //return this action by default
  395. for (int i = 0; i < table[state].length; i ++){
  396. if (getValue(state, i) > maxValue){
  397. maxValue = getValue(state, i);
  398. bestAction = i;
  399. }
  400. }
  401. return bestAction;
  402. }
  403.  
  404. public void save( File file ) {
  405. PrintStream saveFile = null;
  406. System.out.println("hei");
  407. try {
  408. saveFile = new PrintStream( new RobocodeFileOutputStream( file ));
  409. }
  410. catch (IOException e) {
  411. System.out.println( "*** Could not create output stream for LUT save file.");
  412. }
  413.  
  414. saveFile.println( numStates );
  415. int numEntriesSaved = 0;
  416. for (int i=0; i<numStates; i++) {
  417. for (int j = 0; j < numActions; j++) {
  418. if ((i % 100)==0) System.out.println(table[i][j]);
  419. saveFile.println(table[i][j]);
  420. numEntriesSaved++;
  421. }
  422. }
  423. saveFile.close();
  424. System.out.println ( "--+ Number of LUT table entries saved is " + numEntriesSaved );
  425. }
  426.  
  427. /**
  428. * Loads the LUT from file
  429. * Expects that the 1st line match the maxIndex otherwise returns
  430. */
  431.  
  432. public void load( File argFileName ) throws IOException {
  433.  
  434. FileInputStream inputFile = new FileInputStream(argFileName);
  435. BufferedReader inputReader = new BufferedReader(new InputStreamReader(inputFile));
  436.  
  437. // Check that the maxIndex matches
  438. int maxIndexFromFile = Integer.valueOf(inputReader.readLine());
  439. if (maxIndexFromFile != numStates) {
  440. System.out.println("*** MaxIndex for file does not match LUT.");
  441. inputReader.close();
  442. return;
  443. }
  444.  
  445. // Now load the LUT
  446. int numEntriesLoaded = 0;
  447.  
  448. for (int i = 0; i < numStates; i++) {
  449. for (int j = 0; j < numActions; j++){
  450. table[i][j] = Double.parseDouble(inputReader.readLine());
  451.  
  452. numEntriesLoaded++;
  453. }
  454. }
  455. inputReader.close();
  456. System.out.println ( "--+ Number of LUT entries loaded was " + numEntriesLoaded );
  457. }
  458.  
  459.  
  460.  
  461.  
  462. }
  463.  
  464.  
  465.  
  466.  
  467. import java.io.*;
  468.  
  469.  
  470.  
  471. public class Qlearning {
  472.  
  473. //hyperparameters
  474. private double learningRate = 0.1;
  475. private double discountFactor = 0.9;
  476.  
  477. private double epsilon = 0.7; //exploration rate
  478. private double tau = 0.3; //exploitation rate
  479.  
  480. //reward variable
  481. //double reward;
  482.  
  483.  
  484. //Look-up table
  485. LUT table;
  486.  
  487. //Updated action and state variables
  488. int prevState = 0;
  489. int prevAction = 0;
  490.  
  491.  
  492.  
  493.  
  494.  
  495. //FUNCTIONS
  496. public Qlearning(LUT table){
  497. this.table = table;
  498. }
  499.  
  500. public void Qlearn(int state, int action, double reward){
  501.  
  502. double oldQVal = table.getValue(prevState, prevAction);
  503. double learnedValue = reward + discountFactor * table.getMaxValue(state);
  504. double newQVal = (1 - learningRate) * oldQVal + learningRate * learnedValue;
  505.  
  506. table.setValue(state, action, newQVal);
  507.  
  508.  
  509. //update state and action
  510. prevAction = action;
  511. prevState = state;
  512.  
  513.  
  514.  
  515.  
  516. }
  517. //implement both Softmax and epsilon-greedy action selection
  518. public int selectActionEGreedy(int state){
  519. int action;
  520. double rand = Math.random();
  521. System.out.println(rand);
  522. if (epsilon < rand){
  523. System.out.println("random");
  524. action = (int) Math.random() * table.numActions; //verify this
  525. }
  526. else{
  527. action = table.getBestAction(state);
  528.  
  529. }
  530.  
  531. return action;
  532. }
  533.  
  534. int selectActionSoftMax(int state){
  535. int action = 0;
  536. double Qsum = 0;
  537. double[] Qprob = new double[table.numActions];
  538.  
  539. for (int i = 0; i < table.numActions; i ++){
  540. Qprob[i] = Math.exp(table.getValue(state, i) / tau);
  541. Qsum += Qprob[i];
  542. }
  543. if (Qsum != 0){
  544. for (int i = 0; i < table.numActions; i ++) {
  545. Qprob[i] /= Qsum;
  546. }
  547.  
  548. }else{
  549. action = table.getBestAction(state);
  550. return action;
  551. }
  552.  
  553. //Look into this
  554. double cumulativeProb = 0.0;
  555. double randomNum = Math.random();
  556. while (randomNum > cumulativeProb && action < table.numActions)
  557. {
  558. cumulativeProb += Qprob[action];
  559. action ++;
  560. }
  561. return action - 1;
  562.  
  563.  
  564.  
  565. }
  566.  
  567.  
  568.  
  569.  
  570.  
  571.  
  572.  
  573. }
  574.  
  575.  
  576.  
  577.  
  578.  
  579.  
  580.  
  581.  
  582.  
  583.  
  584. import java.awt.geom.*;
  585.  
  /**
   * Plain record of what is known about an opponent, plus helpers that extrapolate its
   * position. The angle fields are passed straight to Math.sin/Math.cos, i.e. they are
   * treated as radians.
   */
  public class Enemy
  {
      String name;
      public double bearing;
      /** heading used by the position guesses */
      public double head;
      /** time stamp the position guesses are measured from */
      public long ctime;
      public double speed;
      public double x;
      public double y;
      public double distance;
      /** heading change per time unit used by the circular guess */
      public double changehead;
      public double energy = 3;

      /**
       * Extrapolates the position at time {@code when}: along an arc when the heading
       * change is larger than 0.00001 in magnitude, along a straight line otherwise.
       *
       * @param when time to extrapolate to
       * @return the guessed position
       */
      public Point2D.Double guessPosition(long when)
      {
          double elapsed = when - ctime;

          //negligible heading change: straight-line motion
          if (!(Math.abs(changehead) > 0.00001))
          {
              double straightY = y + Math.cos(head) * speed * elapsed;
              double straightX = x + Math.sin(head) * speed * elapsed;
              return new Point2D.Double(straightX, straightY);
          }

          //significant heading change: motion along a circle of radius speed/changehead
          double turnRadius = speed / changehead;
          double finalHeading = head + elapsed * changehead;
          double arcY = y + (Math.sin(finalHeading) * turnRadius) - (Math.sin(head) * turnRadius);
          double arcX = x + (Math.cos(head) * turnRadius) - (Math.cos(finalHeading) * turnRadius);
          return new Point2D.Double(arcX, arcY);
      }

      /**
       * Straight-line guess of the x coordinate at time {@code when}; also prints the
       * elapsed time.
       */
      public double guessX(long when)
      {
          long elapsed = when - ctime;
          System.out.println(elapsed);
          double stepX = Math.sin(head) * speed * elapsed;
          return x + stepX;
      }

      /**
       * Straight-line guess of the y coordinate at time {@code when}.
       */
      public double guessY(long when)
      {
          long elapsed = when - ctime;
          double stepY = Math.cos(head) * speed * elapsed;
          return y + stepY;
      }
  }
  631.  
  632.  
  633.  
  /**
   * Constants naming the discrete move actions and the distance they use.
   * The action values are consecutive integers starting at 0, so they can serve as
   * column indices of an action table.
   */
  public class RobotActions {

      /** Distance travelled by a move action; set manually for now. */
      public static final double moveDistance = 100;

      /** Move ahead. */
      public static final int moveAhead = 0;

      /** Move back. */
      public static final int moveBack = 1;

      /** Turn left, then move ahead. */
      public static final int aheadTurnLeft = 2;

      /** Turn right, then move ahead. */
      public static final int aheadTurnRight = 3;

      /** Turn left, then move back. */
      public static final int backTurnLeft = 4;

      /** Turn right, then move back. */
      public static final int backTurnRight = 5;

  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement