001 /*
002 * SimuCS - Simulator to use with Classifier Systems
003 * MSc project - Oxford University
004 * by Benoit Isaac - Summer 2005
005 */
006
007 package simuLCS;
008 import java.awt.Color;
009 import java.awt.Graphics2D;
010 import java.awt.geom.Point2D;
011 import java.util.Comparator;
012 import java.util.Iterator;
013 import java.util.Random;
014
015 import javax.swing.JButton;
016
017 import simuLCS.graphics.*;
018
019 /**
020 * Agent with a Behaviour and <b>a Learning Classifier System plugged in</b>.
021 * @author Benoit
022 *
023 */
024 public class AgentClassifierLearning extends AgentClassifier {
025
026 public static final int REAL_BEHAVIOUR = 0;
027 public static final int EXPECTED_BEHAVIOUR = 1;
028 public static final int EXPECTED_BEHAVIOUR_TO_SHOW = 2;
029
030 /**
031 * This behaviour is just a copy of <code>behaviourLearning<code>, with
032 * the classifiers ordered to present them to the user.
033 */
034 protected ClassifierSet behaviourToShow;
035 protected ClassifierSet behaviourTemp;
036
037 protected boolean updateBehaviourTemp = false;
038 protected boolean updateBehaviourToShow = false;
039
040 protected JButton buttonUpdate = null;
041
042 protected Point2D coordGhost;
043 protected Point2D oldcoordGhost;
044 protected boolean isLearning = true;
045 protected boolean isGhostPainted = true;
046
047 private final static double MAX_ERROR = 10;
048 private final static int UPDATE_BEHAVIOUR_TO_SHOW = 100;
049
050 protected int nbOfStepsLearning = 0;
051
052 protected double totalRewardUntilNow = 0;
053
054 protected double totalRewardFromLastPlot = 0;
055 protected int nbOfStepsLearningFromLastPlot = 0;
056
057 protected ClassifierSet behaviourLearning;
058
059 /**
060 * Creates an ACL with a given Template and a given Behaviour
061 * @param gen
062 * @param a
063 * @param name
064 * @param t
065 * @param b
066 */
067 protected AgentClassifierLearning(
068 Random gen,
069 Arena a,
070 String name,
071 Template t,
072 ClassifierSet b) {
073 super(gen,a,name);
074 this.isDetectionLimited = true;
075
076 coordGhost = new Point2D.Double(coord.getX(), coord.getY());
077 oldcoordGhost = new Point2D.Double(coord.getX(), coord.getY());
078
079 this.setColor_ext(Color.RED);
080
081 this.setTemplate(t);
082 behaviour = b;
083 /* behaviour of the ghost : initialised with 20 cl, cover will occur */
084 ZClassifierSet bl = new ZClassifierSet(t, 20);
085 behaviourLearning = bl;
086
087 behaviourToShow = new ClassifierSet(t, true);
088 updateBehaviourToShow();
089
090 }
091
092 /**
093 * Creates an ACL with a default behaviour "Avoid danger".
094 * @param gen
095 * @param a
096 * @param name
097 */
098 public AgentClassifierLearning(Random gen, Arena a, String name) {
099 super(gen, a, name);
100 //
101
102 // Tests TemplateRSPVerySimple
103 Template t = new TemplateRSPVerySimple();
104
105 /* real default behaviour */
106 ClassifierSet b = new ClassifierSet(t);
107
108
109 // cD : avoiding everything that is dangerous
110 Classifier cD =
111 new Classifier("#1", "111", "Avoiding Danger" + this.getId());
112 // cW : avoiding the Wall
113 // Classifier cW = new Classifier("01", "101", "Avoiding Wall" + this.getId());
114 // Classifier cR = new Classifier("11", "111", "Avoiding Robot" + this.getId());
115 // Classifier cND = new Classifier("#0","000","No reaction to Non Dangerous" + this.getId());
116
117 b.addClassifier(cD);
118 // b.addClassifier(cND);
119
120
121
122 this.isDetectionLimited = true;
123
124 coordGhost = new Point2D.Double(coord.getX(), coord.getY());
125 oldcoordGhost = new Point2D.Double(coord.getX(), coord.getY());
126
127 this.setColor_ext(Color.RED);
128
129 this.setTemplate(t);
130 behaviour = b;
131 /* behaviour of the ghost : initialised with 20 cl, cover will occur */
132 // ZClassifierSet bl = new ZClassifierSet(t, 20,new ZClassifier("##","111"));
133 ZClassifierSet bl = new ZClassifierSet(t, 20);
134 behaviourLearning = bl;
135
136 behaviourToShow = new ClassifierSet(t, true);
137 updateBehaviourToShow();
138 }
139
140 /**
141 * From the actual position of the agent, the system will try to guess where
142 * it is going to be at the next time step, by calculating its response according
143 * to its expected behaviour (ie the current behaviour provided by the Learning
144 * System).
145 * @param arena
146 * @param ag
147 * @param nbAgents
148 * @param g
149 * @return the total set of Classifiers that lead this response (Set of all the
150 * ActionSets involved in the process)
151 * @see #behaviourLearning
152 */
153 protected ZClassifierSet moveLearner(
154 Arena arena,
155 Entity[] others,
156 int nbEntities,
157 Graphics2D g) {
158
159 ZClassifierSet totalActionSet =
160 new ZClassifierSet(getTemplate(), false);
161
162 // the Learning system starts from the ACTUAL position of the agent
163 // (before it did its move)
164 // and tries to guess the move it has done
165 double newX = coord.getX();
166 double newY = coord.getY();
167
168 Vector2D sumForces = new Vector2D();
169 Vector2D currentVector;
170
171 /* Putting all the entities together: adding the arena */
172 Entity[] en = new Entity[nbEntities + 1];
173 for (int i = 0; i < nbEntities; i++) {
174 en[i] = others[i];
175 }
176 en[nbEntities] = arena;
177
178 /* Adding the response to the other entities*/
179
180 ZClassifierSet matchSet, actionSet;
181 // TODO : put this in a method moveAccordingTo(behaviour) of AgentClassifier ?
182 for (int i = 0; i < nbEntities + 1; i++) {
183 Entity current = en[i];
184 Vector2D action = new Vector2D();
185 if ((current.getId() != this.getId())
186 && this.canDetect(current)) // OTHER agents
187 {
188 /* get the definition of this agent (situation) */
189 String s = getTemplate().testCondition(current);
190 /* find the classifiers satisfied */
191 matchSet =
192 ((ZClassifierSet) behaviourLearning).getMatchSet(
193 s,
194 (ZClassifierSet) behaviourLearning);
195 if (Config.PRINT_MODE > 7) {
196 System.out.flush();
197 System.out.println(
198 "--[Learner] Size MatchSet for "
199 + s
200 + " :"
201 + matchSet.getSize());
202 // System.out.println(matchSet.toString());
203 System.out.flush();
204 }
205 /* from these, do a selection among the satisfied */
206 // TODO change the selection depending on EXPLORE / EXPLOIT
207 actionSet = (ZClassifierSet) matchSet.getActionSet();
208 /* calculate the vector generated by the action parts of
209 * the classifiers selected
210 */
211 if (Config.PRINT_MODE > 7) {
212 System.out.flush();
213 System.out.println(
214 "--[Learner] Action Set for:"
215 + s
216 + " : size "
217 + actionSet.getSize());
218 System.out.println(actionSet.toString());
219 System.out.flush();
220 }
221 Classifier[] actionSetArray = actionSet.getClassifiers();
222 currentVector =
223 getTemplate().getVectorFromActions(
224 actionSetArray,
225 this,
226 current);
227 if (Config.PRINT_MODE > 8) {
228 System.out.println(
229 "Reaction to:" + current + " - " + currentVector);
230 System.out.println("");
231 }
232
233 totalActionSet.addClassifierSet(actionSet);
234 action.add(currentVector);
235 }
236 sumForces.add(action);
237 }
238
239 Vector2D newMovVector = new Vector2D(sumForces);
240 // The learning system has seen the last Movement, it can use it
241 Vector2D inertia = new Vector2D(lastMovVector);
242 inertia.multiplyByConstant(0.1);
243 newMovVector.add(inertia);
244
245 newX = newX + newMovVector.getX();
246 newY = newY + newMovVector.getY();
247
248 coordGhost.setLocation(newX, newY);
249
250 return totalActionSet;
251 }
252
253 /**
254 * Moves the Ghost and updates the rules with the reward
255 * @param nextCoord the REAL next position (to compare with the expected one)
256 * @param nextMovVector the REAL next Movement Vector
257 * @param arena
258 * @param others
259 * @param nbEntities
260 * @param g
261 */
262 protected void moveAndLearn(
263 Point2D nextCoord,
264 Vector2D nextMovVector,
265 Arena arena,
266 Entity[] others,
267 int nbEntities,
268 Graphics2D g) {
269
270 // increments the number of steps learning has occured
271 nbOfStepsLearning++;
272 nbOfStepsLearningFromLastPlot++;
273
274 // get the new position of the ghost learner
275 // from the real (current) position of the agent
276 // and according to the EXPECTED behaviour
277 ZClassifierSet totalActionSet =
278 moveLearner(arena, others, nbEntities, g);
279
280 // get the reward
281 Point2D expectedNextPosition = coordGhost;
282 Point2D realNextPosition = nextCoord;
283 double r = getReward(realNextPosition, expectedNextPosition);
284
285 // Update the totalReward in order to plot
286 totalRewardUntilNow += r;
287 totalRewardFromLastPlot += r;
288
289 // update the set of Classifiers
290 totalActionSet.updateSet(
291 r,
292 getReward(new Point2D.Double(1, 1), new Point2D.Double(1, 1)));
293
294 // run the Genetic Algorithm
295 totalActionSet.runGA(
296 nbOfStepsLearning,
297 "",
298 (ZClassifierSet) behaviourLearning);
299
300 //print the current Behaviour ordered by fitness
301 if (Config.PRINT_MODE > 5) {
302 System.out.flush();
303 System.out.println(
304 "-- Step : " + nbOfStepsLearning + "-----------");
305 System.out.println("**Real behaviour**");
306 System.out.println(behaviour);
307 System.out.println("-----------------------------------");
308 System.out.flush();
309 } else if (Config.PRINT_MODE > 2 && (nbOfStepsLearning % 100) == 0) {
310 System.out.flush();
311 System.out.println(
312 "-- Step : " + nbOfStepsLearning + "-----------");
313 System.out.println("**Best Classifiers**");
314 ((ZClassifierSet) behaviourLearning).printBestInSet(10);
315 System.out.println("*******");
316 System.out.flush();
317 System.out.println("**Real behaviour**");
318 System.out.println(behaviour);
319 System.out.println("-----------------------------------");
320 System.out.flush();
321 }
322
323 if (updateBehaviourToShow) {
324 // run the update now that no modif will occur
325 updateBehaviourToShow();
326 updateBehaviourToShow = false;
327 if (buttonUpdate != null) {
328 buttonUpdate.setText("Update Behaviour");
329 buttonUpdate.setEnabled(true);
330 G_Panel.repaintCustom();
331 }
332 }
333 }
334
335 /**
336 * Get the reward from the environnement.
337 * @param actual
338 * @param expected
339 * @return
340 */
341 protected double getReward(Point2D real, Point2D expected) {
342 double r;
343 double reward;
344
345 r =
346 Point2D.distance(
347 real.getX(),
348 real.getY(),
349 expected.getX(),
350 expected.getY());
351 if (MAX_ERROR - r > 0) {
352 reward = (MAX_ERROR - r);
353 } else {
354 reward = 0; // no reward
355 }
356 reward = reward * 200;
357 if (Config.PRINT_MODE > 4) {
358 System.out.println("Actual :" + real + " - Expected:" + expected);
359 System.out.println("Reward : " + reward);
360 }
361 return reward;
362 }
363
364 /**
365 * Moves the agent.
366 * Moves the ghost and applies the learning system if <code>isLearning == true</code>.
367 * @see #isLearning
368 */
369 public void move(
370 Arena arena,
371 Entity[] others,
372 int nbEntities,
373 Graphics2D g) {
374
375 // Calculate the new REAL movement Vector
376 // without moving yet the agent (we need to calculate the
377 // EXPECTED position from its actual coordinates)
378 Vector2D newMovVector =
379 getNewMovVectorAccordingTo(
380 behaviour,
381 arena,
382 others,
383 nbEntities,
384 g,
385 null);
386
387 // next coordinates for the Agent
388
389 // newMovVector = Vector2D.getCorrectedVector(newMovVector,arena,this,g);
390
391 double newX = getCoord().getX() + newMovVector.getX();
392 double newY = getCoord().getY() + newMovVector.getY();
393
394 if (isLearning) { // moving the Ghost and applying the Learning
395 moveAndLearn(
396 new Point2D.Double(newX, newY),
397 newMovVector,
398 arena,
399 others,
400 nbEntities,
401 g);
402 }
403
404 // we can now update the position of the Agent
405
406 lastMovVector = newMovVector;
407 coord.setLocation(newX, newY);
408
409 }
410
411 /**
412 * Clear the drawing of the ghost
413 * @param g
414 * @param c
415 */
416 protected void clearGhost(Graphics2D g, Point2D c) {
417 g.setColor(Color.WHITE);
418 int x = (int) (c.getX() - (size / 2));
419 int y = (int) (c.getY() - (size / 2));
420 g.drawOval(x, y, size, size);
421 }
422
423 /**
424 * Paint the ghost (ie the Agent at the expected position)
425 * @param g
426 */
427 protected void paintGhost(Graphics2D g) {
428 // clear the previous drawing
429 clear(g, oldcoordGhost);
430
431 g.setColor(color_int);
432 int x = (int) (coordGhost.getX() - (size / 2));
433 int y = (int) (coordGhost.getY() - (size / 2));
434 g.fillOval(x, y, size, size);
435 g.setColor(Color.GRAY);
436 g.drawOval(x, y, size, size);
437 if (paintName)
438 g.drawString(
439 "" + getName() + "G",
440 x + size / 2 - 6,
441 y + size / 2 + 4);
442
443 // to avoid blinking effect
444 oldcoordGhost =
445 new Point2D.Double(coordGhost.getX(), coordGhost.getY());
446 }
447
448 /**
449 * Create a copy of the current population and order the classifiers
450 * so that they can be shown <b>ranked</b>.
451 *
452 */
453 protected synchronized void updateBehaviourToShow() {
454 // creation of a new ordered Set
455 class DecreaseOrder implements Comparator {
456 public int compare(Object o1, Object o2) {
457 ZClassifier c1 = (ZClassifier) o1;
458 ZClassifier c2 = (ZClassifier) o2;
459 if (c1.equals(c2))
460 return 0;
461 double diff = c1.getStrength() - c2.getStrength();
462 if (diff != 0)
463 return (diff > 0) ? -1 : 1;
464
465 return 1; // by default
466 }
467
468 public boolean equals(ZClassifier c1, ZClassifier c2) {
469 return c1.equals(c2);
470 }
471 }
472
473 synchronized (behaviourToShow) {
474 behaviourToShow =
475 new ClassifierSet(
476 behaviourLearning.getTemplate(),
477 new DecreaseOrder());
478 synchronized (behaviourLearning) {
479 Iterator iter = behaviourLearning.getIterator();
480 ZClassifier current;
481 if (Config.getPRINT_MODE() > 2)
482 System.out.println("Creating the ordered set to show.");
483 while (iter.hasNext()) {
484 behaviourToShow.addClassifier((ZClassifier) iter.next());
485 }
486 }
487
488 }
489
490 }
491
492 /**
493 * Asks for the behaviour to be updated. In order to avoid
494 * Concurrency problems, the update will be done at a thread-safe stage.
495 * @param simuIsRunning
496 * @param b
497 */
498 public synchronized void requestUpdateBehaviourToShow(
499 boolean simuIsRunning,
500 JButton b) {
501 buttonUpdate = b;
502 if (simuIsRunning && isLearning) {
503 // put a request to be executed when it's thread safe
504 updateBehaviourToShow = true;
505 } else {
506 // no learning is occuring now, we can update
507 updateBehaviourToShow();
508 b.setText("Update Behaviour");
509 b.setEnabled(true);
510 G_Panel.repaintCustom();
511 }
512 }
513
514 /**
515 * Removes the drawing of the agent for the old coordinates
516 * and repaint the agent at its new coordinates.
517 * Paints the ghost as well if <code>isGhostPainted==true</code>.
518 * (This avoids a blinking effect)
519 * @see Agent#paint()
520 * @see #isGhostPainted
521 */
522 public void paint(Graphics2D g) {
523
524 // paint the Agent
525 super.paint(g);
526
527 if (isLearning && isGhostPainted)
528 paintGhost(g);
529 }
530
531 public Point2D getCoordGhost() {
532 return coordGhost;
533 }
534
535 public boolean isGhostPainted() {
536 return isGhostPainted;
537 }
538
539 public boolean isLearning() {
540 return isLearning;
541 }
542
543 public int getNbOfStepsLearning() {
544 return nbOfStepsLearning;
545 }
546
547 public Point2D getOldcoordGhost() {
548 return oldcoordGhost;
549 }
550 /**
551 * @param b
552 */
553 public void setGhostPainted(boolean b) {
554 isGhostPainted = b;
555 }
556
557 public void setLearning(boolean b) {
558 isLearning = b;
559 isGhostPainted = b;
560 }
561
562 /**
563 * Returns the given behaviour (real one, expected one, or expected one ordered)
564 */
565 public ClassifierSet getBehaviour(int whichOne) {
566 switch (whichOne) {
567 case EXPECTED_BEHAVIOUR :
568 return behaviourLearning;
569 case EXPECTED_BEHAVIOUR_TO_SHOW :
570 return behaviourToShow;
571 default :
572 return getBehaviour();
573 }
574 }
575
576 private boolean isUpdateBehaviourTemp() {
577 return updateBehaviourTemp;
578 }
579
580 private void setUpdateBehaviourTemp(boolean b) {
581 updateBehaviourTemp = b;
582 }
583
584 public double getTotalRewardUntilNow() {
585 return totalRewardUntilNow;
586 }
587
588 public double getAverageRewardUntilNow() {
589 return getTotalRewardUntilNow() / nbOfStepsLearning;
590 }
591
592 public void setTotalRewardUntilNow(double d) {
593 totalRewardUntilNow = d;
594 }
595
596 /**
597 * Get the average reward from the last plot, and reset the counters
598 * to start again to record
599 * @return
600 */
601 public double getAverageRewardFromLastPlot() {
602 // re-initialize the plotting
603 double averageRewardFromLastPlot =
604 totalRewardFromLastPlot / nbOfStepsLearningFromLastPlot;
605 totalRewardFromLastPlot = 0;
606 nbOfStepsLearningFromLastPlot = 0;
607 return averageRewardFromLastPlot;
608 }
609
610 }