001    /*
002     * SimuCS - Simulator to use with Classifier Systems 
003     * MSc project - Oxford University 
004     * by Benoit Isaac - Summer 2005
005     */
006    
007    package simuLCS;
008    import java.awt.Color;
009    import java.awt.Graphics2D;
010    import java.awt.geom.Point2D;
011    import java.util.Comparator;
012    import java.util.Iterator;
013    import java.util.Random;
014    
015    import javax.swing.JButton;
016    
017    import simuLCS.graphics.*;
018    
019    /**
020     * Agent with a Behaviour and <b>a Learning Classifier System plugged in</b>.
021     * @author Benoit
022     * 
023     */
024    public class AgentClassifierLearning extends AgentClassifier {
025    
        /** Selector for <code>getBehaviour(int)</code>: the real behaviour of the agent. */
        public static final int REAL_BEHAVIOUR = 0;
        /** Selector for <code>getBehaviour(int)</code>: the behaviour being learnt (<code>behaviourLearning</code>). */
        public static final int EXPECTED_BEHAVIOUR = 1;
        /** Selector for <code>getBehaviour(int)</code>: the ordered copy kept for display (<code>behaviourToShow</code>). */
        public static final int EXPECTED_BEHAVIOUR_TO_SHOW = 2;

        /**
         * This behaviour is just a copy of <code>behaviourLearning</code>, with
         * the classifiers ordered to present them to the user.
         */
        protected ClassifierSet behaviourToShow;
        /** Not referenced by the code of this class. NOTE(review): confirm whether a subclass relies on it. */
        protected ClassifierSet behaviourTemp;

        /** Flag only reached through the private accessors of this class. */
        protected boolean updateBehaviourTemp = false;
        /**
         * Set by <code>requestUpdateBehaviourToShow</code> when the rebuild of
         * <code>behaviourToShow</code> has to be postponed; consumed (and reset)
         * at the end of <code>moveAndLearn</code>.
         */
        protected boolean updateBehaviourToShow = false;

        /**
         * Button handed over by <code>requestUpdateBehaviourToShow</code>; it is
         * relabelled and re-enabled once a postponed update has run. May be null.
         */
        protected JButton buttonUpdate = null;

        /** Expected next position of the agent, written by <code>moveLearner</code>. */
        protected Point2D coordGhost;
        /** Position at which the ghost was last painted (refreshed by <code>paintGhost</code>). */
        protected Point2D oldcoordGhost;
        /** When true, <code>move</code> also moves the ghost and applies the learning. */
        protected boolean isLearning = true;
        /** When true (and learning is on), <code>paint</code> also paints the ghost. */
        protected boolean isGhostPainted = true;

        /** Distance between real and expected position from which <code>getReward</code> gives no reward. */
        private final static double MAX_ERROR = 10;
        /** Not referenced by the code of this class. NOTE(review): looks like an intended update period - confirm. */
        private final static int UPDATE_BEHAVIOUR_TO_SHOW = 100;

        /** Number of learning steps performed so far (incremented by <code>moveAndLearn</code>). */
        protected int nbOfStepsLearning = 0;

        /** Sum of all the rewards received so far. */
        protected double totalRewardUntilNow = 0;

        /** Sum of the rewards received since <code>getAverageRewardFromLastPlot</code> was last called. */
        protected double totalRewardFromLastPlot = 0;
        /** Number of learning steps since <code>getAverageRewardFromLastPlot</code> was last called. */
        protected int nbOfStepsLearningFromLastPlot = 0;

        /** Behaviour being learnt, used to predict the moves of the agent (the "ghost"). */
        protected ClassifierSet behaviourLearning;
058    
059            /**
060             * Creates an ACL with a given Template and a given Behaviour
061             * @param gen
062             * @param a
063             * @param name
064             * @param t
065             * @param b
066             */
067            protected AgentClassifierLearning(
068                    Random gen,
069                    Arena a,
070                    String name,
071                    Template t,
072                    ClassifierSet b) {
073                    super(gen,a,name);
074                    this.isDetectionLimited = true;
075    
076                    coordGhost = new Point2D.Double(coord.getX(), coord.getY());
077                    oldcoordGhost = new Point2D.Double(coord.getX(), coord.getY());
078    
079                    this.setColor_ext(Color.RED);
080    
081                    this.setTemplate(t);
082                    behaviour = b;
083                    /* behaviour of the ghost : initialised with 20 cl, cover will occur */
084                    ZClassifierSet bl = new ZClassifierSet(t, 20);
085                    behaviourLearning = bl;
086    
087                    behaviourToShow = new ClassifierSet(t, true);
088                    updateBehaviourToShow();
089    
090            }
091    
092            /**
093             * Creates an ACL with a default behaviour "Avoid danger".
094             * @param gen
095             * @param a
096             * @param name
097             */
098            public AgentClassifierLearning(Random gen, Arena a, String name) {
099                    super(gen, a, name);
100                    //              
101    
102                    // Tests TemplateRSPVerySimple
103                    Template t = new TemplateRSPVerySimple();
104    
105                    /* real default behaviour  */
106                    ClassifierSet b = new ClassifierSet(t);
107    
108                    
109                    //      cD : avoiding everything that is dangerous 
110                    Classifier cD =
111                            new Classifier("#1", "111", "Avoiding Danger" + this.getId());
112                    //      cW : avoiding the Wall
113                    //              Classifier cW = new Classifier("01", "101", "Avoiding Wall" + this.getId());
114                    //              Classifier cR = new Classifier("11", "111", "Avoiding Robot" + this.getId());
115                    //              Classifier cND = new Classifier("#0","000","No reaction to Non Dangerous" + this.getId());
116    
117                    b.addClassifier(cD);
118                    //              b.addClassifier(cND);
119    
120                    
121    
122                    this.isDetectionLimited = true;
123    
124                    coordGhost = new Point2D.Double(coord.getX(), coord.getY());
125                    oldcoordGhost = new Point2D.Double(coord.getX(), coord.getY());
126    
127                    this.setColor_ext(Color.RED);
128    
129                    this.setTemplate(t);
130                    behaviour = b;
131                    /* behaviour of the ghost : initialised with 20 cl, cover will occur */
132                    //              ZClassifierSet bl = new ZClassifierSet(t, 20,new ZClassifier("##","111"));
133                    ZClassifierSet bl = new ZClassifierSet(t, 20);
134                    behaviourLearning = bl;
135    
136                    behaviourToShow = new ClassifierSet(t, true);
137                    updateBehaviourToShow();
138            }
139    
140            /**
141             * From the actual position of the agent, the system will try to guess where
142             * it is going to be at the next time step, by calculating its response according
143             * to its expected behaviour (ie the current behaviour provided by the Learning
144             * System).
145             * @param arena
146             * @param ag
147             * @param nbAgents
148             * @param g
149             * @return the total set of Classifiers that lead this response (Set of all the 
150             * ActionSets involved in the process)
151             * @see #behaviourLearning
152             */
153            protected ZClassifierSet moveLearner(
154                    Arena arena,
155                    Entity[] others,
156                    int nbEntities,
157                    Graphics2D g) {
158    
159                    ZClassifierSet totalActionSet =
160                            new ZClassifierSet(getTemplate(), false);
161    
162                    // the Learning system starts from the ACTUAL position of the agent
163                    // (before it did its move)
164                    // and tries to guess the move it has done
165                    double newX = coord.getX();
166                    double newY = coord.getY();
167    
168                    Vector2D sumForces = new Vector2D();
169                    Vector2D currentVector;
170    
171                    /* Putting all the entities together: adding the arena */
172                    Entity[] en = new Entity[nbEntities + 1];
173                    for (int i = 0; i < nbEntities; i++) {
174                            en[i] = others[i];
175                    }
176                    en[nbEntities] = arena;
177    
178                    /* Adding the response to the other entities*/
179    
180                    ZClassifierSet matchSet, actionSet;
181                    // TODO : put this in a method moveAccordingTo(behaviour) of AgentClassifier ?
182                    for (int i = 0; i < nbEntities + 1; i++) {
183                            Entity current = en[i];
184                            Vector2D action = new Vector2D();
185                            if ((current.getId() != this.getId())
186                                    && this.canDetect(current)) // OTHER agents
187                                    {
188                                    /* get the definition of this agent (situation) */
189                                    String s = getTemplate().testCondition(current);
190                                    /* find the classifiers satisfied */
191                                    matchSet =
192                                            ((ZClassifierSet) behaviourLearning).getMatchSet(
193                                                    s,
194                                                    (ZClassifierSet) behaviourLearning);
195                                    if (Config.PRINT_MODE > 7) {
196                                            System.out.flush();
197                                            System.out.println(
198                                                    "--[Learner] Size MatchSet for "
199                                                            + s
200                                                            + " :"
201                                                            + matchSet.getSize());
202                                            //                                      System.out.println(matchSet.toString());
203                                            System.out.flush();
204                                    }
205                                    /* from these, do a selection among the satisfied */
206                                    // TODO change the selection depending on EXPLORE / EXPLOIT
207                                    actionSet = (ZClassifierSet) matchSet.getActionSet();
208                                    /* calculate the vector generated by the action parts of
209                                     * the classifiers selected
210                                     */
211                                    if (Config.PRINT_MODE > 7) {
212                                            System.out.flush();
213                                            System.out.println(
214                                                    "--[Learner] Action Set for:"
215                                                            + s
216                                                            + " : size "
217                                                            + actionSet.getSize());
218                                            System.out.println(actionSet.toString());
219                                            System.out.flush();
220                                    }
221                                    Classifier[] actionSetArray = actionSet.getClassifiers();
222                                    currentVector =
223                                            getTemplate().getVectorFromActions(
224                                                    actionSetArray,
225                                                    this,
226                                                    current);
227                                    if (Config.PRINT_MODE > 8) {
228                                            System.out.println(
229                                                    "Reaction to:" + current + " - " + currentVector);
230                                            System.out.println("");
231                                    }
232    
233                                    totalActionSet.addClassifierSet(actionSet);
234                                    action.add(currentVector);
235                            }
236                            sumForces.add(action);
237                    }
238    
239                    Vector2D newMovVector = new Vector2D(sumForces);
240                    // The learning system has seen the last Movement, it can use it
241                    Vector2D inertia = new Vector2D(lastMovVector);
242                    inertia.multiplyByConstant(0.1);
243                    newMovVector.add(inertia);
244    
245                    newX = newX + newMovVector.getX();
246                    newY = newY + newMovVector.getY();
247    
248                    coordGhost.setLocation(newX, newY);
249    
250                    return totalActionSet;
251            }
252    
253            /**
254             * Moves the Ghost and updates the rules with the reward
255             * @param nextCoord the REAL next position (to compare with the expected one)
256             * @param nextMovVector the REAL next Movement Vector
257             * @param arena
258             * @param others
259             * @param nbEntities
260             * @param g
261             */
262            protected void moveAndLearn(
263                    Point2D nextCoord,
264                    Vector2D nextMovVector,
265                    Arena arena,
266                    Entity[] others,
267                    int nbEntities,
268                    Graphics2D g) {
269    
270                    // increments the number of steps learning has occured
271                    nbOfStepsLearning++;
272                    nbOfStepsLearningFromLastPlot++;
273    
274                    // get the new position of the ghost learner
275                    // from the real (current) position of the agent
276                    // and according to the EXPECTED behaviour
277                    ZClassifierSet totalActionSet =
278                            moveLearner(arena, others, nbEntities, g);
279    
280                    // get the reward
281                    Point2D expectedNextPosition = coordGhost;
282                    Point2D realNextPosition = nextCoord;
283                    double r = getReward(realNextPosition, expectedNextPosition);
284    
285                    // Update the totalReward in order to plot
286                    totalRewardUntilNow += r;
287                    totalRewardFromLastPlot += r;
288    
289                    // update the set of Classifiers 
290                    totalActionSet.updateSet(
291                            r,
292                            getReward(new Point2D.Double(1, 1), new Point2D.Double(1, 1)));
293    
294                    // run the Genetic Algorithm
295                    totalActionSet.runGA(
296                            nbOfStepsLearning,
297                            "",
298                            (ZClassifierSet) behaviourLearning);
299    
300                    //print the current Behaviour ordered by fitness
301                    if (Config.PRINT_MODE > 5) {
302                            System.out.flush();
303                            System.out.println(
304                                    "-- Step : " + nbOfStepsLearning + "-----------");
305                            System.out.println("**Real behaviour**");
306                            System.out.println(behaviour);
307                            System.out.println("-----------------------------------");
308                            System.out.flush();
309                    } else if (Config.PRINT_MODE > 2 && (nbOfStepsLearning % 100) == 0) {
310                            System.out.flush();
311                            System.out.println(
312                                    "-- Step : " + nbOfStepsLearning + "-----------");
313                            System.out.println("**Best Classifiers**");
314                            ((ZClassifierSet) behaviourLearning).printBestInSet(10);
315                            System.out.println("*******");
316                            System.out.flush();
317                            System.out.println("**Real behaviour**");
318                            System.out.println(behaviour);
319                            System.out.println("-----------------------------------");
320                            System.out.flush();
321                    }
322    
323                    if (updateBehaviourToShow) {
324                            // run the update now that no modif will occur
325                            updateBehaviourToShow();
326                            updateBehaviourToShow = false;
327                            if (buttonUpdate != null) {
328                                    buttonUpdate.setText("Update Behaviour");
329                                    buttonUpdate.setEnabled(true);
330                                    G_Panel.repaintCustom();
331                            }
332                    }
333            }
334    
335            /**
336             * Get the reward from the environnement.
337             * @param actual
338             * @param expected
339             * @return
340             */
341            protected double getReward(Point2D real, Point2D expected) {
342                    double r;
343                    double reward;
344    
345                    r =
346                            Point2D.distance(
347                                    real.getX(),
348                                    real.getY(),
349                                    expected.getX(),
350                                    expected.getY());
351                    if (MAX_ERROR - r > 0) {
352                            reward = (MAX_ERROR - r);
353                    } else {
354                            reward = 0; // no reward        
355                    }
356                    reward = reward * 200;
357                    if (Config.PRINT_MODE > 4) {
358                            System.out.println("Actual :" + real + " - Expected:" + expected);
359                            System.out.println("Reward : " + reward);
360                    }
361                    return reward;
362            }
363    
364            /**
365             * Moves the agent.
366             * Moves the ghost and applies the learning system if <code>isLearning == true</code>.
367             * @see #isLearning
368             */
369            public void move(
370                    Arena arena,
371                    Entity[] others,
372                    int nbEntities,
373                    Graphics2D g) {
374    
375                    // Calculate the new REAL movement Vector
376                    // without moving yet the agent (we need to calculate the
377                    // EXPECTED position from its actual coordinates)
378                    Vector2D newMovVector =
379                            getNewMovVectorAccordingTo(
380                                    behaviour,
381                                    arena,
382                                    others,
383                                    nbEntities,
384                                    g,
385                                    null);
386    
387                    // next coordinates for the Agent
388            
389                    //              newMovVector = Vector2D.getCorrectedVector(newMovVector,arena,this,g);
390    
391                    double newX = getCoord().getX() + newMovVector.getX();
392                    double newY = getCoord().getY() + newMovVector.getY();
393    
394                    if (isLearning) { // moving the Ghost and applying the Learning
395                            moveAndLearn(
396                                    new Point2D.Double(newX, newY),
397                                    newMovVector,
398                                    arena,
399                                    others,
400                                    nbEntities,
401                                    g);
402                    }
403    
404                    // we can now update the position of the Agent
405    
406                    lastMovVector = newMovVector;
407                    coord.setLocation(newX, newY);
408    
409            }
410    
411            /**
412             * Clear the drawing of the ghost
413             * @param g
414             * @param c
415             */
416            protected void clearGhost(Graphics2D g, Point2D c) {
417                    g.setColor(Color.WHITE);
418                    int x = (int) (c.getX() - (size / 2));
419                    int y = (int) (c.getY() - (size / 2));
420                    g.drawOval(x, y, size, size);
421            }
422    
423            /**
424             * Paint the ghost (ie the Agent at the expected position)
425             * @param g
426             */
427            protected void paintGhost(Graphics2D g) {
428                    //              clear the previous drawing
429                    clear(g, oldcoordGhost);
430    
431                    g.setColor(color_int);
432                    int x = (int) (coordGhost.getX() - (size / 2));
433                    int y = (int) (coordGhost.getY() - (size / 2));
434                    g.fillOval(x, y, size, size);
435                    g.setColor(Color.GRAY);
436                    g.drawOval(x, y, size, size);
437                    if (paintName)
438                            g.drawString(
439                                    "" + getName() + "G",
440                                    x + size / 2 - 6,
441                                    y + size / 2 + 4);
442    
443                    // to avoid blinking effect
444                    oldcoordGhost =
445                            new Point2D.Double(coordGhost.getX(), coordGhost.getY());
446            }
447    
448            /**
449             * Create a copy of the current population and order the classifiers
450             * so that they can be shown <b>ranked</b>.
451             *
452             */
453            protected synchronized void updateBehaviourToShow() {
454                    // creation of a new ordered Set
455                    class DecreaseOrder implements Comparator {
456                            public int compare(Object o1, Object o2) {
457                                    ZClassifier c1 = (ZClassifier) o1;
458                                    ZClassifier c2 = (ZClassifier) o2;
459                                    if (c1.equals(c2))
460                                            return 0;
461                                    double diff = c1.getStrength() - c2.getStrength();
462                                    if (diff != 0)
463                                            return (diff > 0) ? -1 : 1;
464    
465                                    return 1; // by default
466                            }
467    
468                            public boolean equals(ZClassifier c1, ZClassifier c2) {
469                                    return c1.equals(c2);
470                            }
471                    }
472    
473                    synchronized (behaviourToShow) {
474                            behaviourToShow =
475                                    new ClassifierSet(
476                                            behaviourLearning.getTemplate(),
477                                            new DecreaseOrder());
478                            synchronized (behaviourLearning) {
479                                    Iterator iter = behaviourLearning.getIterator();
480                                    ZClassifier current;
481                                    if (Config.getPRINT_MODE() > 2)
482                                            System.out.println("Creating the ordered set to show.");
483                                    while (iter.hasNext()) {
484                                            behaviourToShow.addClassifier((ZClassifier) iter.next());
485                                    }
486                            }
487    
488                    }
489    
490            }
491    
492            /**
493             * Asks for the behaviour to be updated. In order to avoid
494             * Concurrency problems, the update will be done at a thread-safe stage.
495             * @param simuIsRunning
496             * @param b
497             */
498            public synchronized void requestUpdateBehaviourToShow(
499                    boolean simuIsRunning,
500                    JButton b) {
501                    buttonUpdate = b;
502                    if (simuIsRunning && isLearning) {
503                            // put a request to be executed when it's thread safe
504                            updateBehaviourToShow = true;
505                    } else {
506                            // no learning is occuring now, we can update
507                            updateBehaviourToShow();
508                            b.setText("Update Behaviour");
509                            b.setEnabled(true);
510                            G_Panel.repaintCustom();
511                    }
512            }
513            
514            /**
515             * Removes the drawing of the agent for the old coordinates
516             * and repaint the agent at its new coordinates.
517             * Paints the ghost as well if <code>isGhostPainted==true</code>.
518             * (This avoids a blinking effect)
519             * @see Agent#paint()
520             * @see #isGhostPainted
521             */
522            public void paint(Graphics2D g) {
523    
524                    // paint the Agent
525                    super.paint(g);
526    
527                    if (isLearning && isGhostPainted)
528                            paintGhost(g);
529            }
530    
531                    public Point2D getCoordGhost() {
532                    return coordGhost;
533            }
534    
535            public boolean isGhostPainted() {
536                    return isGhostPainted;
537            }
538    
539            public boolean isLearning() {
540                    return isLearning;
541            }
542    
543            public int getNbOfStepsLearning() {
544                    return nbOfStepsLearning;
545            }
546    
547            public Point2D getOldcoordGhost() {
548                    return oldcoordGhost;
549            }
550            /**
551             * @param b
552             */
553            public void setGhostPainted(boolean b) {
554                    isGhostPainted = b;
555            }
556    
557            public void setLearning(boolean b) {
558                    isLearning = b;
559                    isGhostPainted = b;
560            }
561    
562            /**
563             * Returns the given behaviour (real one, expected one, or expected one ordered)
564             */
565            public ClassifierSet getBehaviour(int whichOne) {
566                    switch (whichOne) {
567                            case EXPECTED_BEHAVIOUR :
568                                    return behaviourLearning;
569                            case EXPECTED_BEHAVIOUR_TO_SHOW :
570                                    return behaviourToShow;
571                            default :
572                                    return getBehaviour();
573                    }
574            }
575    
576            private boolean isUpdateBehaviourTemp() {
577                    return updateBehaviourTemp;
578            }
579    
580            private void setUpdateBehaviourTemp(boolean b) {
581                    updateBehaviourTemp = b;
582            }
583    
584            public double getTotalRewardUntilNow() {
585                    return totalRewardUntilNow;
586            }
587    
588            public double getAverageRewardUntilNow() {
589                    return getTotalRewardUntilNow() / nbOfStepsLearning;
590            }
591    
592            public void setTotalRewardUntilNow(double d) {
593                    totalRewardUntilNow = d;
594            }
595    
596            /**
597             * Get the average reward from the last plot, and reset the counters
598             * to start again to record
599             * @return
600             */
601            public double getAverageRewardFromLastPlot() {
602                    // re-initialize the plotting
603                    double averageRewardFromLastPlot =
604                            totalRewardFromLastPlot / nbOfStepsLearningFromLastPlot;
605                    totalRewardFromLastPlot = 0;
606                    nbOfStepsLearningFromLastPlot = 0;
607                    return averageRewardFromLastPlot;
608            }
609    
610    }