package rlpark.example.demos.learning;

import java.util.Random;
import rlpark.plugin.rltoys.agents.rl.LearnerAgentFA;
import rlpark.plugin.rltoys.algorithms.control.ControlLearner;
import rlpark.plugin.rltoys.algorithms.control.acting.EpsilonGreedy;
import rlpark.plugin.rltoys.algorithms.control.qlearning.QLearning;
import rlpark.plugin.rltoys.algorithms.control.qlearning.QLearningControl;
import rlpark.plugin.rltoys.algorithms.functions.stateactions.TabularAction;
import rlpark.plugin.rltoys.algorithms.functions.states.Projector;
import rlpark.plugin.rltoys.algorithms.traces.RTraces;
import rlpark.plugin.rltoys.experiments.runners.AbstractRunner;
import rlpark.plugin.rltoys.experiments.runners.Runner;
import rlpark.plugin.rltoys.math.vector.implementations.PVector;
import rlpark.plugin.rltoys.problems.mazes.Maze;
import rlpark.plugin.rltoys.problems.mazes.MazeValueFunction;
import rlpark.plugin.rltoys.problems.mazes.Mazes;
import zephyr.plugin.core.api.Zephyr;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;
import zephyr.plugin.core.api.signals.Listener;
import zephyr.plugin.core.api.synchronization.Clock;

/**
 * Demo that runs a {@link QLearning}-based control agent, acting through an
 * {@link EpsilonGreedy} policy, on the maze built by {@link Mazes#createBookMaze()}.
 *
 * <p>The instance advertises itself to Zephyr together with its clock when it is
 * constructed. While running, it adds the agent's last state vector into
 * {@code occupancy} after every runner step.
 */
@Monitor
public class QLearningMaze implements Runnable {
    final MazeValueFunction mazeValueFunction;
    private final ControlLearner control;
    private final LearnerAgentFA agent;
    private final Maze problem;
    private final Clock clock;
    private final Projector projector;
    private final PVector occupancy;

    /**
     * Builds the maze, the projector, the learner and the agent, then advertises
     * this object to Zephyr under the {@code "QLearningMaze"} clock.
     */
    public QLearningMaze() {
        // Same order as the original field initialisers: problem, clock, projector, occupancy.
        problem = Mazes.createBookMaze();
        clock = new Clock("QLearningMaze");
        projector = problem.getMarkovProjector();
        occupancy = new PVector(projector.vectorSize());

        TabularAction toStateAction =
                new TabularAction(problem.actions(), projector.vectorNorm(), projector.vectorSize());

        // NOTE(review): the three numeric arguments handed to QLearning are presumably
        // step size, discount and trace decay, and the trailing EpsilonGreedy argument
        // the exploration rate -- confirm against those constructors.
        double normalizedRate = 0.15d / projector.vectorNorm();
        QLearning learner =
                new QLearning(problem.actions(), normalizedRate, 1.0d, 0.6d, toStateAction, new RTraces());
        // Fixed seed (0) for the policy's random source.
        EpsilonGreedy acting =
                new EpsilonGreedy(new Random(0L), problem.actions(), toStateAction, learner, 0.3d);

        control = new QLearningControl(acting, learner);
        agent = new LearnerAgentFA(control, projector);
        mazeValueFunction = new MazeValueFunction(problem, learner, toStateAction, learner.greedy());
        Zephyr.advertise(clock, this);
    }

    /**
     * Steps a {@link Runner} over the maze for as long as the clock keeps ticking,
     * printing one summary line to standard output each time an episode ends.
     */
    @Override
    public void run() {
        Runner runner = new Runner(problem, agent);
        Listener<AbstractRunner.RunnerEvent> episodeReporter = new Listener<AbstractRunner.RunnerEvent>() {
            @Override
            public void listen(AbstractRunner.RunnerEvent event) {
                String summary = String.format("Episode %d: %d steps", event.nbEpisodeDone, event.step.time);
                System.out.println(summary);
            }
        };
        runner.onEpisodeEnd.connect(episodeReporter);

        while (clock.tick()) {
            runner.step();
            // Accumulate the state the agent last saw into the running occupancy vector.
            occupancy.addToSelf(agent.lastState());
        }
    }

    /**
     * Command-line entry point: creates the demo and runs it on the calling thread.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        new QLearningMaze().run();
    }
}
