package rlpark.example.demos.learning;

import java.util.Random;
import rlpark.plugin.rltoys.agents.functions.FunctionProjected2D;
import rlpark.plugin.rltoys.agents.functions.ValueFunction2D;
import rlpark.plugin.rltoys.algorithms.control.acting.EpsilonGreedy;
import rlpark.plugin.rltoys.algorithms.control.sarsa.Sarsa;
import rlpark.plugin.rltoys.algorithms.control.sarsa.SarsaControl;
import rlpark.plugin.rltoys.algorithms.functions.stateactions.TabularAction;
import rlpark.plugin.rltoys.algorithms.representations.tilescoding.TileCodersNoHashing;
import rlpark.plugin.rltoys.algorithms.traces.RTraces;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.rl.TRStep;
import rlpark.plugin.rltoys.math.vector.BinaryVector;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.Vectors;
import rlpark.plugin.rltoys.problems.mountaincar.MountainCar;
import zephyr.plugin.core.api.Zephyr;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;
import zephyr.plugin.core.api.synchronization.Clock;

/**
 * Demo running Sarsa control with tile-coded features on the {@link MountainCar} problem.
 *
 * <p>The instance advertises itself to Zephyr together with its clock; {@link #run()} keeps
 * stepping the problem for as long as that clock ticks and prints one line per finished episode.
 */
@Monitor
public class SarsaMountainCar implements Runnable {
    /** 2D view of the value function, built from the projector, the problem and the Sarsa learner. */
    final FunctionProjected2D valueFunctionDisplay;
    /** Control loop coupling the acting policy, the state-action mapping and the learner. */
    private final SarsaControl control;
    /** Clock whose ticks drive {@link #run()}; advertised to Zephyr in the constructor. */
    private final Clock clock = new Clock("SarsaMountainCar");
    /** The environment being solved. */
    private final MountainCar problem;
    /** Tile coder turning observations into binary feature vectors. */
    private final TileCodersNoHashing projector;

    /**
     * Wires up the problem, the tile coder, the Sarsa learner and its epsilon-greedy policy,
     * then registers this instance with Zephyr.
     */
    public SarsaMountainCar() {
        problem = new MountainCar(null);

        // The tile coder is sized from the observation ranges reported by the problem.
        projector = new TileCodersNoHashing(problem.getObservationRanges());
        // NOTE(review): (10, 10) is presumably (grid resolution, number of tilings) - confirm
        // against the tile coder API.
        projector.addFullTilings(10, 10);
        projector.includeActiveFeature();

        TabularAction toStateAction =
                new TabularAction(problem.actions(), projector.vectorNorm(), projector.vectorSize());
        toStateAction.includeActiveFeature();

        // NOTE(review): names below assume Sarsa's leading arguments are
        // (step size, discount, trace decay) - confirm against the Sarsa constructor.
        double stepSize = 0.15d / projector.vectorNorm();
        double discount = 0.99d;
        double traceDecay = 0.3d;
        Sarsa sarsa = new Sarsa(stepSize, discount, traceDecay, toStateAction.vectorSize(), new RTraces());

        // Fixed seed (0) keeps the exploration sequence reproducible between runs.
        double epsilon = 0.01d;
        EpsilonGreedy acting = new EpsilonGreedy(new Random(0L), problem.actions(), toStateAction, sarsa, epsilon);

        control = new SarsaControl(acting, toStateAction, sarsa);
        valueFunctionDisplay = new ValueFunction2D(projector, problem, sarsa);
        Zephyr.advertise(clock, this);
    }

    /**
     * Runs the learning loop until the clock stops ticking.
     *
     * <p>On every tick the latest observation is projected, the control is stepped with the
     * previous and current feature vectors, and the problem is advanced with the returned action.
     * When an episode ends, its index and step count are printed and the problem is re-initialised.
     */
    @Override
    public void run() {
        TRStep step = problem.initialize();
        int episodeCount = 0;
        // No previous feature vector exists at the start of an episode.
        RealVector previousFeatures = null;
        while (clock.tick()) {
            BinaryVector currentFeatures = projector.project(step.o_tp1);
            Action nextAction = control.step(previousFeatures, step.a_t, currentFeatures, step.r_tp1);
            // Copy the current features so they serve as "previous" on the next tick.
            previousFeatures = Vectors.bufferedCopy(currentFeatures, previousFeatures);
            if (!step.isEpisodeEnding()) {
                step = problem.step(nextAction);
                continue;
            }
            // Episode finished: report it and start over from a fresh initial step.
            System.out.println(String.format("Episode %d: %d steps", episodeCount, step.time));
            step = problem.initialize();
            previousFeatures = null;
            episodeCount++;
        }
    }

    /**
     * Entry point: builds the demo and runs it on the calling thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SarsaMountainCar demo = new SarsaMountainCar();
        demo.run();
    }
}
