package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy;

import rlpark.plugin.rltoys.agents.offpolicy.OffPolicyAgentEvaluable;
import rlpark.plugin.rltoys.agents.representations.RepresentationFactory;
import rlpark.plugin.rltoys.envio.rl.RLAgent;
import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter;
import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator;
import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.OnPolicyRewardMonitor;
import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.RewardMonitorAverage;
import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.RewardMonitorEpisode;
import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyAgentFactory;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyProblemFactory;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.RLParameters;
import rlpark.plugin.rltoys.experiments.runners.AbstractRunner;
import rlpark.plugin.rltoys.experiments.runners.Runner;
import rlpark.plugin.rltoys.problems.RLProblem;
import zephyr.plugin.core.api.signals.Listener;

/* loaded from: input_file:rlpark/plugin/rltoys/experiments/parametersweep/offpolicy/EpisodeTriggeredContinuousEvaluationOffPolicy.class */
/**
 * Off-policy sweep context that evaluates the target policy while the behaviour policy runs.
 *
 * <p>A dedicated evaluation {@link Runner} is created for the agent returned by
 * {@link OffPolicyAgentEvaluable#createEvaluatedAgent()}, and that runner is advanced by exactly
 * one {@code step()} each time the behaviour runner fires its {@code onTimeStep} signal.
 */
public class EpisodeTriggeredContinuousEvaluationOffPolicy extends AbstractContextOffPolicy {
    private static final long serialVersionUID = -593900122821568271L;

    /**
     * Handed to the evaluation runner in the argument position where {@link #createRunner}
     * passes the maximum number of time steps per episode. A value that is not strictly
     * positive selects the single-episode evaluation path.
     */
    private final int resetPeriod;

    /** Creates a context with a reset period of {@code -1} (non-positive: single-episode path). */
    public EpisodeTriggeredContinuousEvaluationOffPolicy() {
        this(-1);
    }

    /**
     * Creates a context without factories; they are supplied later through {@link #newContext}.
     *
     * @param i the reset period stored by this context
     */
    public EpisodeTriggeredContinuousEvaluationOffPolicy(int i) {
        this(null, null, null, i);
    }

    /** Full constructor: forwards the three factories to the parent and records the reset period. */
    private EpisodeTriggeredContinuousEvaluationOffPolicy(OffPolicyProblemFactory problemFactory, RepresentationFactory representationFactory, OffPolicyAgentFactory learnerFactory, int resetPeriod) {
        super(problemFactory, representationFactory, learnerFactory);
        this.resetPeriod = resetPeriod;
    }

    /**
     * Chooses the reward monitor implementation from the number of evaluation episodes.
     *
     * @param label         name given to the monitor
     * @param nbCheckpoints number of reward checkpoints
     * @param nbTimeSteps   time-step count, only used by the averaging monitor
     * @param nbEpisodes    number of evaluation episodes; exactly one selects the averaging monitor
     * @return a {@link RewardMonitorAverage} for a single episode, otherwise a {@link RewardMonitorEpisode}
     */
    private OnPolicyRewardMonitor createRewardMonitor(String label, int nbCheckpoints, int nbTimeSteps, int nbEpisodes) {
        if (nbEpisodes == 1) {
            return new RewardMonitorAverage(label, nbCheckpoints, nbTimeSteps);
        }
        return new RewardMonitorEpisode(label, nbCheckpoints, nbEpisodes);
    }

    /**
     * Builds the evaluation runner for the target policy, attaches a reward monitor to it and
     * slaves it to the behaviour runner's time steps.
     *
     * @param i              experiment counter used to seed the evaluation environment
     * @param abstractRunner behaviour runner; its agent must be an {@link OffPolicyAgentEvaluable}
     * @param parameters     sweep parameters (number of episodes, checkpoints, time steps)
     * @return the reward monitor connected to the evaluation runner
     * @throws RuntimeException if the behaviour is configured with a number of episodes other than one
     */
    @Override
    public PerformanceEvaluator connectTargetRewardMonitor(int i, AbstractRunner abstractRunner, Parameters parameters) {
        OffPolicyAgentEvaluable behaviourAgent = (OffPolicyAgentEvaluable) abstractRunner.agent();
        if (RLParameters.nbEpisode(parameters) != 1) {
            throw new RuntimeException("This evaluation does not support multiple episode for the behaviour");
        }
        RLProblem evaluationProblem = environmentFactory.createEvaluationEnvironment(ExperimentCounter.newRandom(i));
        RLAgent evaluatedAgent = behaviourAgent.createEvaluatedAgent();
        int nbCheckpoints = RLParameters.nbRewardCheckpoint(parameters);

        final int evaluationEpisodes;
        final int monitoredTimeSteps;
        if (resetPeriod > 0) {
            // Periodic reset: the episode count is the time-step budget divided by the checkpoints.
            evaluationEpisodes = RLParameters.maxEpisodeTimeSteps(parameters) / nbCheckpoints;
            monitoredTimeSteps = resetPeriod;
        } else {
            // No reset: a single evaluation episode covering the whole time-step budget.
            evaluationEpisodes = 1;
            monitoredTimeSteps = RLParameters.maxEpisodeTimeSteps(parameters);
        }

        final Runner evaluationRunner = new Runner(evaluationProblem, evaluatedAgent, evaluationEpisodes, resetPeriod);
        OnPolicyRewardMonitor targetMonitor = createRewardMonitor("Target", nbCheckpoints, monitoredTimeSteps, evaluationEpisodes);
        targetMonitor.connect(evaluationRunner);

        // One evaluation step per behaviour time step.
        abstractRunner.onTimeStep.connect(new Listener<AbstractRunner.RunnerEvent>() {
            @Override
            public void listen(AbstractRunner.RunnerEvent event) {
                evaluationRunner.step();
            }
        });
        return targetMonitor;
    }

    /**
     * Creates the behaviour runner: a fresh environment seeded from the counter, and an agent
     * built by the agent factory for that environment.
     *
     * @param i          experiment counter used to seed the environment and passed to the agent factory
     * @param parameters sweep parameters providing the episode count and per-episode step limit
     * @return the runner driving the behaviour agent
     */
    @Override
    public AbstractRunner createRunner(int i, Parameters parameters) {
        RLProblem behaviourProblem = environmentFactory.createEnvironment(ExperimentCounter.newRandom(i));
        return new Runner(
                behaviourProblem,
                agentFactory.createAgent(i, behaviourProblem, parameters, projectorFactory),
                RLParameters.nbEpisode(parameters),
                RLParameters.maxEpisodeTimeSteps(parameters));
    }

    /**
     * Returns a new context of this type bound to the given factories, keeping this
     * context's reset period.
     */
    @Override
    public AbstractContextOffPolicy newContext(OffPolicyProblemFactory offPolicyProblemFactory, RepresentationFactory representationFactory, OffPolicyAgentFactory offPolicyAgentFactory) {
        return new EpisodeTriggeredContinuousEvaluationOffPolicy(offPolicyProblemFactory, representationFactory, offPolicyAgentFactory, resetPeriod);
    }
}
