package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy;

import rlpark.plugin.rltoys.agents.offpolicy.OffPolicyAgentEvaluable;
import rlpark.plugin.rltoys.agents.representations.RepresentationFactory;
import rlpark.plugin.rltoys.experiments.helpers.ExperimentCounter;
import rlpark.plugin.rltoys.experiments.parametersweep.interfaces.PerformanceEvaluator;
import rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal.OffPolicyRewardMonitor;
import rlpark.plugin.rltoys.experiments.parametersweep.onpolicy.internal.RewardMonitorAverage;
import rlpark.plugin.rltoys.experiments.parametersweep.parameters.Parameters;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyAgentFactory;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.OffPolicyProblemFactory;
import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.RLParameters;
import rlpark.plugin.rltoys.experiments.runners.AbstractRunner;
import rlpark.plugin.rltoys.experiments.runners.Runner;
import rlpark.plugin.rltoys.experiments.runners.RunnerTimeSteps;
import rlpark.plugin.rltoys.problems.RLProblem;
import zephyr.plugin.core.api.internal.monitoring.abstracts.MonitoredDataTraverser;
import zephyr.plugin.core.api.signals.Listener;

/* loaded from: input_file:rlpark/plugin/rltoys/experiments/parametersweep/offpolicy/TimeStepContextOffPolicy.class */
/**
 * Off-policy parameter-sweep context whose runners are {@link RunnerTimeSteps}
 * instances and whose target-policy evaluation is triggered from the behaviour
 * runner's {@code onTimeStep} signal.
 *
 * <p>The {@code environmentFactory}, {@code agentFactory} and
 * {@code projectorFactory} fields used below are inherited from
 * {@link AbstractContextOffPolicy}.
 */
public class TimeStepContextOffPolicy extends AbstractContextOffPolicy {
    private static final long serialVersionUID = -593900122821568271L;

    /**
     * Creates a context with no factories: all three superclass arguments are
     * {@code null}.
     *
     * <p>NOTE(review): presumably meant as a prototype on which
     * {@link #newContext} is called to obtain a usable context; confirm
     * against callers.
     */
    public TimeStepContextOffPolicy() {
        super(null, null, null);
    }

    /**
     * Creates a fully configured context; only reachable through
     * {@link #newContext}.
     */
    private TimeStepContextOffPolicy(OffPolicyProblemFactory offPolicyProblemFactory, RepresentationFactory representationFactory, OffPolicyAgentFactory offPolicyAgentFactory) {
        super(offPolicyProblemFactory, representationFactory, offPolicyAgentFactory);
    }

    /**
     * Builds a {@link RewardMonitorAverage} labelled {@code "Behaviour"},
     * connects it to the given runner and returns it.
     *
     * @param abstractRunner runner the monitor is connected to
     * @param parameters     sweep parameters supplying the number of reward
     *                       checkpoints and the total number of time steps
     * @return the connected monitor
     */
    @Override
    public PerformanceEvaluator connectBehaviourRewardMonitor(AbstractRunner abstractRunner, Parameters parameters) {
        RewardMonitorAverage behaviourMonitor = new RewardMonitorAverage(
                "Behaviour",
                RLParameters.nbRewardCheckpoint(parameters),
                RLParameters.totalNumberOfTimeSteps(parameters));
        behaviourMonitor.connect(abstractRunner);
        return behaviourMonitor;
    }

    /**
     * Builds an {@link OffPolicyRewardMonitor} around a dedicated evaluation
     * {@link Runner} and hooks it to the given runner's {@code onTimeStep}
     * signal.
     *
     * <p>{@code runEvaluationIFN(0)} is called once before the listener is
     * registered; afterwards it is called on every {@code onTimeStep} event
     * with that event's {@code nbTotalTimeSteps}.
     *
     * @param i              value passed to {@link ExperimentCounter#newRandom}
     *                       to obtain the random source of the evaluation
     *                       environment
     * @param abstractRunner behaviour runner; its agent is cast to
     *                       {@link OffPolicyAgentEvaluable}, so a
     *                       {@link ClassCastException} is raised if the agent
     *                       is of another type
     * @param parameters     sweep parameters read through {@link RLParameters}
     * @return the monitor wrapping the evaluation runner
     */
    @Override
    public PerformanceEvaluator connectTargetRewardMonitor(int i, AbstractRunner abstractRunner, Parameters parameters) {
        // Arguments are kept in their original order inside a single
        // expression so that the evaluation environment is still created
        // before the agent is fetched and cast.
        // NOTE(review): the third argument is a monitoring-level constant;
        // this looks like a decompiler constant substitution (possibly for
        // Integer.MAX_VALUE) -- confirm against Runner's constructor.
        Runner evaluationRunner = new Runner(
                environmentFactory.createEvaluationEnvironment(ExperimentCounter.newRandom(i)),
                ((OffPolicyAgentEvaluable) abstractRunner.agent()).createEvaluatedAgent(),
                MonitoredDataTraverser.MonitorEverythingLevel,
                RLParameters.maxEpisodeTimeSteps(parameters));
        final OffPolicyRewardMonitor targetMonitor = new OffPolicyRewardMonitor(
                evaluationRunner,
                RLParameters.nbRewardCheckpoint(parameters),
                RLParameters.totalNumberOfTimeSteps(parameters),
                RLParameters.nbEpisodePerEvaluation(parameters));

        // Give the monitor a chance to evaluate at time step 0, before the
        // behaviour runner has emitted any event.
        targetMonitor.runEvaluationIFN(0);

        abstractRunner.onTimeStep.connect(new Listener<AbstractRunner.RunnerEvent>() {
            @Override
            public void listen(AbstractRunner.RunnerEvent event) {
                targetMonitor.runEvaluationIFN(event.nbTotalTimeSteps);
            }
        });
        return targetMonitor;
    }

    /**
     * Creates the behaviour runner: a fresh environment, an agent built for
     * that environment, and a {@link RunnerTimeSteps} configured with the
     * maximum episode length and total number of time steps read from
     * {@code parameters}.
     *
     * @param i          value passed to {@link ExperimentCounter#newRandom}
     *                   and to the agent factory
     * @param parameters sweep parameters forwarded to the agent factory and
     *                   read through {@link RLParameters}
     * @return the newly created runner
     */
    @Override
    public AbstractRunner createRunner(int i, Parameters parameters) {
        RLProblem problem = environmentFactory.createEnvironment(ExperimentCounter.newRandom(i));
        return new RunnerTimeSteps(
                problem,
                agentFactory.createAgent(i, problem, parameters, projectorFactory),
                RLParameters.maxEpisodeTimeSteps(parameters),
                RLParameters.totalNumberOfTimeSteps(parameters));
    }

    /**
     * Returns a new {@code TimeStepContextOffPolicy} configured with the
     * given factories.
     */
    @Override
    public AbstractContextOffPolicy newContext(OffPolicyProblemFactory offPolicyProblemFactory, RepresentationFactory representationFactory, OffPolicyAgentFactory offPolicyAgentFactory) {
        return new TimeStepContextOffPolicy(offPolicyProblemFactory, representationFactory, offPolicyAgentFactory);
    }
}
