package rlpark.plugin.rltoys.experiments.parametersweep.offpolicy.internal;

import rlpark.plugin.rltoys.experiments.parametersweep.reinforcementlearning.internal.AbstractEpisodeRewardMonitor;
import rlpark.plugin.rltoys.experiments.runners.AbstractRunner;
import rlpark.plugin.rltoys.experiments.runners.Runner;

/* loaded from: input_file:rlpark/plugin/rltoys/experiments/parametersweep/offpolicy/internal/OffPolicyRewardMonitor.class */
/**
 * Episode-reward monitor that runs batches of evaluation episodes on a dedicated
 * {@link Runner} at a fixed set of starting points.
 *
 * <p>The starting points are computed once by {@link #createStartingPoints(int, int)} and
 * handed to the superclass under the label {@code "Target"}. Each call to
 * {@link #runEvaluationIFN(int)} triggers at most one evaluation batch.
 */
public class OffPolicyRewardMonitor extends AbstractEpisodeRewardMonitor {
    /** Runner used to execute the evaluation episodes. */
    private final Runner runner;
    /** Index, in the starting-point array, of the next evaluation still to be run. */
    private int nextEvaluationIndex;
    /** Number of episodes run (and measurements registered) per evaluation. */
    private final int nbEpisodePerEvaluation;

    /**
     * Creates a monitor bound to the given runner.
     *
     * @param runner runner on which evaluation episodes are executed
     * @param i      number of starting points (forwarded to {@link #createStartingPoints(int, int)})
     * @param i2     upper bound used to spread the starting points; the last one is {@code i2 - 1}
     * @param i3     number of episodes to run for each evaluation
     * @throws IllegalArgumentException if {@code i} is not strictly positive
     */
    public OffPolicyRewardMonitor(Runner runner, int i, int i2, int i3) {
        super("Target", createStartingPoints(i, i2));
        this.nextEvaluationIndex = 0;
        this.runner = runner;
        this.nbEpisodePerEvaluation = i3;
    }

    /**
     * Builds {@code i} starting points spread evenly from {@code 0} to {@code i2 - 1}.
     *
     * <p>The spacing is {@code i2 / (i - 1)} computed in floating point, each point being the
     * truncated multiple of that spacing; the last point is always forced to {@code i2 - 1}.
     * With a single point the result is {@code [i2 - 1]}.
     *
     * @param i  number of starting points to create; must be at least 1
     * @param i2 upper bound of the range being covered
     * @return a new array of {@code i} starting points
     * @throws IllegalArgumentException if {@code i} is not strictly positive
     */
    protected static int[] createStartingPoints(int i, int i2) {
        if (i <= 0) {
            throw new IllegalArgumentException("Number of starting points must be positive: " + i);
        }
        int[] points = new int[i];
        int last = i - 1;
        if (last > 0) {
            // Divide in floating point: an int/int division would truncate the spacing
            // before it is stored, producing unevenly spread (or all-zero) points.
            double spacing = (double) i2 / last;
            for (int index = 0; index < last; index++) {
                points[index] = (int) (index * spacing);
            }
        }
        // The final point is pinned rather than computed from the spacing.
        points[last] = i2 - 1;
        return points;
    }

    /**
     * Runs one evaluation batch if the next pending starting point has been reached.
     *
     * <p>Nothing happens when every starting point has already been consumed, or when the next
     * one is still greater than {@code i}. Otherwise {@code nbEpisodePerEvaluation} episodes are
     * run on the runner; after each one the episode reward and step time reported by the
     * runner's event are registered against {@code i}. At most one starting point is consumed
     * per call.
     *
     * @param i current position, compared against the pending starting point and passed as the
     *          first argument of each registered measurement
     */
    public void runEvaluationIFN(int i) {
        boolean allEvaluationsDone = this.nextEvaluationIndex >= this.starts.length;
        if (allEvaluationsDone || this.starts[this.nextEvaluationIndex] > i) {
            return;
        }
        for (int episode = 0; episode < this.nbEpisodePerEvaluation; episode++) {
            this.runner.runEpisode();
            AbstractRunner.RunnerEvent runnerEvent = this.runner.runnerEvent();
            registerMeasurement(i, runnerEvent.episodeReward, runnerEvent.step.time);
        }
        this.nextEvaluationIndex++;
    }
}
