package rlpark.plugin.rltoys.experiments.testing.predictions;

import java.util.Random;
import rlpark.plugin.rltoys.algorithms.predictions.td.OffPolicyTD;
import rlpark.plugin.rltoys.envio.policy.ConstantPolicy;
import rlpark.plugin.rltoys.experiments.testing.results.TestingResult;
import rlpark.plugin.rltoys.math.vector.implementations.PVector;
import rlpark.plugin.rltoys.math.vector.implementations.Vectors;
import rlpark.plugin.rltoys.problems.stategraph.FSGAgentState;
import rlpark.plugin.rltoys.problems.stategraph.FiniteStateGraph;
import rlpark.plugin.rltoys.problems.stategraph.RandomWalk;

/* loaded from: input_file:rlpark/plugin/rltoys/experiments/testing/predictions/RandomWalkOffPolicy.class */
/**
 * Test harness that runs an {@link OffPolicyTD} learner on a {@link RandomWalk} problem and
 * checks that its weights get within a requested distance of a precomputed solution.
 */
public class RandomWalkOffPolicy {

    /**
     * Builds the {@link OffPolicyTD} learner exercised by
     * {@link #testOffPolicyGTD(int, double, double, double, double, double, OffPolicyTDFactory)}.
     */
    public interface OffPolicyTDFactory {
        /**
         * Creates a new learner.
         *
         * @param d  forwarded unchanged from the harness argument {@code d2}
         *           (presumably the trace parameter lambda; confirm against implementations)
         * @param d2 forwarded unchanged from the harness argument {@code d3}
         *           (presumably the discount gamma; confirm against implementations)
         * @param d3 the value of {@code FSGAgentState.vectorNorm()} for the problem
         * @param i  the value of {@code FSGAgentState.vectorSize()} for the problem
         * @return the learner to be tested
         */
        OffPolicyTD newTD(double d, double d2, double d3, int i);
    }

    /**
     * Steps a random walk driven by one policy while the learner is updated with the action
     * probabilities of both that policy and a second one, until the learner's weights are
     * within {@code d} of the solution computed for the second policy.
     *
     * @param i   maximum number of terminal transitions (steps whose {@code s_tp1} is
     *            {@code null}, presumably episode ends) tolerated before giving up
     * @param d   distance-to-solution threshold; the run succeeds once the distance is no
     *            longer greater than this value
     * @param d2  first argument handed to the factory and last argument of
     *            {@code computeSolution} (presumably lambda; confirm)
     * @param d3  second argument handed to the factory and second argument of
     *            {@code computeSolution} (presumably gamma; confirm)
     * @param d4  parameter of the policy used to compute the solution (the target policy)
     * @param d5  parameter of the policy that drives the random walk (the behaviour policy)
     * @param offPolicyTDFactory factory creating the learner under test
     * @return a successful result with a {@code null} message, or a failed result whose
     *         message says why the run was rejected; both carry the learner
     */
    public static TestingResult<OffPolicyTD> testOffPolicyGTD(int i, double d, double d2, double d3, double d4, double d5, OffPolicyTDFactory offPolicyTDFactory) {
        // Both policies draw from the same fixed-seed generator; the creation order is kept
        // (walk-driving policy first) so the run stays reproducible.
        Random seededRandom = new Random(0L);
        ConstantPolicy behaviourPolicy = RandomWalk.newPolicy(seededRandom, d5);
        ConstantPolicy targetPolicy = RandomWalk.newPolicy(seededRandom, d4);

        RandomWalk problem = new RandomWalk(behaviourPolicy);
        FSGAgentState agentState = new FSGAgentState(problem);
        OffPolicyTD learner = offPolicyTDFactory.newTD(d2, d3, agentState.vectorNorm(), agentState.vectorSize());
        double[] solution = agentState.computeSolution(targetPolicy, d3, d2);

        // A learner that already satisfies the threshold before any update makes the test
        // meaningless, so it is reported as a failure.
        if (FiniteStateGraphOnPolicy.distanceToSolution(solution, learner.weights()) <= d) {
            return failure("Precision is incorrect!", learner);
        }

        int terminalTransitions = 0;
        PVector previousFeatures = null;
        // NOTE(review): this condition is also false when the distance is NaN, in which case
        // the method falls through to the success result - confirm whether
        // distanceToSolution can return NaN for diverged weights.
        while (FiniteStateGraphOnPolicy.distanceToSolution(solution, learner.weights()) > d) {
            FiniteStateGraph.StepData transition = agentState.step();
            PVector currentFeatures = agentState.currentFeatureState();

            // Without an action the update receives probability 0 for the target policy and
            // 1 for the behaviour policy.
            double targetProbability;
            double behaviourProbability;
            if (transition.a_t != null) {
                targetProbability = targetPolicy.pi(transition.a_t);
                behaviourProbability = behaviourPolicy.pi(transition.a_t);
            } else {
                targetProbability = 0.0d;
                behaviourProbability = 1.0d;
            }
            learner.update(targetProbability, behaviourProbability, previousFeatures, currentFeatures, transition.r_tp1);

            if (transition.s_tp1 == null) {
                terminalTransitions += 1;
                if (terminalTransitions > i) {
                    return failure("Not learning fast enough. Distance to solution: " + FiniteStateGraphOnPolicy.distanceToSolution(solution, learner.weights()), learner);
                }
                if (!Vectors.checkValues(learner.weights())) {
                    return failure("Weights are wrong", learner);
                }
                // No next state: the following update starts without a previous feature vector.
                previousFeatures = null;
            } else {
                previousFeatures = currentFeatures.copy();
            }
        }
        return new TestingResult<>(true, null, learner);
    }

    /** Wraps the learner in a failed result carrying the given message. */
    private static TestingResult<OffPolicyTD> failure(String message, OffPolicyTD learner) {
        return new TestingResult<>(false, message, learner);
    }
}
