package rlpark.plugin.rltoys.algorithms.control.actorcritic.offpolicy;

import rlpark.plugin.rltoys.algorithms.functions.policydistributions.PolicyDistribution;
import rlpark.plugin.rltoys.algorithms.traces.Traces;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.PVector;

/* loaded from: input_file:rlpark/plugin/rltoys/algorithms/control/actorcritic/offpolicy/ActorLambdaOffPolicy.class */
/**
 * Off-policy actor that keeps one {@link Traces} instance per policy parameter
 * vector {@code u[i]} inherited from {@link AbstractActorOffPolicy}.
 *
 * <p>On every call to {@link #updateParameters} the actor:
 * <ol>
 *   <li>refreshes the target policy with the supplied feature vector,</li>
 *   <li>stores the ratio of the first two arguments in {@code rho_t},</li>
 *   <li>feeds {@code lambda} and the policy's log-gradient into each trace and
 *       then scales the trace vector by that ratio,</li>
 *   <li>adds each trace vector, weighted by {@code alpha_u} times the last
 *       argument, to the matching parameter vector.</li>
 * </ol>
 *
 * <p>NOTE(review): parameter names ({@code d}, {@code d2}, {@code d3}, ...) look
 * like decompiler output; the descriptions below state only how each value is
 * used inside this class.
 */
public class ActorLambdaOffPolicy extends AbstractActorOffPolicy {
    /** One trace per parameter vector; {@code e_u[i]} is sized from {@code u[i].size}. */
    protected final Traces[] e_u;
    /** Value handed to {@code Traces.update} as its first argument on every trace update. */
    public final double lambda;
    /** Multiplier applied (together with the update signal) when adding traces to {@code u}. */
    protected final double alpha_u;
    /** Ratio computed by the most recent {@link #updateParameters} call. */
    private double rho_t;

    /**
     * Builds an actor whose parameter vectors are created by the policy itself.
     *
     * @param d                  stored as {@link #lambda}
     * @param d2                 forwarded to the other constructor, which does not use it
     * @param policyDistribution policy passed to the superclass; also asked to create the parameters
     * @param d3                 stored as {@link #alpha_u}
     * @param i                  argument given to {@code policyDistribution.createParameters}
     * @param traces             prototype used to create one trace per parameter vector
     */
    public ActorLambdaOffPolicy(double d, double d2, PolicyDistribution policyDistribution, double d3, int i, Traces traces) {
        this(policyDistribution.createParameters(i), d, d2, policyDistribution, d3, traces);
    }

    /**
     * Builds an actor around already-existing parameter vectors.
     *
     * @param pVectorArr         parameter vectors handed to the superclass
     * @param d                  stored as {@link #lambda}
     * @param d2                 not used by this constructor
     * @param policyDistribution policy handed to the superclass
     * @param d3                 stored as {@link #alpha_u}
     * @param traces             prototype; {@code newTraces} is called on it once per parameter vector
     */
    public ActorLambdaOffPolicy(PVector[] pVectorArr, double d, double d2, PolicyDistribution policyDistribution, double d3, Traces traces) {
        super(pVectorArr, policyDistribution);
        lambda = d;
        alpha_u = d3;
        e_u = new Traces[u.length];
        // Each trace mirrors the size of the parameter vector it will be added to.
        for (int k = 0; k < e_u.length; k++) {
            e_u[k] = traces.newTraces(u[k].size);
        }
    }

    /**
     * Updates every trace with the target policy's log-gradient for {@code action},
     * then multiplies the resulting trace vector in place by {@code d}.
     *
     * @param d      scaling factor applied to each trace vector after its update
     * @param action action whose log-gradient is requested from the target policy
     * @param d2     not used by this implementation
     */
    protected void updateEligibilityTraces(double d, Action action, double d2) {
        final RealVector[] gradLog = targetPolicy.computeGradLog(action);
        for (int k = 0; k < u.length; k++) {
            final Traces trace = e_u[k];
            trace.update(lambda, gradLog[k]);
            trace.vect().mapMultiplyToSelf(d);
        }
    }

    /**
     * Adds each trace vector, weighted by {@code alpha_u * d2}, to the matching
     * parameter vector.
     *
     * @param d      not used by this implementation
     * @param action not used by this implementation
     * @param d2     update signal multiplied with {@link #alpha_u} to form the step weight
     */
    protected void updatePolicyParameters(double d, Action action, double d2) {
        final double stepWeight = alpha_u * d2;
        for (int k = 0; k < u.length; k++) {
            final RealVector traceVector = (RealVector) e_u[k].vect();
            u[k].addToSelf(stepWeight, traceVector);
        }
    }

    /**
     * Performs one actor update: refreshes the target policy, records the ratio
     * {@code d / d2}, then updates the traces and the policy parameters in that order.
     *
     * @param d          numerator of the stored ratio
     * @param d2         denominator of the stored ratio (no zero check is performed here)
     * @param realVector vector passed to {@code targetPolicy.update}
     * @param action     action forwarded to both update steps
     * @param d3         update signal forwarded to both update steps
     */
    @Override // rlpark.plugin.rltoys.algorithms.control.actorcritic.offpolicy.AbstractActorOffPolicy
    protected void updateParameters(double d, double d2, RealVector realVector, Action action, double d3) {
        targetPolicy.update(realVector);
        rho_t = d / d2;
        // Traces must be refreshed before they are applied to the parameters.
        updateEligibilityTraces(rho_t, action, d3);
        updatePolicyParameters(rho_t, action, d3);
    }

    /** Clears every trace at the start of an episode. */
    @Override // rlpark.plugin.rltoys.algorithms.control.actorcritic.offpolicy.AbstractActorOffPolicy
    protected void initEpisode() {
        for (int k = 0; k < e_u.length; k++) {
            e_u[k].clear();
        }
    }

    /**
     * Returns the internal trace array itself (not a copy).
     *
     * @return the traces, one per parameter vector
     */
    public Traces[] eligibilities() {
        return e_u;
    }
}
