package rlpark.plugin.rltoys.algorithms.control.gq;

import rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner;
import rlpark.plugin.rltoys.algorithms.functions.stateactions.StateToStateAction;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.policy.Policies;
import rlpark.plugin.rltoys.envio.policy.Policy;
import rlpark.plugin.rltoys.math.vector.MutableVector;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.PVector;
import rlpark.plugin.rltoys.math.vector.implementations.Vectors;
import rlpark.plugin.rltoys.math.vector.pool.VectorPool;
import rlpark.plugin.rltoys.math.vector.pool.VectorPools;
import rlpark.plugin.rltoys.utils.Prototype;
import rlpark.plugin.rltoys.utils.Utils;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/* loaded from: input_file:rlpark/plugin/rltoys/algorithms/control/gq/GreedyGQ.class */
/**
 * Off-policy control learner that drives a {@link GQ} predictor with a target policy and a
 * behaviour policy.
 *
 * <p>On every {@link #update} the importance-sampling ratio {@link #rho_t} is computed from the
 * two policies, the target-policy-weighted sum of the next state-action feature vectors is
 * assembled, and both are handed to the wrapped {@link GQ} instance.
 */
public class GreedyGQ implements OffPolicyLearner {
    private static final long serialVersionUID = 7017521530598253457L;

    /** Wrapped predictor; receives every update and owns the learned weights. */
    @Monitor
    protected final GQ gq;

    /** Policy being evaluated/improved; also used to propose actions. */
    @Monitor
    protected final Policy target;
    /** Policy whose action probabilities form the denominator of {@link #rho_t}. */
    protected final Policy behaviour;
    /** Maps a (state, action) pair to its feature vector. */
    protected final StateToStateAction toStateAction;

    /** Ratio {@code target.pi(a) / behaviour.pi(a)} of the last update, or 0 when it was not computed. */
    @Monitor
    public double rho_t;
    /** Actions enumerated when averaging next state-action features under the target policy. */
    private final Action[] actions;
    /** Value returned by the last call to {@code gq.update(...)}. */
    private double delta_t;
    /** Template vector used to obtain a vector pool of the matching implementation. */
    private final RealVector prototype;

    /**
     * Creates a learner around an existing predictor.
     *
     * @param gq                 predictor to update; {@code gq.e} must implement {@link Prototype},
     *                           otherwise the cast below fails with a {@link ClassCastException}
     * @param actionArr          actions summed over when building the expected next feature vector
     * @param stateToStateAction projector from (state, action) to a feature vector
     * @param policy             target policy
     * @param policy2            behaviour policy
     */
    public GreedyGQ(GQ gq, Action[] actionArr, StateToStateAction stateToStateAction, Policy policy, Policy policy2) {
        this.gq = gq;
        this.target = policy;
        this.behaviour = policy2;
        this.toStateAction = stateToStateAction;
        this.actions = actionArr;
        // The prototype of gq.e is kept so that pooled vectors match its implementation.
        this.prototype = (RealVector) ((Prototype) gq.e).prototype();
    }

    /**
     * Performs one learning step.
     *
     * <ol>
     *   <li>{@link #rho_t} is reset to 0 and, when {@code action} is non-null and
     *       {@code realVector} is not a null vector, set to
     *       {@code target.pi(action) / behaviour.pi(action)} after both policies have been
     *       updated with {@code realVector}.</li>
     *   <li>When both {@code realVector} and {@code realVector2} are non-null vectors, the target
     *       policy is updated with {@code realVector2} and the vector
     *       {@code sum_a target.pi(a) * stateAction(realVector2, a)} is accumulated over all
     *       configured actions (actions with probability exactly 0 are skipped). Otherwise the
     *       freshly pooled vector is passed on untouched.</li>
     *   <li>{@code gq.update(...)} is invoked and its result stored and returned.</li>
     * </ol>
     *
     * @param realVector  current state features; may be a null vector
     * @param action      action taken in the current state; may be {@code null}
     * @param d           forwarded to {@code gq.update} as its third argument
     * @param d2          not read by this method
     * @param d3          forwarded to {@code gq.update} as its last argument
     * @param realVector2 next state features; may be a null vector
     * @param action2     not read by this method
     * @return the value returned by {@code gq.update}
     * @throws AssertionError if assertions are enabled and {@code Utils.checkValue(rho_t)} is false
     */
    public double update(RealVector realVector, Action action, double d, double d2, double d3, RealVector realVector2, Action action2) {
        this.rho_t = 0.0d;
        if (action != null && !Vectors.isNull(realVector)) {
            this.target.update(realVector);
            this.behaviour.update(realVector);
            this.rho_t = this.target.pi(action) / this.behaviour.pi(action);
        }
        // NOTE(review): presumably rejects NaN/infinite ratios (e.g. behaviour.pi(action) == 0) — confirm in Utils.
        assert Utils.checkValue(this.rho_t);
        VectorPool pool = VectorPools.pool(this.prototype, this.gq.v.size);
        try {
            MutableVector expectedNextStateAction = pool.newVector();
            if (!Vectors.isNull(realVector) && !Vectors.isNull(realVector2)) {
                this.target.update(realVector2);
                for (Action candidate : this.actions) {
                    double pi = this.target.pi(candidate);
                    if (pi != 0.0d) {
                        expectedNextStateAction.addToSelf(pi, this.toStateAction.stateAction(realVector2, candidate));
                    }
                }
            }
            this.delta_t = this.gq.update(
                    !Vectors.isNull(realVector) ? this.toStateAction.stateAction(realVector, action) : null,
                    this.rho_t, d, expectedNextStateAction, d3);
        } finally {
            // Always hand borrowed vectors back, even when a policy, the projector or gq throws.
            pool.releaseAll();
        }
        return this.delta_t;
    }

    /**
     * @return the {@code v} vector of the wrapped predictor
     */
    public PVector theta() {
        return this.gq.v;
    }

    /**
     * @return {@code 1 - gq.beta_tp1}
     */
    public double gamma() {
        return 1.0d - this.gq.beta_tp1;
    }

    /**
     * @return the wrapped predictor
     */
    public GQ gq() {
        return this.gq;
    }

    /**
     * @return the target policy supplied at construction
     */
    @Override // rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner
    public Policy targetPolicy() {
        return this.target;
    }

    /**
     * Delegates to {@link #update} with {@link #gamma()} as {@code d2} and {@code 0.0} as
     * {@code d3}; the value returned by {@code update} is discarded.
     */
    @Override // rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner
    public void learn(RealVector realVector, Action action, RealVector realVector2, Action action2, double d) {
        update(realVector, action, d, gamma(), 0.0d, realVector2, action2);
    }

    /**
     * Selects an action for {@code realVector} via {@code Policies.decide} on the target policy.
     */
    @Override // rlpark.plugin.rltoys.algorithms.control.Control
    public Action proposeAction(RealVector realVector) {
        return Policies.decide(this.target, realVector);
    }

    /**
     * @return the wrapped predictor
     */
    @Override // rlpark.plugin.rltoys.algorithms.control.OffPolicyLearner
    public GQ predictor() {
        return this.gq;
    }
}
