package rlpark.plugin.rltoys.algorithms.control.actorcritic.onpolicy;

import rlpark.plugin.rltoys.algorithms.predictions.td.OnPolicyTD;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.math.vector.RealVector;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/**
 * Actor-critic variant that keeps a running average-reward estimate and hands
 * the critic the reward minus that estimate instead of the raw reward.
 *
 * <p>The estimate starts at zero and, after every critic update, is moved by
 * {@code alpha_r} times the value the critic's {@code update} call returned.
 */
@Monitor
public class AverageRewardActorCritic extends AbstractActorCritic {
    private static final long serialVersionUID = 3772938582043052714L;

    /** Current running estimate of the average reward. */
    protected double averageReward;

    /** Step size applied to the critic's result when adjusting {@link #averageReward}. */
    private final double alpha_r;

    /**
     * Creates the learner with a zero average-reward estimate.
     *
     * @param d step size used for the average-reward estimate
     * @param onPolicyTD critic, forwarded to the superclass constructor
     * @param actor actor, forwarded to the superclass constructor
     */
    public AverageRewardActorCritic(double d, OnPolicyTD onPolicyTD, Actor actor) {
        super(onPolicyTD, actor);
        alpha_r = d;
        averageReward = 0.0d;
    }

    /**
     * Updates the critic with the reward offset by the current average-reward
     * estimate, then adjusts the estimate from the critic's result.
     *
     * @param realVector first vector forwarded to the critic
     * @param realVector2 second vector forwarded to the critic
     * @param d reward before the average-reward estimate is subtracted
     * @return the value returned by the critic's {@code update} call
     */
    @Override
    protected double updateCritic(RealVector realVector, RealVector realVector2, double d) {
        // The offset uses the estimate as it stands before this step's adjustment.
        final double centeredReward = d - averageReward;
        final double criticResult = critic.update(realVector, realVector2, centeredReward);
        averageReward = averageReward + alpha_r * criticResult;
        return criticResult;
    }

    /**
     * Forwards the arguments unchanged to the actor's {@code update} method.
     *
     * @param realVector vector forwarded to the actor
     * @param action action forwarded to the actor
     * @param d scalar forwarded to the actor
     */
    @Override
    protected void updateActor(RealVector realVector, Action action, double d) {
        actor.update(realVector, action, d);
    }

    /**
     * Returns the current average-reward estimate.
     *
     * @return the value of {@link #averageReward}
     */
    public double currentAverage() {
        return averageReward;
    }
}
