package rlpark.plugin.rltoys.horde.demons;

import rlpark.plugin.rltoys.algorithms.LinearLearner;
import rlpark.plugin.rltoys.algorithms.functions.Predictor;
import rlpark.plugin.rltoys.algorithms.predictions.td.GTDLambda;
import rlpark.plugin.rltoys.algorithms.predictions.td.GVF;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.policy.Policy;
import rlpark.plugin.rltoys.horde.functions.ConstantGamma;
import rlpark.plugin.rltoys.horde.functions.ConstantOutcomeFunction;
import rlpark.plugin.rltoys.horde.functions.GammaFunction;
import rlpark.plugin.rltoys.horde.functions.OutcomeFunction;
import rlpark.plugin.rltoys.horde.functions.RewardFunction;
import rlpark.plugin.rltoys.math.vector.RealVector;
import zephyr.plugin.core.api.labels.Labeled;
import zephyr.plugin.core.api.labels.Labels;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/* loaded from: input_file:rlpark/plugin/rltoys/horde/demons/PredictionOffPolicyDemon.class */
/**
 * A {@link Demon} that drives a {@link GVF} learner from a target policy, a
 * behaviour policy, and reward / gamma / outcome functions.
 *
 * <p>On each {@link #update(RealVector, Action, RealVector)} call the demon
 * records the ratio {@code target.pi(a) / behaviour.pi(a)} in a monitored
 * field and then forwards the transition, together with the current reward,
 * gamma and outcome values, to the wrapped learner.
 */
public class PredictionOffPolicyDemon implements Demon, Labeled {
    private static final long serialVersionUID = 2103050204892958885L;

    /** Supplies the reward value passed to the learner on every update. */
    private final RewardFunction rewardFunction;

    /** The wrapped learner; also exposed through {@link #learner()} and {@link #predicter()}. */
    @Monitor
    private final GVF gtd;

    /** Policy whose action probability is the numerator of the stored ratio. */
    @Monitor
    protected final Policy target;

    /** Policy whose action probability is the denominator of the stored ratio. */
    protected final Policy behaviour;

    /** Last computed {@code target.pi(a) / behaviour.pi(a)}; 0 when the action was {@code null}. */
    @Monitor
    private double rho_t;

    /** Supplies the outcome value passed to the learner on every update. */
    private final OutcomeFunction outcomeFunction;

    /** Supplies the gamma value passed to the learner on every update. */
    private final GammaFunction gammaFunction;

    /**
     * Builds a demon whose gamma function is a {@link ConstantGamma} taken from
     * {@code gtdLambda.gamma()} and whose outcome function is a
     * {@link ConstantOutcomeFunction} of {@code 0.0}.
     *
     * @param targetPolicy    policy stored as {@link #target}
     * @param behaviourPolicy policy stored as {@link #behaviour}
     * @param gtdLambda       learner to wrap; also the source of the constant gamma
     * @param reward          reward function queried on every update
     */
    public PredictionOffPolicyDemon(Policy targetPolicy, Policy behaviourPolicy, GTDLambda gtdLambda,
            RewardFunction reward) {
        this(targetPolicy, behaviourPolicy, gtdLambda, reward, new ConstantGamma(gtdLambda.gamma()),
                new ConstantOutcomeFunction(0.0));
    }

    /**
     * Builds a demon from explicitly supplied collaborators. No argument is
     * validated here; each one is stored as given.
     *
     * @param targetPolicy    policy stored as {@link #target}
     * @param behaviourPolicy policy stored as {@link #behaviour}
     * @param learner         learner to wrap
     * @param reward          reward function queried on every update
     * @param gamma           gamma function queried on every update
     * @param outcome         outcome function queried on every update
     */
    public PredictionOffPolicyDemon(Policy targetPolicy, Policy behaviourPolicy, GVF learner,
            RewardFunction reward, GammaFunction gamma, OutcomeFunction outcome) {
        target = targetPolicy;
        behaviour = behaviourPolicy;
        gtd = learner;
        rewardFunction = reward;
        gammaFunction = gamma;
        outcomeFunction = outcome;
    }

    /**
     * Records the policy ratio for {@code action} and forwards the transition
     * to the wrapped learner.
     *
     * @param x_t    first vector handed to the learner
     * @param action action used to query both policies; may be {@code null},
     *               in which case the stored ratio is set to 0
     * @param x_tp1  second vector handed to the learner
     */
    @Override
    public void update(RealVector x_t, Action action, RealVector x_tp1) {
        if (action == null) {
            rho_t = 0.0;
        } else {
            // No guard on the denominator: presumably a zero behaviour
            // probability would yield a non-finite ratio - TODO confirm.
            rho_t = target.pi(action) / behaviour.pi(action);
        }
        // NOTE(review): rho_t is only stored for monitoring; the learner receives
        // the literal constants 1.0 and 1.0 as its first two arguments. If those
        // arguments are meant to carry the target/behaviour probabilities, this
        // update is not importance-weighted - confirm against the GVF.update contract.
        gtd.update(
                1.0,
                1.0,
                x_t,
                x_tp1,
                rewardFunction.reward(),
                gammaFunction.gamma(),
                outcomeFunction.outcome());
    }

    /**
     * @return the reward function supplied at construction
     */
    public RewardFunction rewardFunction() {
        return rewardFunction;
    }

    /**
     * @return the wrapped learner, viewed as a {@link Predictor}
     */
    public Predictor predicter() {
        return gtd;
    }

    /**
     * @return the target policy supplied at construction
     */
    public Policy targetPolicy() {
        return target;
    }

    /**
     * @return the wrapped learner, viewed as a {@link LinearLearner}
     */
    @Override
    public LinearLearner learner() {
        return gtd;
    }

    /**
     * @return {@code "offpolicyDemon"} followed by the label of the reward function
     */
    @Override
    public String label() {
        return "offpolicyDemon" + Labels.label(rewardFunction);
    }
}
