package rlpark.plugin.rltoys.algorithms.control.sarsa;

import rlpark.plugin.rltoys.algorithms.control.ControlLearner;
import rlpark.plugin.rltoys.algorithms.functions.stateactions.StateToStateAction;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.envio.policy.Policies;
import rlpark.plugin.rltoys.envio.policy.Policy;
import rlpark.plugin.rltoys.math.vector.RealVector;
import rlpark.plugin.rltoys.math.vector.implementations.Vectors;
import zephyr.plugin.core.api.monitoring.annotations.Monitor;

/* loaded from: input_file:rlpark/plugin/rltoys/algorithms/control/sarsa/SarsaControl.class */
/**
 * Control learner that picks actions with a {@link Policy}, converts each
 * (state, action) pair into a vector through a {@link StateToStateAction}
 * mapping, and forwards consecutive vectors to a {@link Sarsa} learner.
 */
public class SarsaControl implements ControlLearner {
    private static final long serialVersionUID = 2848271828496458933L;

    /** Learner that receives exactly one {@code update} call per {@link #step}. */
    @Monitor
    protected final Sarsa sarsa;

    /** Policy consulted by both {@link #step} and {@link #proposeAction}. */
    @Monitor
    protected final Policy acting;

    /** Mapping used to turn a state and the chosen action into a single vector. */
    protected final StateToStateAction toStateAction;

    /**
     * Value returned by {@code Vectors.bufferedCopy} for the state-action vector
     * of the most recent {@link #step}; {@code null} until the first call.
     */
    protected RealVector xa_t = null;

    /**
     * Stores the three collaborators; no other work is done here.
     *
     * @param policy             policy used to choose actions
     * @param stateToStateAction mapping from (state, action) to a vector
     * @param sarsa              learner updated on every step
     */
    public SarsaControl(Policy policy, StateToStateAction stateToStateAction, Sarsa sarsa) {
        this.acting = policy;
        this.toStateAction = stateToStateAction;
        this.sarsa = sarsa;
    }

    /**
     * Chooses an action for {@code realVector2}, updates the Sarsa learner with
     * the previous and new state-action vectors, then remembers the new vector
     * for the next call.
     *
     * @param realVector  previous state; only tested for {@code null}. When it is
     *                    {@code null}, {@code null} is handed to the learner
     *                    instead of the remembered state-action vector
     *                    (presumably signalling the start of an episode; confirm
     *                    against the {@code ControlLearner} contract)
     * @param action      not read by this implementation
     * @param realVector2 state given to the policy and to the state-action mapping
     * @param d           forwarded unchanged as the third argument of
     *                    {@code Sarsa.update} (presumably the reward; confirm)
     * @return the action the policy selected for {@code realVector2}
     */
    @Override
    public Action step(RealVector realVector, Action action, RealVector realVector2, double d) {
        final Action nextAction = Policies.decide(acting, realVector2);
        final RealVector nextStateAction = toStateAction.stateAction(realVector2, nextAction);

        // Without a previous state there is no previous state-action vector to learn from.
        final RealVector previousStateAction;
        if (realVector == null) {
            previousStateAction = null;
        } else {
            previousStateAction = xa_t;
        }
        sarsa.update(previousStateAction, nextStateAction, d);

        // The old buffer is offered to bufferedCopy as the copy target.
        xa_t = Vectors.bufferedCopy(nextStateAction, xa_t);
        return nextAction;
    }

    /**
     * @return the policy this controller uses to select actions
     */
    public Policy acting() {
        return acting;
    }

    /**
     * Asks the acting policy for an action in the given state, without calling
     * the Sarsa learner or touching the remembered state-action vector.
     *
     * @param realVector state for which an action is requested
     * @return the action chosen by the policy
     */
    @Override
    public Action proposeAction(RealVector realVector) {
        return Policies.decide(acting, realVector);
    }
}
