package rlpark.plugin.rltoys.algorithms.control.acting;

import java.util.Random;
import rlpark.plugin.rltoys.algorithms.functions.Predictor;
import rlpark.plugin.rltoys.algorithms.functions.stateactions.StateToStateAction;
import rlpark.plugin.rltoys.envio.actions.Action;
import rlpark.plugin.rltoys.utils.Utils;

/* loaded from: input_file:rlpark/plugin/rltoys/algorithms/control/acting/EpsilonGreedy.class */
/**
 * Epsilon-greedy action selection built on top of {@link Greedy}.
 *
 * <p>When sampling, a random draw below {@code epsilon} selects an action through
 * {@code Utils.choose(random, actions)}; otherwise the greedy choice of the parent
 * class ({@code super.bestAction()}) is returned.
 */
public class EpsilonGreedy extends Greedy {
    private static final long serialVersionUID = -2618584767896890494L;

    /** Threshold compared against the random draw to decide whether to explore. */
    private final double epsilon;

    /** Random source used both for the exploration draw and for picking the exploratory action. */
    private final Random random;

    /**
     * Creates an epsilon-greedy policy.
     *
     * @param random             random source used for the exploration draw and the exploratory pick
     * @param actionArr          candidate actions, forwarded to the {@link Greedy} constructor
     * @param stateToStateAction state-to-state-action mapping, forwarded to the {@link Greedy} constructor
     * @param predictor          predictor forwarded to the {@link Greedy} constructor
     * @param d                  the epsilon value (stored as given; no range check is performed here)
     */
    public EpsilonGreedy(Random random, Action[] actionArr, StateToStateAction stateToStateAction, Predictor predictor, double d) {
        super(predictor, actionArr, stateToStateAction);
        this.random = random;
        this.epsilon = d;
    }

    /**
     * Samples an action: explores when the random draw falls below epsilon,
     * otherwise defers to the greedy choice of the parent class.
     *
     * @return the action picked by {@code Utils.choose} when exploring, else {@code super.bestAction()}
     */
    @Override
    public Action sampleAction() {
        // nextFloat() yields a float; it is widened to double for the comparison with epsilon.
        double draw = random.nextFloat();
        if (draw < epsilon) {
            return (Action) Utils.choose(random, actions);
        }
        return super.bestAction();
    }

    /**
     * Computes the probability this policy assigns to {@code action}.
     *
     * <p>Every action receives {@code epsilon / actions.length}; the action that is the
     * same instance as the inherited {@code bestAction} field additionally receives
     * {@code 1 - epsilon}. The comparison is by reference, not by {@code equals}.
     * NOTE(review): this reads the {@code bestAction} field directly rather than
     * recomputing it, so it presumably relies on the field having been updated
     * beforehand - confirm against {@link Greedy}.
     *
     * @param action the action whose selection probability is requested
     * @return the selection probability of {@code action}
     */
    @Override
    public double pi(Action action) {
        double greedyShare = 0.0d;
        if (action == bestAction) {
            greedyShare = 1.0d - epsilon;
        }
        double explorationShare = epsilon / actions.length;
        return greedyShare + explorationShare;
    }

    /**
     * Builds another {@code EpsilonGreedy} with the same epsilon.
     *
     * <p>The random source, the action array and the predictor are passed on as the same
     * references; only the state-to-state-action mapping goes through {@code Utils.clone}.
     *
     * @return a new policy instance configured like this one
     */
    @Override
    public EpsilonGreedy duplicate() {
        StateToStateAction toStateActionCopy = (StateToStateAction) Utils.clone(toStateAction);
        return new EpsilonGreedy(random, actions, toStateActionCopy, predictor, epsilon);
    }
}
