# Source code for coax.wrappers._box_spaces

import gymnasium
import numpy as onp
from scipy.special import expit as sigmoid

from .._base.mixins import AddOrigToInfoDictMixin


# Public API of this module: the names picked up by a star-import.
__all__ = (
    'BoxActionsToReals',
    'BoxActionsToDiscrete',
)


class BoxActionsToReals(gymnasium.Wrapper, AddOrigToInfoDictMixin):
    r"""

    This wrapper decompactifies a :class:`Box <gymnasium.spaces.Box>` action space to the reals.
    This is required in order to be able to use a Gaussian policy.

    In practice, the wrapped environment expects the input action
    :math:`a_\text{real}\in\mathbb{R}^n` and then it compactifies it back to a Box of the right
    size:

    .. math::

        a_\text{box}\ =\ \text{low} + (\text{high}-\text{low})\times\text{sigmoid}(a_\text{real})

    Technically, the transformed space is still a Box, but that's only because we assume that the
    values lie between large but finite bounds, :math:`a_\text{real}\in[-10^{15}, 10^{15}]^n`.

    The transformed action space is flat (one-dimensional, with one entry per element of the
    original Box); actions are reshaped to the original Box shape before they are passed on.

    Note that the formula above only yields finite actions when the ``low`` and ``high`` bounds of
    the original Box are finite.

    Parameters
    ----------
    env : gymnasium environment

        The environment to wrap. Its action space must be a :class:`Box <gymnasium.spaces.Box>`.

    Raises
    ------
    NotImplementedError

        If the action space of ``env`` is not a :class:`Box <gymnasium.spaces.Box>`.

    """
    def __init__(self, env):
        super().__init__(env)
        if not isinstance(self.action_space, gymnasium.spaces.Box):
            raise NotImplementedError("BoxActionsToReals is only implemented for Box action spaces")

        box = self.env.action_space
        # onp.prod(()) evaluates to the float 1.0, which onp.full rejects as a shape entry. Forcing
        # an integer product makes a scalar Box (shape == ()) map onto a flat space of size 1.
        shape_flat = (int(onp.prod(box.shape, dtype=int)),)
        self.action_space = gymnasium.spaces.Box(
            low=onp.full(shape_flat, -1e15, box.dtype),
            high=onp.full(shape_flat, 1e15, box.dtype))

    def step(self, a):
        r"""

        Map the real-valued action back into the original Box and step the wrapped environment.

        Parameters
        ----------
        a : array

            An element of the (flat, decompactified) action space of this wrapper.

        Returns
        -------
        s_next, r, done, truncated, info

            The five values returned by the wrapped environment's ``step``. The ``info`` dict is
            first passed to ``_add_a_orig_to_info_dict`` (provided by the mixin; presumably it
            records the compactified action stored in ``self._a_orig``).

        """
        assert self.action_space.contains(a), f"invalid action: {a!r}"
        self._a_orig = self._compactify(a)
        s_next, r, done, truncated, info = super().step(self._a_orig)
        self._add_a_orig_to_info_dict(info)
        return s_next, r, done, truncated, info

    def _compactify(self, action):
        """ Squash a flat real-valued action into the bounds (and shape) of the original Box. """
        hi, lo = self.env.action_space.high, self.env.action_space.low
        action = onp.clip(action, -1e15, 1e15)
        # undo the flattening that was applied to the action space in __init__
        action = onp.reshape(action, self.env.action_space.shape)
        return lo + (hi - lo) * sigmoid(action)


class BoxActionsToDiscrete(gymnasium.Wrapper, AddOrigToInfoDictMixin):
    r"""

    This wrapper splits a :class:`Box <gymnasium.spaces.Box>` action space into bins. The resulting
    action space is either :class:`Discrete <gymnasium.spaces.Discrete>` or :class:`MultiDiscrete
    <gymnasium.spaces.MultiDiscrete>`, depending on the shape of the original action space.

    A discrete action selects a bin along each dimension; the action that is passed on to the
    wrapped environment is drawn uniformly at random from within the selected bin.

    Parameters
    ----------
    env : gymnasium environment

        The environment to wrap. Its action space must be a :class:`Box <gymnasium.spaces.Box>`.

    num_bins : int or tuple of ints

        The number of bins to use. A multi-dimensional box requires a tuple of num_bins instead
        of a single integer.

    random_seed : int, optional

        Sets the random state to get reproducible results.

    Raises
    ------
    NotImplementedError

        If the action space of ``env`` is not a :class:`Box <gymnasium.spaces.Box>`.

    TypeError

        If ``num_bins`` is neither an int nor a tuple of ints.

    ValueError

        If ``num_bins`` is a tuple whose length differs from the number of elements in the original
        Box.

    """
    def __init__(self, env, num_bins, random_seed=None):
        super().__init__(env)
        if not isinstance(self.action_space, gymnasium.spaces.Box):
            raise NotImplementedError(
                "BoxActionsToDiscrete is only implemented for Box action spaces")
        self._rnd = onp.random.RandomState(random_seed)
        self._init_action_space(num_bins)  # also sets self._nvec and self._size

    def step(self, a):
        r"""

        Convert the discrete action to a Box action and step the wrapped environment.

        Parameters
        ----------
        a : int or array of ints

            An element of the discretized action space of this wrapper.

        Returns
        -------
        s_next, r, done, truncated, info

            The five values returned by the wrapped environment's ``step``. The ``info`` dict is
            first passed to ``_add_a_orig_to_info_dict`` (provided by the mixin; presumably it
            records the Box action stored in ``self._a_orig``).

        """
        assert self.action_space.contains(a), f"invalid action: {a!r}"
        self._a_orig = self._discrete_to_box(a)
        s_next, r, done, truncated, info = super().step(self._a_orig)
        self._add_a_orig_to_info_dict(info)
        return s_next, r, done, truncated, info

    def _discrete_to_box(self, a_discrete):
        """ Sample a Box action uniformly from within the bin(s) selected by ``a_discrete``. """
        hi, lo = self.env.action_space.high, self.env.action_space.low
        # bin index plus uniform noise in [0, 1), divided by the bin count: a fraction in [0, 1)
        a_flat = (a_discrete + self._rnd.rand(self._size)) / self._nvec
        a_reshaped = onp.reshape(a_flat, self.env.action_space.shape)
        a_rescaled = lo + a_reshaped * (hi - lo)
        return a_rescaled

    def _init_action_space(self, num_bins):
        """ Validate ``num_bins`` and set ``self._size``, ``self._nvec`` and ``self.action_space``. """
        int_types = (int, onp.integer)  # accept numpy integer scalars as well as python ints

        # onp.prod(()) evaluates to the float 1.0, which breaks both sequence repetition and
        # RandomState.rand(); force an integer so that a scalar Box (shape == ()) has size 1.
        self._size = int(onp.prod(self.env.action_space.shape, dtype=int))

        if isinstance(num_bins, int_types):
            num_bins = (num_bins,) * self._size
        elif isinstance(num_bins, tuple) and all(isinstance(i, int_types) for i in num_bins):
            if len(num_bins) != self._size:
                raise ValueError(
                    "len(num_bins) must be equal to the number of non-trivial dimensions: "
                    f"{self._size}")
        else:
            raise TypeError("num_bins must be an int or tuple of ints")

        # always an integer ndarray, regardless of how num_bins was specified
        self._nvec = onp.asarray(num_bins, dtype=int)

        if self._size == 1:
            self.action_space = gymnasium.spaces.Discrete(int(self._nvec[0]))
        else:
            self.action_space = gymnasium.spaces.MultiDiscrete(self._nvec)