import gymnasium
import numpy as onp
from scipy.special import expit as sigmoid
from .._base.mixins import AddOrigToInfoDictMixin
# Public names exported by ``from <module> import *``: the two Box action-space wrappers.
__all__ = ('BoxActionsToReals', 'BoxActionsToDiscrete')
class BoxActionsToReals(gymnasium.Wrapper, AddOrigToInfoDictMixin):
    r"""

    This wrapper decompactifies a :class:`Box <gymnasium.spaces.Box>` action space to the reals.
    This is required in order to be able to use a Gaussian policy.

    In practice, the wrapped environment expects the input action
    :math:`a_\text{real}\in\mathbb{R}^n` and then it compactifies it back to a Box of the right
    size:

    .. math::

        a_\text{box}\ =\ \text{low} + (\text{high}-\text{low})\times\text{sigmoid}(a_\text{real})

    Technically, the transformed space is still a Box, but that's only because we assume that the
    values lie between large but finite bounds, :math:`a_\text{real}\in[-10^{15}, 10^{15}]^n`.

    The exposed action space is one-dimensional (flat): its length is the product of the
    dimensions of the underlying action space. Actions are reshaped back to the underlying shape
    before they are passed on.

    Parameters
    ----------
    env : gymnasium environment

        The environment to wrap. Its action space must be a
        :class:`Box <gymnasium.spaces.Box>`.

    Raises
    ------
    NotImplementedError

        If the action space of ``env`` is not a :class:`Box <gymnasium.spaces.Box>`.

    """
    def __init__(self, env):
        super().__init__(env)
        if not isinstance(self.action_space, gymnasium.spaces.Box):
            raise NotImplementedError("BoxActionsToReals is only implemented for Box action spaces")

        # onp.prod(()) is the *float* 1.0, which onp.full() rejects as a dimension; cast to int so
        # that scalar (shape == ()) boxes are handled too.
        size = int(onp.prod(self.env.action_space.shape))
        shape_flat = (size,)
        dtype = self.env.action_space.dtype
        self.action_space = gymnasium.spaces.Box(
            low=onp.full(shape_flat, -1e15, dtype),
            high=onp.full(shape_flat, 1e15, dtype))

    def step(self, a):
        """
        Map the unbounded action ``a`` into the underlying Box and step the wrapped environment.

        The compactified action is stored in ``self._a_orig`` before the wrapped environment is
        stepped. The return value is the 5-tuple ``(s_next, r, done, truncated, info)`` produced
        by the wrapped environment's ``step``.

        """
        assert self.action_space.contains(a)
        self._a_orig = self._compactify(a)
        s_next, r, done, truncated, info = super().step(self._a_orig)
        # Provided by AddOrigToInfoDictMixin (defined elsewhere); presumably records the original
        # action/state in ``info`` -- confirm against the mixin.
        self._add_a_orig_to_info_dict(info)
        return s_next, r, done, truncated, info

    def _compactify(self, action):
        """ Squash a flat, real-valued action into the bounds of the underlying Box. """
        hi, lo = self.env.action_space.high, self.env.action_space.low
        # Keep the values within the bounds advertised by self.action_space; this still holds when
        # the assert in step() is stripped (python -O).
        action = onp.clip(action, -1e15, 1e15)
        action = onp.reshape(action, self.env.action_space.shape)
        return lo + (hi - lo) * sigmoid(action)
class BoxActionsToDiscrete(gymnasium.Wrapper, AddOrigToInfoDictMixin):
    r"""

    This wrapper splits a :class:`Box <gymnasium.spaces.Box>` action space into bins. The resulting
    action space is either :class:`Discrete <gymnasium.spaces.Discrete>` or :class:`MultiDiscrete
    <gymnasium.spaces.MultiDiscrete>`, depending on the shape of the original action space.

    A discrete action selects one bin per dimension. The continuous action that is passed on to
    the wrapped environment is drawn uniformly at random from within the selected bin.

    Parameters
    ----------
    env : gymnasium environment

        The environment to wrap. Its action space must be a
        :class:`Box <gymnasium.spaces.Box>`.

    num_bins : int or tuple of ints

        The number of bins to use. A multi-dimensional box requires a tuple of num_bins instead
        of a single integer.

    random_seed : int, optional

        Sets the random state to get reproducible results.

    Raises
    ------
    NotImplementedError

        If the action space of ``env`` is not a :class:`Box <gymnasium.spaces.Box>`.

    ValueError

        If ``num_bins`` is a tuple whose length differs from the total number of entries in the
        original action space.

    TypeError

        If ``num_bins`` is neither an int nor a tuple of ints.

    """
    def __init__(self, env, num_bins, random_seed=None):
        super().__init__(env)
        if not isinstance(self.action_space, gymnasium.spaces.Box):
            raise NotImplementedError(
                "BoxActionsToDiscrete is only implemented for Box action spaces")

        self._rnd = onp.random.RandomState(random_seed)
        self._init_action_space(num_bins)  # also sets self._nvec and self._size

    def step(self, a):
        """
        Convert the discrete action ``a`` to a Box action and step the wrapped environment.

        The converted action is stored in ``self._a_orig`` before the wrapped environment is
        stepped. The return value is the 5-tuple ``(s_next, r, done, truncated, info)`` produced
        by the wrapped environment's ``step``.

        """
        assert self.action_space.contains(a)
        self._a_orig = self._discrete_to_box(a)
        s_next, r, done, truncated, info = super().step(self._a_orig)
        # Provided by AddOrigToInfoDictMixin (defined elsewhere); presumably records the original
        # action/state in ``info`` -- confirm against the mixin.
        self._add_a_orig_to_info_dict(info)
        return s_next, r, done, truncated, info

    def _discrete_to_box(self, a_discrete):
        """ Sample a continuous action uniformly from within the selected bin(s). """
        hi, lo = self.env.action_space.high, self.env.action_space.low
        # bin index + uniform jitter in [0, 1), divided by the number of bins, gives a fraction in
        # [0, 1) that lies inside the selected bin
        a_flat = (a_discrete + self._rnd.rand(self._size)) / self._nvec
        a_reshaped = onp.reshape(a_flat, self.env.action_space.shape)
        # NOTE(review): rand() yields float64, so the result may be float64 even when
        # env.action_space.dtype is float32 -- confirm whether a cast is wanted here.
        a_rescaled = lo + a_reshaped * (hi - lo)
        return a_rescaled

    def _init_action_space(self, num_bins):
        """ Validate ``num_bins`` and set ``self._size``, ``self._nvec`` and the action space. """
        # onp.prod(()) is the *float* 1.0, which cannot be used as a length; cast to int so that
        # scalar (shape == ()) boxes are handled too.
        self._size = int(onp.prod(self.env.action_space.shape))

        if isinstance(num_bins, int):
            # always an ndarray, consistent with the tuple branch below
            self._nvec = onp.full(self._size, num_bins)
        elif isinstance(num_bins, tuple) and all(isinstance(i, int) for i in num_bins):
            if len(num_bins) != self._size:
                raise ValueError(
                    "len(num_bins) must be equal to the number of non-trivial dimensions: "
                    f"{self._size}")
            self._nvec = onp.asarray(num_bins)
        else:
            raise TypeError("num_bins must be an int or tuple of ints")

        if self._size == 1:
            # hand Discrete a plain Python int rather than a numpy scalar
            self.action_space = gymnasium.spaces.Discrete(int(self._nvec[0]))
        else:
            self.action_space = gymnasium.spaces.MultiDiscrete(self._nvec)