[Feature] Optionally clamp input actions

matteobettini · matteobettini · commit b41c141acbd2 · 2024-01-17T09:22:58.000Z
diff --git a/vmas/make_env.py b/vmas/make_env.py
@@ -23,6 +23,7 @@ def make_env(
     seed: Optional[int] = None,
     dict_spaces: bool = False,
     multidiscrete_actions: bool = False,
+    clamp_actions: bool = False,
     **kwargs,
 ):
     """
@@ -40,6 +41,8 @@ def make_env(
         multidiscrete_actions (bool): Whether to use multidiscrete_actions action spaces when continuous_actions=False.
             Otherwise, (default) the action space will be Discrete, and it will be the cartesian product of the
             action spaces of an agent.
+        clamp_actions (bool): Whether to clamp input actions to the agent's action range instead of throwing
+            an error when continuous_actions is True and actions are out of bounds.
         **kwargs ():
 
     Returns:
@@ -60,6 +63,7 @@ def make_env(
         seed=seed,
         dict_spaces=dict_spaces,
         multidiscrete_actions=multidiscrete_actions,
+        clamp_actions=clamp_actions,
         **kwargs,
     )
 
diff --git a/vmas/simulator/environment/environment.py b/vmas/simulator/environment/environment.py
@@ -9,7 +9,6 @@
 import torch
 from gym import spaces
 from torch import Tensor
-
 from vmas.simulator.core import Agent, TorchVectorizedObject
 from vmas.simulator.scenario import BaseScenario
 import vmas.simulator.utils
@@ -43,6 +42,7 @@ def __init__(
         seed: Optional[int] = None,
         dict_spaces: bool = False,
         multidiscrete_actions: bool = False,
+        clamp_actions: bool = False,
         **kwargs,
     ):
         if multidiscrete_actions:
@@ -60,6 +60,7 @@ def __init__(
         self.max_steps = max_steps
         self.continuous_actions = continuous_actions
         self.dict_spaces = dict_spaces
+        self.clamp_action = clamp_actions
 
         self.reset(seed=seed)
 
@@ -379,6 +380,9 @@ def _set_action(self, action, agent):
             f"Agent {agent.name} has wrong action size, got {action.shape[1]}, "
             f"expected {self.get_agent_action_size(agent)}"
         )
+        if self.clamp_action and self.continuous_actions:
+            action = action.clamp(-agent.action.u_range, agent.action.u_range)
+
         action_index = 0
 
         if self.continuous_actions: