
Commit f09a406

[Feature] Actions detached from physics and allow any number of actions (#76)
* First * Amend * Amend * Amend * Amend * Amend * Amend * Amend * Amend * Amend
1 parent 127a9ec commit f09a406

14 files changed: 496 additions & 387 deletions


README.md

Lines changed: 2 additions & 1 deletion
```diff
@@ -405,8 +405,9 @@ To create a fake screen you need to have `Xvfb` installed.
 - [ ] Reset any number of dimensions
 - [ ] Improve test efficiency and add new tests
 - [ ] Implement 1D camera sensor
-- [ ] Allow any number of actions
 - [ ] Implement 2D birds eye view camera sensor
+- [ ] Implement 2D drone dynamics
+- [X] Allow any number of actions
 - [X] Improve VMAS performance
 - [X] Dict obs support in torchrl
 - [X] Make TextLine a Geom usable in a scenario
```

vmas/examples/use_vmas_env.py

Lines changed: 43 additions & 45 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023.
+# Copyright (c) 2022-2024.
 # ProrokLab (https://www.proroklab.org/)
 # All rights reserved.
 import random
@@ -11,45 +11,53 @@
 from vmas.simulator.utils import save_video
 
 
-def _get_random_action(agent: Agent, continuous: bool):
+def _get_random_action(agent: Agent, continuous: bool, env):
     if continuous:
-        action = torch.zeros(
-            (agent.batch_dim, 2),
-            device=agent.device,
-            dtype=torch.float32,
-        ).uniform_(
-            -agent.action.u_range,
-            agent.action.u_range,
-        )
-        if agent.u_rot_range > 0:
-            action = torch.cat(
-                [
-                    action,
+        actions = []
+        for action_index in range(agent.action_size):
+            actions.append(
+                torch.zeros(
+                    agent.batch_dim,
+                    device=agent.device,
+                    dtype=torch.float32,
+                ).uniform_(
+                    -agent.action.u_range_tensor[action_index],
+                    agent.action.u_range_tensor[action_index],
+                )
+            )
+        if env.world.dim_c != 0 and not agent.silent:
+            # If the agent needs to communicate
+            for _ in range(env.world.dim_c):
+                actions.append(
                     torch.zeros(
-                        (agent.batch_dim, 1),
+                        agent.batch_dim,
                         device=agent.device,
                         dtype=torch.float32,
                     ).uniform_(
-                        -agent.action.u_rot_range,
-                        agent.action.u_rot_range,
-                    ),
-                ],
-                dim=-1,
-            )
+                        0,
+                        1,
+                    )
+                )
+        action = torch.stack(actions, dim=-1)
     else:
         action = torch.randint(
-            low=0, high=5, size=(agent.batch_dim,), device=agent.device
+            low=0,
+            high=env.get_agent_action_space(agent).n,
+            size=(agent.batch_dim,),
+            device=agent.device,
+        )
+    return action
+
+
+def _get_deterministic_action(agent: Agent, continuous: bool, env):
+    if continuous:
+        action = -agent.action.u_range_tensor.expand(env.batch_dim, agent.action_size)
+    else:
+        action = (
+            torch.tensor([1], device=env.device, dtype=torch.long)
+            .unsqueeze(-1)
+            .expand(env.batch_dim, 1)
         )
-        if agent.u_rot_range > 0:
-            action = torch.stack(
-                [
-                    action,
-                    torch.randint(
-                        low=0, high=3, size=(agent.batch_dim,), device=agent.device
-                    ),
-                ],
-                dim=-1,
-            )
     return action
 
 
@@ -85,13 +93,6 @@ def use_vmas_env(
     dict_spaces = True  # Whether to return obs, rewards, and infos as dictionaries with agent names
     # (by default they are lists of len # of agents)
 
-    simple_2d_action = (
-        [0, -1.0] if continuous_actions else [3]
-    )  # Simple action for an agent with 2d actions
-    simple_3d_action = (
-        [0, -1.0, 0.1] if continuous_actions else [3, 1]
-    )  # Simple action for an agent with 3d actions (2d forces and torque)
-
     env = make_env(
         scenario=scenario_name,
         num_envs=num_envs,
@@ -120,12 +121,9 @@ def use_vmas_env(
         actions = {} if dict_actions else []
         for i, agent in enumerate(env.agents):
             if not random_action:
-                action = torch.tensor(
-                    simple_2d_action if agent.u_rot_range == 0 else simple_3d_action,
-                    device=device,
-                ).repeat(num_envs, 1)
+                action = _get_deterministic_action(agent, continuous_actions, env)
             else:
-                action = _get_random_action(agent, continuous_actions)
+                action = _get_random_action(agent, continuous_actions, env)
             if dict_actions:
                 actions.update({agent.name: action})
             else:
@@ -158,5 +156,5 @@ def use_vmas_env(
         render=True,
         save_render=False,
         random_action=False,
-        continuous_actions=True,
+        continuous_actions=False,
     )
```
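Taken together, the new helpers sample one value per action dimension and stack them on the last dim, so any `action_size` works. Below is a minimal standalone rollout sketch in the same spirit (not part of the commit; the scenario name, env count, and step count are placeholder choices, and the `float(...)` casts are just a convenience):

```python
import torch
from vmas import make_env

env = make_env(scenario="balance", num_envs=4, device="cpu", continuous_actions=True)
obs = env.reset()
for _ in range(10):
    actions = []
    for agent in env.agents:
        # One uniform sample per action dimension, each bounded by that
        # dimension's own limit from agent.action.u_range_tensor.
        action = torch.stack(
            [
                torch.zeros(agent.batch_dim, device=agent.device).uniform_(
                    -float(agent.action.u_range_tensor[i]),
                    float(agent.action.u_range_tensor[i]),
                )
                for i in range(agent.action_size)
            ],
            dim=-1,
        )
        actions.append(action)
    obs, rews, dones, info = env.step(actions)
```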

vmas/interactive_rendering.py

Lines changed: 52 additions & 55 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023.
+# Copyright (c) 2022-2024.
 # ProrokLab (https://www.proroklab.org/)
 # All rights reserved.
 """
@@ -101,21 +101,14 @@ def _cycle(self):
             self.reset = False
             total_rew = [0] * self.n_agents
 
-        action_list = [
-            [0.0] * self.env.unwrapped().get_agent_action_size(agent)
-            for agent in self.agents
-        ]
+        action_list = [[0.0] * agent.action_size for agent in self.agents]
         action_list[self.current_agent_index] = self.u[
-            : self.env.unwrapped().get_agent_action_size(
-                self.agents[self.current_agent_index]
-            )
+            : self.agents[self.current_agent_index].action_size
         ]
 
         if self.n_agents > 1 and self.control_two_agents:
             action_list[self.current_agent_index2] = self.u2[
-                : self.env.unwrapped().get_agent_action_size(
-                    self.agents[self.current_agent_index2]
-                )
+                : self.agents[self.current_agent_index2].action_size
             ]
         obs, rew, done, info = self.env.step(action_list)
 
@@ -167,56 +160,60 @@ def _write_values(self, index: int, message: str):
     def _key_press(self, k, mod):
         from pyglet.window import key
 
-        agent_range = self.agents[self.current_agent_index].u_range
-        agent_rot_range = self.agents[self.current_agent_index].u_rot_range
+        agent_range = self.agents[self.current_agent_index].action.u_range_tensor
+        try:
+            if k == key.LEFT:
+                self.keys[0] = agent_range[0]
+            elif k == key.RIGHT:
+                self.keys[1] = agent_range[0]
+            elif k == key.DOWN:
+                self.keys[2] = agent_range[1]
+            elif k == key.UP:
+                self.keys[3] = agent_range[1]
+            elif k == key.M:
+                self.keys[4] = agent_range[2]
+            elif k == key.N:
+                self.keys[5] = agent_range[2]
+            elif k == key.TAB:
+                self.current_agent_index = self._increment_selected_agent_index(
+                    self.current_agent_index
+                )
+                if self.control_two_agents:
+                    while self.current_agent_index == self.current_agent_index2:
+                        self.current_agent_index = self._increment_selected_agent_index(
+                            self.current_agent_index
+                        )
 
-        if k == key.LEFT:
-            self.keys[0] = agent_range
-        elif k == key.RIGHT:
-            self.keys[1] = agent_range
-        elif k == key.DOWN:
-            self.keys[2] = agent_range
-        elif k == key.UP:
-            self.keys[3] = agent_range
-        elif k == key.M:
-            self.keys[4] = agent_rot_range
-        elif k == key.N:
-            self.keys[5] = agent_rot_range
-        elif k == key.TAB:
-            self.current_agent_index = self._increment_selected_agent_index(
-                self.current_agent_index
-            )
             if self.control_two_agents:
-                while self.current_agent_index == self.current_agent_index2:
-                    self.current_agent_index = self._increment_selected_agent_index(
-                        self.current_agent_index
-                    )
-
-        if self.control_two_agents:
-            agent2_range = self.agents[self.current_agent_index2].u_range
-            agent2_rot_range = self.agents[self.current_agent_index2].u_rot_range
-
-            if k == key.A:
-                self.keys2[0] = agent2_range
-            elif k == key.D:
-                self.keys2[1] = agent2_range
-            elif k == key.S:
-                self.keys2[2] = agent2_range
-            elif k == key.W:
-                self.keys2[3] = agent2_range
-            elif k == key.E:
-                self.keys2[4] = agent2_rot_range
-            elif k == key.Q:
-                self.keys2[5] = agent2_rot_range
-
-            elif k == key.LSHIFT:
-                self.current_agent_index2 = self._increment_selected_agent_index(
+                agent2_range = self.agents[
                     self.current_agent_index2
-                )
-                while self.current_agent_index == self.current_agent_index2:
+                ].action.u_range_tensor
+
+                if k == key.A:
+                    self.keys2[0] = agent2_range[0]
+                elif k == key.D:
+                    self.keys2[1] = agent2_range[0]
+                elif k == key.S:
+                    self.keys2[2] = agent2_range[1]
+                elif k == key.W:
+                    self.keys2[3] = agent2_range[1]
+                elif k == key.E:
+                    self.keys2[4] = agent2_range[2]
+                elif k == key.Q:
+                    self.keys2[5] = agent2_range[2]
+
+                elif k == key.LSHIFT:
                     self.current_agent_index2 = self._increment_selected_agent_index(
                         self.current_agent_index2
                     )
+                    while self.current_agent_index == self.current_agent_index2:
+                        self.current_agent_index2 = (
+                            self._increment_selected_agent_index(
+                                self.current_agent_index2
+                            )
+                        )
+        except IndexError:
+            print("Action not available")
 
         if k == key.R:
             self.reset = True
```
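The key handler now reads per-dimension limits from `u_range_tensor` and relies on `IndexError` to ignore keys an agent's action space does not cover. A tiny standalone illustration of that guard (not from the commit):

```python
import torch

u_range = torch.tensor([1.0, 1.0])  # an agent with only 2 action dimensions
try:
    value = u_range[2]  # what pressing M or N would request (index 2)
except IndexError:
    print("Action not available")  # the same guard used in _key_press
```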

vmas/make_env.py

Lines changed: 7 additions & 2 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023.
+# Copyright (c) 2022-2024.
 # ProrokLab (https://www.proroklab.org/)
 # All rights reserved.
 
@@ -22,6 +22,7 @@ def make_env(
     max_steps: Optional[int] = None,
     seed: Optional[int] = None,
     dict_spaces: bool = False,
+    multidiscrete_actions: bool = False,
     **kwargs,
 ):
     """
@@ -35,7 +36,10 @@ def make_env(
         max_steps: Maximum number of steps in each vectorized environment after which done is returned
         seed: seed
         dict_spaces: Whether to use dictionary i/o spaces with format {agent_name: tensor}
-             for obs, rewards, and info instead of tuples.
+            for obs, rewards, and info instead of tuples.
+        multidiscrete_actions (bool): Whether to use MultiDiscrete action spaces when continuous_actions=False.
+            Otherwise (default), the action space will be Discrete, the cartesian product of an
+            agent's action spaces.
     **kwargs ():
 
     Returns:
@@ -55,6 +59,7 @@ def make_env(
         max_steps=max_steps,
         seed=seed,
         dict_spaces=dict_spaces,
+        multidiscrete_actions=multidiscrete_actions,
         **kwargs,
     )
 
```
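A usage sketch of the new flag (not from the commit; the scenario name is an arbitrary example): with `multidiscrete_actions=True` each discrete agent gets a MultiDiscrete space with one entry per action (and communication) dimension, while with the default `False` it gets a single Discrete space over the cartesian product of those entries.

```python
from vmas import make_env

env = make_env(
    scenario="transport",
    num_envs=8,
    device="cpu",
    continuous_actions=False,
    multidiscrete_actions=True,
)
print(env.action_space)  # assumed to expose the per-agent spaces
```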

vmas/scenarios/debug/diff_drive.py

Lines changed: 20 additions & 17 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023.
+# Copyright (c) 2022-2024.
 # ProrokLab (https://www.proroklab.org/)
 # All rights reserved.
 import typing
@@ -8,7 +8,8 @@
 
 from vmas import render_interactively
 from vmas.simulator.core import Agent, World
-from vmas.simulator.dynamics.diff_drive import DiffDriveDynamics
+from vmas.simulator.dynamics.diff_drive import DiffDrive
+from vmas.simulator.dynamics.holonomic_with_rot import HolonomicWithRotation
 from vmas.simulator.scenario import BaseScenario
 from vmas.simulator.utils import Color, ScenarioUtils
 
@@ -39,16 +40,24 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         world = World(batch_dim, device, substeps=10)
 
         for i in range(self.n_agents):
-            agent = Agent(
-                name=f"agent_{i}",
-                collide=True,
-                render_action=True,
-                u_range=1,
-                u_rot_range=1,
-                u_rot_multiplier=0.001,
-            )
             if i == 0:
-                agent.dynamics = DiffDriveDynamics(agent, world, integration="rk4")
+                agent = Agent(
+                    name=f"diff_drive_{i}",
+                    collide=True,
+                    render_action=True,
+                    u_range=[1, 1],
+                    u_multiplier=[1, 0.001],
+                    dynamics=DiffDrive(world, integration="rk4"),
+                )
+            else:
+                agent = Agent(
+                    name=f"holo_rot_{i}",
+                    collide=True,
+                    render_action=True,
+                    u_range=[1, 1, 1],
+                    u_multiplier=[1, 1, 0.001],
+                    dynamics=HolonomicWithRotation(),
+                )
 
             world.add_agent(agent)
 
@@ -64,12 +73,6 @@ def reset_world_at(self, env_index: int = None):
             y_bounds=(-1, 1),
         )
 
-    def process_action(self, agent: Agent):
-        try:
-            agent.dynamics.process_force()
-        except AttributeError:
-            pass
-
     def reward(self, agent: Agent):
         return torch.zeros(self.world.batch_dim)
 
```
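The action shape now comes from the agent constructor rather than from a `process_action` hook: `u_range` and `u_multiplier` accept one entry per action dimension, and the dynamics object is passed in directly. A standalone construction sketch under those assumptions (batch size and device are placeholders):

```python
import torch

from vmas.simulator.core import Agent, World
from vmas.simulator.dynamics.diff_drive import DiffDrive

world = World(batch_dim=2, device=torch.device("cpu"), substeps=10)
agent = Agent(
    name="diff_drive_0",
    collide=True,
    render_action=True,
    u_range=[1, 1],           # per-dimension action limits
    u_multiplier=[1, 0.001],  # per-dimension scaling; the second entry drives rotation
    dynamics=DiffDrive(world, integration="rk4"),
)
world.add_agent(agent)
# agent.action_size should now be 2: a forward command and a rotation command.
```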
