|
| 1 | +"""""" |
| 2 | + |
| 3 | +import os |
| 4 | +import sys |
| 5 | + |
| 6 | +import pytest |
| 7 | + |
| 8 | +from openrl.configs.config import create_config_parser |
| 9 | +from openrl.envs.common import make |
| 10 | +from openrl.envs.vec_env.wrappers.gen_data import GenDataWrapper |
| 11 | +from openrl.envs.wrappers.extra_wrappers import ZeroRewardWrapper |
| 12 | +from openrl.envs.wrappers.monitor import Monitor |
| 13 | +from openrl.modules.common import GAILNet as Net |
| 14 | +from openrl.modules.common import PPONet |
| 15 | +from openrl.runners.common import GAILAgent as Agent |
| 16 | +from openrl.runners.common import PPOAgent |
| 17 | + |
| 18 | + |
@pytest.fixture(scope="function")
def gen_data(tmpdir):
    """Roll out an (untrained) PPO policy on CartPole-v1 and record 5 episodes.

    Returns:
        Path to the pickled trajectory file written by ``GenDataWrapper``.
    """
    data_path = os.path.join(tmpdir, "data.pkl")
    print("generate data....")
    env = make(
        "CartPole-v1",
        env_num=2,
        asynchronous=True,
        env_wrappers=[Monitor],
    )
    collector = PPOAgent(PPONet(env))
    # GenDataWrapper records transitions and signals done after total_episode episodes.
    env = GenDataWrapper(env, data_save_path=data_path, total_episode=5)
    obs, info = env.reset()
    finished = False
    while not finished:
        # Query the policy for the next action given the current observation.
        action, _ = collector.act(obs, deterministic=True)
        obs, reward, finished, info = env.step(action)
    env.close()
    print("generate data done!")
    return data_path
| 43 | + |
| 44 | + |
@pytest.fixture(
    scope="function", params=[" --gail_use_action false", " --gail_use_action true"]
)
def config(request, gen_data):
    """Build a GAIL training config, parametrized over ``--gail_use_action``.

    ``gen_data`` supplies the expert trajectory file consumed by the GAIL reward.
    """
    base_opts = (
        "--episode_length 5 --use_recurrent_policy true --use_joint_action_loss true"
        " --use_valuenorm true --use_adv_normalize true --reward_class.id GAILReward"
    )
    full_opts = base_opts + request.param + " --expert_data " + gen_data
    parser = create_config_parser()
    return parser.parse_args(full_opts.split())
| 58 | + |
| 59 | + |
@pytest.mark.unittest
def test_train_gail(config):
    """Smoke-test GAIL training on CartPole-v1 with the native reward zeroed out."""
    # ZeroRewardWrapper suppresses the environment reward so training exercises
    # the GAIL reward path configured via --reward_class.id GAILReward.
    env = make(
        "CartPole-v1",
        env_num=2,
        cfg=config,
        env_wrappers=[ZeroRewardWrapper],
    )
    agent = Agent(Net(env, cfg=config))
    agent.train(total_time_steps=200)
    env.close()
| 72 | + |
| 73 | + |
| 74 | +if __name__ == "__main__": |
| 75 | + sys.exit(pytest.main(["-sv", os.path.basename(__file__)])) |
0 commit comments