AgentJet/ajet/tuner_lib/experimental/swarm_server.py at 4658ce027f7288e945f50a67906bcf99df50da77 · modelscope/AgentJet · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
import multiprocessing
import time
import zmq
import os
import asyncio
import threading
from loguru import logger
from functools import lru_cache
from types import SimpleNamespace
from fastapi import FastAPI, HTTPException
from multiprocessing.managers import DictProxy
from typing import Coroutine, Optional, Tuple, List
from ajet.utils.process_killer import kill_process_tree
from ajet.tuner_lib.experimental.swarm_overwatch_utils import (
    CurrentBatchRolloutPoolInformation,
    RewardHistoryEntry,
    RewardHistoryResponse,
)
from ajet.tuner_lib.experimental.interchange_utils import DEBUG, VERBOSE
from ajet.tuner_lib.experimental.interchange_utils import (
    SyncTrainConfigRequest,
    ClaimEpisodeRequest,
    ClaimEpisodeResponse,
    CheckWhetherEpisodeClaimedRequest,
    CanContinueEpisodeRequest,
    CanContinueEpisodeResponse,
    EndEpisodeRequest,
    EndEpisodeResponse,
    EpisodeStatus,
    EpisodeBufferResponse,
    BoolResponse,
    RegisterEpisodeRequest,
    UpdateEngineStatusRequest,
    VALID_STATUSES,
)

# Per-attempt ZMQ receive timeout in milliseconds (passed to zmq.RCVTIMEO below).
RCVTIMEO = 2 * 1000
# Overall budget, in milliseconds, for waiting on an ack (300 s = 5 minutes).
RCVTIMEO_OUT = 300 * 1000
# Number of RCVTIMEO-long receive attempts that fit into RCVTIMEO_OUT.
RCVTIMEO_WAIT_N = RCVTIMEO_OUT // RCVTIMEO


def is_key_episode_status(key: str) -> bool:
    """Return True when *key* has the ``episodes-`` prefix used for active episode records."""
    active_prefix = "episodes-"
    return key.startswith(active_prefix)

def is_key_finished_episode_status(key: str) -> bool:
    """Return True when *key* has the ``finished-episodes-`` prefix used for finished-episode snapshots."""
    finished_prefix = "finished-episodes-"
    return key.startswith(finished_prefix)

@lru_cache(maxsize=128)
def ep_key(episode_uuid: str) -> str:
    """Build the shared-memory key of the active record for *episode_uuid* (memoized)."""
    return "episodes-{}".format(episode_uuid)

@lru_cache(maxsize=128)
def finished_ep_key(episode_uuid: str) -> str:
    """Build the shared-memory key of the finished-episode snapshot for *episode_uuid* (memoized)."""
    return "finished-episodes-{}".format(episode_uuid)


def register_enable_swarm_mode_routes(
    app,
    zmq_context,
    shared_mem_dict: DictProxy,
    shared_mem_dict_lock: threading.Lock,
) -> Tuple[FastAPI, Optional[Coroutine]]:

    if "unclaimed_episodes" not in shared_mem_dict:
        shared_mem_dict["unclaimed_episodes"] = []

    if "current_batch_rollout_pool_information" not in shared_mem_dict:
        shared_mem_dict["current_batch_rollout_pool_information"] = CurrentBatchRolloutPoolInformation()

    # Initialize reward history storage for visualization
    if "reward_history" not in shared_mem_dict:
        shared_mem_dict["reward_history"] = []  # List of RewardHistoryEntry dicts

    # Initialize reward accumulator for collecting rewards of current global step
    if "current_rewards" not in shared_mem_dict:
        shared_mem_dict["current_rewards"] = []  # [rewards...]

    # ------------------------------------------------------------------------------------------------
    # ------ Recycle claimed episodes that client failed to complete in (promised) time --------------
    # ---------------------------------  claimed -> unclaimed ----------------------------------------
    # ------------------------------------------------------------------------------------------------

    async def find_claimed_episodes_that_need_to_be_unclaimed() -> List[str]:
        """Revert claimed episodes whose client went silent back to the unclaimed pool.

        An episode is treated as stale when its status is "claimed" and more
        than ``discard_episode_timeout`` seconds have passed since its
        ``latest_activity_timestamp``.

        Returns:
            The uuids of all episodes found stale in this pass, including the
            ones whose revert attempt failed (failures are logged, not raised).
        """
        current_time = time.time()

        # Collect first, revert afterwards: reverting rewrites entries of shared_mem_dict.
        to_unclaim_episodes = [
            es.episode_uuid
            for k, es in shared_mem_dict.items()
            if is_key_episode_status(k)
            and es.episode_status == "claimed"
            and (current_time - es.latest_activity_timestamp) > es.discard_episode_timeout
        ]

        for episode_uuid in to_unclaim_episodes:
            try:
                await _revert_episode_to_unclaimed(episode_uuid, shared_mem_dict, shared_mem_dict_lock)
            except Exception as e:
                # Deliberately `Exception`, not a bare `except:`; the latter would also
                # swallow asyncio.CancelledError / KeyboardInterrupt and make the
                # background task impossible to cancel. One failing episode must not
                # stop the others from being recycled, so log and continue.
                logger.error(f"Error while reverting episode {episode_uuid} to unclaimed: {e!r}")

        return to_unclaim_episodes

    def _context_tracker_reset_blocking(episode_uuid, shared_mem_dict):  # must async
        """Ask the episode's runner to reset its context tracker and wait for the ack.

        This performs a blocking ZMQ REQ round trip; from coroutines it must be
        run off the event loop (e.g. via ``asyncio.to_thread``).

        Args:
            episode_uuid: uuid of the episode whose runner should be reset.
            shared_mem_dict: shared state holding the episode record and "engine_status".

        Returns:
            None. Returns early when the episode record does not exist (any more).
            If no ack arrives within RCVTIMEO_WAIT_N attempts the function
            returns silently.

        Raises:
            RuntimeError: the engine left the rolling states while waiting for the ack.
        """
        # Single lookup instead of `in` + indexing: the record can be removed
        # by another thread/process between the two operations.
        es = shared_mem_dict.get(ep_key(episode_uuid))
        if es is None:
            return
        zmq_addr = es.zmq_listen_result_addr

        # send message to context tracker
        socket = zmq_context.socket(zmq.REQ)
        try:
            socket.setsockopt(zmq.RCVTIMEO, RCVTIMEO)  # 2 seconds recv timeout
            socket.connect(zmq_addr)

            # <send to>
            #   <to_sourcefile>: ajet/task_runner/swarm_runner.py
            #   <to_code>: message = zmq_socket.recv_string()
            socket.send_string("RUNNER.SPECIAL.RESET_CONTEXT_TRACKER")

            # <wait for ack>
            for _ in range(RCVTIMEO_WAIT_N):  # max 5 minutes wait
                try:
                    if DEBUG:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | recv_string begin.")
                    # <wait for>:
                    #   <from_sourcefile>: ajet/task_runner/swarm_runner.py
                    #   <from_code>: zmq_socket.send_string("ack")
                    #   <expect>: "ack"
                    socket.recv_string()
                    break
                except zmq.Again:
                    if DEBUG:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | recv_string timeout, retrying.")
                    # The episode vanished while we were waiting: nothing left to reset.
                    if ep_key(episode_uuid) not in shared_mem_dict:
                        return
                    if shared_mem_dict["engine_status"] not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | Engine is no longer rolling, aborting wait for ack.")
                        raise RuntimeError("Engine is no longer rolling, aborting wait for ack.")
                    continue
        finally:
            # Close deterministically on every exit path (ack, early return, RuntimeError)
            # instead of relying on garbage collection of the socket object.
            socket.close()

    async def _revert_episode_to_unclaimed(episode_uuid: str, shared_mem_dict, shared_mem_dict_lock):
        """Put a claimed episode back into the pool of claimable ("registered") episodes.

        Steps:
          1. Under the lock, re-check the record. If it is gone, skip. If it is no
             longer "claimed", only make sure it is listed in "unclaimed_episodes".
          2. Ask the runner to reset its context tracker (blocking call executed
             in a worker thread).
          3. Under the lock, re-read the record, reset its claim-related fields and
             list it as unclaimed. If the record was deleted while step 2 was
             running, it is NOT recreated.

        Args:
            episode_uuid: uuid of the episode to revert.
            shared_mem_dict: shared state holding episode records and "unclaimed_episodes".
            shared_mem_dict_lock: lock guarding read-modify-write access to shared_mem_dict.

        Raises:
            RuntimeError: propagated from the context-tracker reset when the engine
                stops rolling while waiting for the runner's ack.
        """

        def _ensure_listed_as_unclaimed():
            # Caller must hold shared_mem_dict_lock. The list is reassigned (via +=)
            # rather than mutated through the lookup result.
            if episode_uuid not in shared_mem_dict["unclaimed_episodes"]:
                shared_mem_dict["unclaimed_episodes"] += [episode_uuid]

        # check status again, because other thread may have changed it
        # (presence check and status read happen together under the lock, so a
        # concurrent delete cannot turn the lookup into a KeyError)
        with shared_mem_dict_lock:
            es = shared_mem_dict.get(ep_key(episode_uuid))
            if es is not None and es.episode_status != "claimed":
                _ensure_listed_as_unclaimed()
                return
        if es is None:
            logger.warning(f"Episode record for {episode_uuid} not found in shared memory. It may have been already processed by another thread. Skipping unclaim.")
            return

        # reset context tracker
        # _context_tracker_reset_blocking(episode_uuid, shared_mem_dict)   # must async
        await asyncio.to_thread(_context_tracker_reset_blocking, episode_uuid, shared_mem_dict)

        # revert
        logger.warning(f"Reverting episode {episode_uuid} to unclaimed due to client timeout.")
        with shared_mem_dict_lock:
            # Re-read under the lock: the reset above may have taken minutes, and
            # writing back a copy read earlier could resurrect a deleted record.
            es = shared_mem_dict.get(ep_key(episode_uuid))
            if es is None:
                return
            es.episode_status = "registered"
            es.client_uuid = ""
            es.latest_activity_timestamp = time.time()
            es.llm_call_count = 0
            es.discard_episode_timeout = -1
            shared_mem_dict[ep_key(episode_uuid)] = es
            _ensure_listed_as_unclaimed()

    def _delete_episode_record(episode_uuid: str, shared_mem_dict, shared_mem_dict_lock):
        """Remove an episode's active record and drop it from the unclaimed list.

        Args:
            episode_uuid: uuid of the episode to delete.
            shared_mem_dict: shared state holding episode records and "unclaimed_episodes".
            shared_mem_dict_lock: lock guarding read-modify-write access to shared_mem_dict.
        """
        with shared_mem_dict_lock:
            # remove episode record
            if ep_key(episode_uuid) in shared_mem_dict:
                del shared_mem_dict[ep_key(episode_uuid)]  # RM--
                logger.info(f"Deleted episode record for {episode_uuid}.")
            # remove from unclaimed list if present
            # Indexing a manager dict proxy hands back a copy of the stored list, so an
            # in-place `.remove()` on it is lost; the filtered list must be assigned back.
            unclaimed = shared_mem_dict["unclaimed_episodes"]
            if episode_uuid in unclaimed:
                shared_mem_dict["unclaimed_episodes"] = [u for u in unclaimed if u != episode_uuid]

    # --------------------------------------------------------------------------------------
    # -------------------------- reward history management ---------------------------------
    # --------------------------------------------------------------------------------------

    def _finalize_reward_history_for_step(global_step, shared_mem_dict, shared_mem_dict_lock):
        """Fold the rewards collected so far into one reward_history entry for *global_step*.

        Does nothing when no rewards are pending. Otherwise appends the mean/std
        summary (as a plain dict) to "reward_history" and empties "current_rewards".
        The lock argument is accepted but not used here.
        """
        import numpy as np

        pending = shared_mem_dict.get("current_rewards", [])
        if not pending:
            return

        reward_values = list(pending)  # materialize in case a proxy was returned
        mean_value = float(np.mean(reward_values))
        std_value = float(np.std(reward_values))

        past_entries = list(shared_mem_dict.get("reward_history", []))
        summary = RewardHistoryEntry(
            global_step=global_step,
            mean_reward=mean_value,
            std_reward=std_value,
            timestamp=time.time(),
        )
        past_entries.append(summary.model_dump())

        # Store the extended history, then start a fresh accumulator for the next step.
        shared_mem_dict["reward_history"] = past_entries
        shared_mem_dict["current_rewards"] = []

    # --------------------------------------------------------------------------------------
    # -------------------------- return workflow output ------------------------------------
    # --------------------------------------------------------------------------------------

    def _register_final_episode_output_blocking(episode_uuid, workflow_output, shared_mem_dict, shared_mem_dict_lock):  # must async
        """Send the final workflow output to the episode's runner and retire the episode record.

        This performs a blocking ZMQ REQ round trip; from coroutines it must be
        run off the event loop. After the ack (or after giving up), the
        active record is moved to its "finished" key and the episode is dropped
        from the unclaimed list.

        Args:
            episode_uuid: uuid of the episode being finished.
            workflow_output: object exposing ``model_dump_json()``; its JSON is sent to the runner.
            shared_mem_dict: shared state holding episode records, "engine_status"
                and "unclaimed_episodes".
            shared_mem_dict_lock: lock guarding read-modify-write access to shared_mem_dict.

        Raises:
            KeyError: the episode record does not exist when the function starts.
        """
        # begin send workflow_output
        zmq_addr = shared_mem_dict[ep_key(episode_uuid)].zmq_listen_result_addr
        if DEBUG:
            logger.info(f"[server] episode_uuid: {episode_uuid} | Received new chat completion request")
        socket = zmq_context.socket(zmq.REQ)
        try:
            socket.setsockopt(zmq.RCVTIMEO, RCVTIMEO)  # 2 seconds recv timeout
            socket.connect(zmq_addr)
            if DEBUG:
                logger.info(f"[server] episode_uuid: {episode_uuid} | connect done")
            socket.send_string(workflow_output.model_dump_json())
            if DEBUG:
                logger.info(f"[server] episode_uuid: {episode_uuid} | send_string")
            # wait for ack
            for _ in range(RCVTIMEO_WAIT_N):  # max 5 minutes wait
                try:
                    if DEBUG:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | recv_string begin.")
                    # <wait for>:
                    #   <from_sourcefile>: ajet/task_runner/swarm_runner.py
                    #   <from_code>: zmq_socket.send_string("ack")
                    #   <expect>: "ack"
                    socket.recv_string()
                    break
                except zmq.Again:
                    if DEBUG:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | recv_string timeout, retrying.")
                    if shared_mem_dict["engine_status"] not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
                        logger.info(f"[server] episode_uuid: {episode_uuid} | Engine is no longer rolling, aborting wait for ack.")
                        # raise RuntimeError("Engine is no longer rolling, aborting wait for ack.")
                        break
                    continue
        finally:
            # Close deterministically on every exit path instead of relying on
            # garbage collection of the socket object.
            socket.close()

        # clean up episode records
        with shared_mem_dict_lock:
            active_key = ep_key(episode_uuid)
            es = shared_mem_dict.get(active_key)
            # The record may already be gone (e.g. the engine left the rolling state and
            # all episode records were wiped while we were waiting for the ack); in that
            # case there is nothing to snapshot and nothing to delete.
            if es is not None:
                # preserve a record snapshot
                shared_mem_dict[finished_ep_key(episode_uuid)] = es
                # then remove the active record
                del shared_mem_dict[active_key]
            # Indexing a manager dict proxy hands back a copy of the stored list, so an
            # in-place `.remove()` on it is lost; the filtered list must be assigned back.
            unclaimed = shared_mem_dict["unclaimed_episodes"]
            if episode_uuid in unclaimed:
                shared_mem_dict["unclaimed_episodes"] = [u for u in unclaimed if u != episode_uuid]

    # --------------------------------------------------------------------------------------
    # -------------------------- status monitor --------------------------------------------
    # --------------------------------------------------------------------------------------

    async def register_episode_ready_listener():
        """Endless background loop: every 10 seconds recycle timed-out claimed episodes.

        When DEBUG is enabled, the current episode records are also dumped to
        the dynamic log file after each pass.
        """
        check_interval_seconds = 10
        while True:
            await asyncio.sleep(check_interval_seconds)
            await find_claimed_episodes_that_need_to_be_unclaimed()
            if not DEBUG:
                continue
            _write_swarm_server_dynamic_log(shared_mem_dict)

    def read_all_episode_status() -> Optional[EpisodeStatus]:
        """Log the engine status together with the number of registered and claimed episodes.

        Always returns None; the result is only written to the log.
        """
        buckets = {}
        for key, record in shared_mem_dict.items():
            if not is_key_episode_status(key):
                continue
            buckets.setdefault(record.episode_status, []).append(record)

        registered_count = len(buckets.get("registered", []))
        claimed_count = len(buckets.get("claimed", []))
        print_buffer_str = f"Registered: {registered_count}, Claimed: {claimed_count}"
        logger.info(f"Current engine status: [{shared_mem_dict['engine_status']}], " + print_buffer_str)

        return None

    def _write_swarm_server_dynamic_log(shared_mem_dict):
        """In DEBUG mode, overwrite ./swarm_server.dynamic.log with one JSON line per active episode."""
        if not DEBUG:
            return

        json_lines = [
            f"{record.model_dump_json()}\n"
            for key, record in shared_mem_dict.items()
            if is_key_episode_status(key)
        ]

        # The file is rewritten from scratch on every call (mode "w").
        with open("./swarm_server.dynamic.log", "w", encoding="utf-8") as log_file:
            log_file.write("".join(json_lines))
        return

    # --------------------------------------------------------------------------------------
    # -------------------------- engine status op ------------------------------------------
    # --------------------------------------------------------------------------------------
    shared_mem_dict["engine_status"] = "ENGINE.OFFLINE"  # initial status
    def _clean_up_engine_status(shared_mem_dict_lock, shared_mem_dict):
        """Wipe all per-rollout state: episode records, the unclaimed list and reward tracking.

        Note the argument order (lock first, dict second). Everything happens
        while holding the lock.
        """
        with shared_mem_dict_lock:
            # Snapshot the keys to drop before deleting, so the dict is not
            # modified while it is being scanned.
            stale_keys = []
            for candidate in shared_mem_dict.keys():
                if is_key_episode_status(candidate) or is_key_finished_episode_status(candidate):
                    stale_keys.append(candidate)

            # remove all episodes (active and finished)
            for stale_key in stale_keys:
                del shared_mem_dict[stale_key]
                if DEBUG:
                    logger.info(f"[_clean_up_engine_status] Removed: {stale_key}")

            # clear unclaimed episodes list
            if "unclaimed_episodes" in shared_mem_dict:
                num_unclaimed = len(shared_mem_dict["unclaimed_episodes"])
                shared_mem_dict["unclaimed_episodes"] = []
                logger.info(f"[_clean_up_engine_status] Cleared {num_unclaimed} unclaimed episodes")

            # clear reward tracking
            shared_mem_dict["current_rewards"] = []
            shared_mem_dict["reward_history"] = []

    # --------------------------------------------------------------------------------------
    # -------------------------- fastapi routes --------------------------------------------
    # --------------------------------------------------------------------------------------

    @app.post("/sync_train_config")
    async def sync_train_config(req: SyncTrainConfigRequest):
        """
        Accept the client's training configuration (a YAML string) and keep it
        in shared memory under "train_config_yaml" so that start_engine can use it.

        Rejected with HTTP 400 unless the engine is currently offline. Storage
        errors are reported in the response body instead of being raised.
        """
        if VERBOSE:
            logger.info("Running: /sync_train_config")

        engine_is_offline = shared_mem_dict["engine_status"] == "ENGINE.OFFLINE"
        if not engine_is_offline:
            raise HTTPException(
                status_code=400,
                detail="Engine is already started. Call `stop_engine` first before syncing new training configuration.",
            )

        try:
            config_yaml = req.yaml_as_string
            logger.info("[sync_train_config] Received training configuration")
            if DEBUG:
                logger.debug(f"[sync_train_config] YAML content:\n{config_yaml}...")

            # Keep the YAML around for start_engine
            with shared_mem_dict_lock:
                shared_mem_dict["train_config_yaml"] = config_yaml

            logger.info("[sync_train_config] Successfully stored training configuration")
            return {"success": True}
        except Exception as err:
            logger.error(f"[sync_train_config] Error: {err}")
            return {"success": False, "error": str(err)}

    @app.post("/start_engine")
    async def start_engine():
        """
        Start the training engine using the previously synced configuration.
        This creates a temporary YAML file and spawns a training process.

        Returns:
            ``{"success": True, "pid": <training process pid>}`` on success,
            ``{"success": False, "error": <message>}`` when no configuration was
            synced or when any step of the launch fails (the error is logged and
            its traceback printed; nothing is raised to the HTTP layer).
        """
        if VERBOSE:
            logger.info(f"Running: /start_engine")
        try:
            # Heavy / optional dependencies are imported lazily, only when an engine is started.
            import ray
            import tempfile
            import yaml as yaml_module
            from ajet.utils.launch_utils import execute_training_process
            from ajet.utils.config_utils import prepare_experiment_config
            from ajet.launcher import get_backbone_target, setup_environment_vars

            # Check if config has been synced
            if "train_config_yaml" not in shared_mem_dict:
                logger.error("[start_engine] No training config found. Please call sync_train_config first.")
                return {"success": False, "error": "No training config found"}
            # Flag the engine as booting right away, before the (slow) launch steps below.
            with shared_mem_dict_lock:
                shared_mem_dict["engine_status"] = "ENGINE.BOOTING"
                shared_mem_dict["booting_start_time"] = time.time()
            # Parse YAML to get backbone
            yaml_str = shared_mem_dict["train_config_yaml"]
            config_dict = yaml_module.safe_load(yaml_str)
            backbone = config_dict.get("ajet", {}).get("backbone", "verl")
            DEFAULT_DIR = "saved_experiments"
            experiment_dir = config_dict.get("ajet", {}).get("experiment_dir", DEFAULT_DIR)
            if experiment_dir == "auto":
                exp_base_dir = DEFAULT_DIR
            else:
                exp_base_dir = os.path.dirname(os.path.abspath(experiment_dir))

            # Save YAML to temporary file
            # (delete=False: the file is still needed after this `with` block closes it)
            with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".yaml") as temp_file:
                temp_file.write(yaml_str)
                main_yaml_fp = temp_file.name
            logger.info(f"[start_engine] Saved config to temporary file: {main_yaml_fp}")

            # Create args namespace
            args = SimpleNamespace(
                conf=main_yaml_fp,
                backbone=backbone,
                with_logview=False,
                debug=False,
            )
            # get debug param
            should_debug = os.environ.get("RAY_DEBUG_POST_MORTEM", "0") == "1"
            debug_tags = os.environ.get("DEBUG_TAGS", "")
            if should_debug:
                args.debug = debug_tags

            def override_param_callback(config):
                # Mark the interchange server as already started and pass on its port,
                # taken from the AJET_DAT_INTERCHANGE_PORT environment variable.
                config["ajet"]["interchange_server"]["already_started"] = True
                config["ajet"]["interchange_server"]["interchange_server_port"] = int(os.getenv("AJET_DAT_INTERCHANGE_PORT"))  # type: ignore
                return config

            # Finalize experiment config
            main_yaml_fp, exe_exp_base, exp_name, exp_config = prepare_experiment_config(
                yaml_path=main_yaml_fp,
                exp_base_dir=exp_base_dir,
                backbone=backbone,
                override_param_callback=override_param_callback,
            )

            # Setup environment variables
            env, exp_config = setup_environment_vars(args, exp_config, main_yaml_fp)

            # Start ray if not already started
            if not ray.is_initialized():
                from ajet.utils.launch_utils import start_ray_service

                logger.info("[start_engine] Starting Ray service...")
                # start_ray_service(args, env)
                await asyncio.to_thread(start_ray_service, args, env)  # start ray in separate thread to avoid blocking
            else:
                logger.info("[start_engine] Ray already initialized")

            # Start training process in a separate process
            p = multiprocessing.Process(
                target=execute_training_process,
                args=(
                    args,
                    get_backbone_target(args.backbone),
                    main_yaml_fp,
                    exe_exp_base,
                    main_yaml_fp,
                    env,
                    exp_config,
                    True,  # is_swarm_server
                ),
            )
            p.daemon = True
            p.start()

            # wait until p.pid is available
            # (asyncio.sleep, not time.sleep: a blocking sleep inside this coroutine
            # would stall every other request handled by the event loop)
            while not isinstance(p.pid, int):
                await asyncio.sleep(1)

            # set new process group
            os.setpgid(p.pid, p.pid)

            # Store process info in shared memory
            # (episode / reward state left in shared memory is wiped first)
            _clean_up_engine_status(shared_mem_dict_lock, shared_mem_dict)
            with shared_mem_dict_lock:
                shared_mem_dict["training_process_pid"] = p.pid
                shared_mem_dict["engine_status"] = "ENGINE.BOOTING"
                shared_mem_dict["booting_start_time"] = time.time()

            logger.info(f"[start_engine] Successfully started training process (PID: {p.pid})")
            return {"success": True, "pid": p.pid}

        except Exception as e:
            logger.error(f"[start_engine] Error starting engine: {e}")
            import traceback

            traceback.print_exc()
            return {"success": False, "error": str(e)}

    @app.post("/update_engine_status", response_model=BoolResponse)
    async def update_engine_status(req: UpdateEngineStatusRequest):
        """Record a new engine status and run the bookkeeping tied to status transitions.

        - Statuses outside VALID_STATUSES are rejected.
        - Leaving the rolling states wipes episode and reward state.
        - Leaving BOOTING clears "booting_start_time".
        - A changed global_step finalizes the reward statistics of the previous step.
        """
        if VERBOSE:
            logger.info("Running /update_engine_status")

        new_status = req.engine_status
        if new_status not in VALID_STATUSES:
            return BoolResponse(success=False, failure_reason="Invalid engine status")

        rolling_states = ("ENGINE.ROLLING", "ENGINE.ROLLING_POST")
        old_status = shared_mem_dict["engine_status"]
        shared_mem_dict["engine_status"] = new_status

        left_rolling = old_status in rolling_states and new_status not in rolling_states
        if left_rolling:
            _clean_up_engine_status(shared_mem_dict_lock, shared_mem_dict)

        # Clear booting_start_time when transitioning away from BOOTING
        left_booting = old_status == "ENGINE.BOOTING" and new_status != "ENGINE.BOOTING"
        if left_booting:
            shared_mem_dict["booting_start_time"] = None

        new_step = req.global_step
        if new_step is not None:
            old_step = shared_mem_dict.get("global_step", None)
            shared_mem_dict["global_step"] = new_step
            # A step change closes the reward statistics of the step that just ended
            if old_step is not None and old_step != new_step:
                _finalize_reward_history_for_step(old_step, shared_mem_dict, shared_mem_dict_lock)

        status_detail = req.engine_status_detail
        if status_detail is not None:
            shared_mem_dict["engine_status_detail"] = status_detail

        logger.info(f"[update_engine_status] Engine status set to {req.engine_status}")
        return BoolResponse(success=True)

    @app.get("/get_engine_status")
    async def get_engine_status():
        """Report the engine status plus, when present, its detail text and global step (None otherwise)."""
        return {
            "engine_status": shared_mem_dict["engine_status"],
            "engine_status_detail": shared_mem_dict.get("engine_status_detail", None),
            "global_step": shared_mem_dict.get("global_step", None),
        }

    # --- episode status ---
    @app.post("/register_episode", response_model=BoolResponse)
    async def register_episode(req: RegisterEpisodeRequest):
        """(From task_runner) Register a new episode as ready to roll.

        Only accepted while the engine status is ENGINE.ROLLING. The new record
        is stored as "registered" and its uuid is appended to the unclaimed list.
        """
        if shared_mem_dict["engine_status"] != "ENGINE.ROLLING":
            return BoolResponse(
                success=False,
                failure_reason="Engine is not in rolling state. Cannot register episode.",
            )

        new_uuid = req.episode_uuid
        if VERBOSE:
            logger.info(f"Running [{new_uuid}]: /register_episode")

        record = EpisodeStatus(
            episode_uuid=new_uuid,
            openai_base_url=req.openai_base_url,
            openai_api_key=req.openai_api_key,
            episode_status="registered",
            zmq_listen_result_addr=req.zmq_listen_result_addr,
            discard_episode_timeout=-1,
        )
        record.latest_activity_timestamp = time.time()
        record.llm_call_count = 0

        # Store the record and list it as claimable in one locked step.
        with shared_mem_dict_lock:
            shared_mem_dict[ep_key(new_uuid)] = record
            shared_mem_dict["unclaimed_episodes"] += [new_uuid]

        return BoolResponse(success=True)

    @app.post("/claim_episode", response_model=ClaimEpisodeResponse)
    async def claim_episode(req: ClaimEpisodeRequest):
        """(From client) Claim an available episode to rollout.

        Returns a failed ClaimEpisodeResponse (with ``fail_cause``) when the
        engine is not in ENGINE.ROLLING or nothing can be claimed, and raises
        HTTP 400 for an episode_type other than "train" / "eval".
        """

        def _rejected(cause: str) -> ClaimEpisodeResponse:
            # Every failed claim has the same shape; only the cause differs.
            return ClaimEpisodeResponse(
                success=False,
                client_uuid=req.client_uuid,
                episode_uuid="",
                openai_base_url="",
                openai_api_key="",
                fail_cause=cause,
            )

        engine_status = shared_mem_dict["engine_status"]

        if engine_status != "ENGINE.ROLLING":
            advice_by_status = {
                "ENGINE.OFFLINE": "Please start the engine first. Please use one of the client to run `client.sync_train_config() + client.start_engine()` to start the engine.",
                "ENGINE.BOOTING": "Please wait until the engine is fully booted. Try again (maybe 1 minute) later.",
                "ENGINE.WEIGHT_SYNCING": "Engine is syncing weights. Try again (maybe 1 minute) later.",
                "ENGINE.WEIGHT_EXPORTING": "Engine is exporting weights (fsdp -> hf safetensor). Try again (maybe 1 minute) later.",
                "ENGINE.ROLLING_POST": "Engine is in post-rolling phase. Try again (maybe 1 minute) later.",
            }
            advise = advice_by_status.get(engine_status, "")
            fail_cause = f"Engine not ready. Current status: [{engine_status}]."
            return _rejected(fail_cause + " " + advise)

        if req.episode_type not in ("train", "eval"):
            raise HTTPException(status_code=400, detail=f"Unknown episode_type: {req.episode_type}")

        with shared_mem_dict_lock:
            if not shared_mem_dict["unclaimed_episodes"]:
                return _rejected("No available episodes to claim. Try again (maybe 1 minute) later.")

            # Hint: do NOT optimize these two lines
            episode_uuid = shared_mem_dict["unclaimed_episodes"][0]
            shared_mem_dict["unclaimed_episodes"] = shared_mem_dict["unclaimed_episodes"][1:]

            # The uuid was listed, but its record is gone: report failure
            # (the uuid has already been taken off the unclaimed list above).
            if ep_key(episode_uuid) not in shared_mem_dict:
                return _rejected("No available episodes to claim. Try again (maybe 2 minutes) later.")

            claimed: EpisodeStatus = shared_mem_dict[ep_key(episode_uuid)]
            claimed.episode_status = "claimed"
            claimed.episode_type = req.episode_type
            claimed.client_uuid = req.client_uuid
            claimed.latest_activity_timestamp = time.time()
            claimed.llm_call_count = 0
            claimed.discard_episode_timeout = req.discard_episode_timeout

            # Store task_id if throttle_policy is provided with current_task_id
            throttle = req.throttle_policy
            if (throttle is not None) and (throttle.current_task_id):
                claimed.optional_task_id = throttle.current_task_id

            # Write the modified copy back so the change is actually stored.
            shared_mem_dict[ep_key(episode_uuid)] = claimed
            openai_base_url = claimed.openai_base_url
            openai_api_key = claimed.openai_api_key

        if VERBOSE:
            logger.info(f"Running [{episode_uuid}]: /claim_episode")

        return ClaimEpisodeResponse(
            success=True,
            client_uuid=req.client_uuid,
            episode_uuid=episode_uuid,
            openai_base_url=openai_base_url,
            openai_api_key=openai_api_key,
            fail_cause="",
        )

    @app.post("/end_episode", response_model=EndEpisodeResponse)
    async def end_episode(req: EndEpisodeRequest):
        """Accept the final workflow output of a claimed episode.

        The request is rejected with an ``HTTPException`` when:

        - the engine is not in ``ENGINE.ROLLING`` / ``ENGINE.ROLLING_POST`` (400);
        - ``workflow_output.metadata`` lacks ``task_id`` or it differs from
          ``req.task_id`` (400);
        - the episode is unknown or is no longer in ``claimed`` status (400);
        - the episode was claimed by a different ``client_uuid`` (404);
        - the recorded episode type is neither ``train`` nor ``eval`` (400).

        For ``train`` episodes the output is handed to
        ``_register_final_episode_output_blocking`` in a worker thread and any
        reward(s) are appended to ``shared_mem_dict["current_rewards"]``.
        For ``eval`` episodes the episode is reverted to unclaimed while the
        engine is ``ENGINE.ROLLING``; otherwise its record is deleted.

        Returns:
            ``EndEpisodeResponse(success=True)`` when the episode was processed.
        """
        engine_status = shared_mem_dict["engine_status"]
        if engine_status not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
            raise HTTPException(
                status_code=400,
                detail=f"Engine is not in rolling state. Current status: [{engine_status}]. Cannot end episode.",
            )

        # receive workflow output data
        client_uuid = req.client_uuid
        episode_uuid = req.episode_uuid
        workflow_output = req.workflow_output
        task_id = req.task_id

        if VERBOSE:
            logger.info(f"Running [{episode_uuid}]: /end_episode")

        # Validate with explicit errors instead of `assert`: assertions are
        # removed when Python runs with -O, and an AssertionError would reach
        # the client as an unexplained 500 rather than a descriptive 400.
        if "task_id" not in workflow_output.metadata:
            raise HTTPException(
                status_code=400,
                detail="workflow_output.metadata must contain task_id",
            )
        if workflow_output.metadata["task_id"] != task_id:
            raise HTTPException(
                status_code=400,
                detail="workflow_output.metadata.task_id must match req.task_id",
            )

        # Single lookup: avoids a KeyError if the record is removed between a
        # membership test and the subsequent read.
        ep_stat = shared_mem_dict.get(ep_key(episode_uuid))
        if ep_stat is None:
            logger.error(f"[server] Episode {episode_uuid} not found.")
            raise HTTPException(status_code=400, detail=f"Episode {episode_uuid} not found.")

        episode_type = ep_stat.episode_type
        client_uuid_recorded = ep_stat.client_uuid

        if ep_stat.episode_status != "claimed":
            logger.error(f"[server] Episode {episode_uuid} is not in claimed status.")
            raise HTTPException(
                status_code=400,
                detail=f"Episode {episode_uuid} is not in claimed status, maybe you take **too long** to submit the workflow output, try increase `discard_episode_timeout` when `begin_episode`.",
            )

        if client_uuid_recorded != client_uuid:
            logger.error(f"[server] Episode {episode_uuid} is claimed by different client: {client_uuid_recorded}, but got {client_uuid}.")
            raise HTTPException(
                status_code=404,
                detail=f"Episode {episode_uuid} is claimed by different client: {client_uuid_recorded}, but got {client_uuid}.",
            )

        if episode_type == "train":
            # The registration helper is blocking, so run it off the event loop.
            await asyncio.to_thread(
                _register_final_episode_output_blocking,
                episode_uuid,
                workflow_output,
                shared_mem_dict,
                shared_mem_dict_lock,
            )

            # Record reward to current_rewards
            if workflow_output.reward is not None:
                reward_value = workflow_output.reward
                # Handle both single reward and list of rewards
                rewards_to_record = reward_value if isinstance(reward_value, list) else [reward_value]

                with shared_mem_dict_lock:
                    # Read-modify-write: copy, extend, then store the whole
                    # list back under the key.
                    current_rewards = list(shared_mem_dict.get("current_rewards", []))
                    current_rewards.extend(rewards_to_record)
                    shared_mem_dict["current_rewards"] = current_rewards

        elif episode_type == "eval":
            if engine_status in ["ENGINE.ROLLING"]:
                await _revert_episode_to_unclaimed(episode_uuid, shared_mem_dict, shared_mem_dict_lock)
            else:
                _delete_episode_record(episode_uuid, shared_mem_dict, shared_mem_dict_lock)

        else:
            raise HTTPException(status_code=400, detail=f"Unknown episode_type: {episode_type}")

        # return success
        return EndEpisodeResponse(success=True)

    @app.post("/abort_episode", response_model=EndEpisodeResponse)
    async def abort_episode(req: EndEpisodeRequest):
        """Abort an episode without submitting its workflow output.

        This endpoint always answers ``EndEpisodeResponse(success=True)``:

        - if the engine is not in ``ENGINE.ROLLING`` / ``ENGINE.ROLLING_POST``
          nothing is done;
        - if the episode is unknown, an error is logged and nothing else is done;
        - otherwise the episode is reverted to unclaimed while the engine is
          ``ENGINE.ROLLING``, or its record is deleted in ``ENGINE.ROLLING_POST``.
        """
        engine_status = shared_mem_dict["engine_status"]
        if engine_status not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
            return EndEpisodeResponse(success=True)

        episode_uuid = req.episode_uuid

        if VERBOSE:
            logger.info(f"Running [{episode_uuid}]: /abort_episode")

        # NOTE: req.task_id and req.workflow_output are neither validated nor
        # used when aborting; only the episode uuid matters here.
        if ep_key(episode_uuid) not in shared_mem_dict:
            logger.error(f"[server] Episode {episode_uuid} not found.")
            return EndEpisodeResponse(success=True)

        if engine_status == "ENGINE.ROLLING":
            await _revert_episode_to_unclaimed(episode_uuid, shared_mem_dict, shared_mem_dict_lock)
        else:
            _delete_episode_record(episode_uuid, shared_mem_dict, shared_mem_dict_lock)

        return EndEpisodeResponse(success=True)

    @app.post("/can_continue_episode", response_model=CanContinueEpisodeResponse)
    async def can_continue_episode(req: CanContinueEpisodeRequest):
        """Report whether the given episode may keep running.

        Returns ``can_continue=True`` only when the engine is in
        ``ENGINE.ROLLING`` / ``ENGINE.ROLLING_POST`` and the episode record
        exists with status ``claimed``; otherwise ``can_continue=False``.
        """
        engine_status = shared_mem_dict["engine_status"]
        if engine_status not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
            return CanContinueEpisodeResponse(can_continue=False)

        # One lookup instead of `in` followed by indexing: if the record is
        # removed between the two steps, indexing would raise KeyError.
        es = shared_mem_dict.get(ep_key(req.episode_uuid))
        can_continue = es is not None and es.episode_status == "claimed"

        return CanContinueEpisodeResponse(can_continue=can_continue)

    @app.post("/is_episode_claimed", response_model=BoolResponse)
    async def is_episode_claimed(req: CheckWhetherEpisodeClaimedRequest):
        """Report whether an episode is currently in ``claimed`` status.

        Returns ``BoolResponse(success=True)`` only when the engine is in
        ``ENGINE.ROLLING`` / ``ENGINE.ROLLING_POST`` and the episode record
        exists with status ``claimed``. When the record exists but is not
        claimed and ``req.unregister_if_not_claimed`` is set, the record is
        deleted before answering ``success=False``.
        """
        engine_status = shared_mem_dict["engine_status"]
        if engine_status not in ["ENGINE.ROLLING", "ENGINE.ROLLING_POST"]:
            return BoolResponse(success=False)

        # A single .get() covers both "key missing" and "empty value", and
        # cannot raise KeyError if the record vanishes between check and read.
        es = shared_mem_dict.get(ep_key(req.episode_uuid))
        if not es:
            return BoolResponse(success=False)

        if es.episode_status == "claimed":
            return BoolResponse(success=True)

        if req.unregister_if_not_claimed:
            _delete_episode_record(req.episode_uuid, shared_mem_dict, shared_mem_dict_lock)
        return BoolResponse(success=False)

    @app.post("/get_episode_buffer", response_model=EpisodeBufferResponse)
    async def get_episode_buffer():
        """Return every shared-dict value stored under an episode-status key."""
        episode_records = []
        for key, value in shared_mem_dict.items():
            if is_key_episode_status(key):
                episode_records.append(value)
        return EpisodeBufferResponse(buffer=episode_records)

    @app.post("/update_current_batch_rollout_pool_information", response_model=BoolResponse)
    async def update_current_batch_rollout_pool_information(req: CurrentBatchRolloutPoolInformation):
        """Store the caller-supplied rollout pool information in the shared dict.

        Fields that are maintained only inside ``shared_mem_dict`` are blanked
        on the incoming object before it is stored.

        Returns:
            ``BoolResponse(success=True)`` on success, or ``success=False``
            with ``failure_reason`` set to the error text if anything raises.
        """
        if DEBUG:
            logger.info("Running /update_current_batch_rollout_pool_information")
        try:
            with shared_mem_dict_lock:
                # Ignore fields that are only maintained in shared_mem_dict
                for server_owned_field in ("running_episode_details", "engine_status", "global_step"):
                    setattr(req, server_owned_field, None)
                req.completed_tasks_client_uuids = {}
                shared_mem_dict["current_batch_rollout_pool_information"] = req
            return BoolResponse(success=True)
        except Exception as err:
            logger.error(f"Error updating current batch rollout pool information: {err}")
            return BoolResponse(success=False, failure_reason=str(err))

    @app.get("/get_current_batch_rollout_pool_information", response_model=CurrentBatchRolloutPoolInformation)
    async def get_current_batch_rollout_pool_information():
        """Get the current batch rollout pool information.

        Starts from the stored ``current_batch_rollout_pool_information`` (or a
        default instance) and fills in fields read from ``shared_mem_dict``:

        - ``engine_status``, ``global_step``, ``booting_start_time``;
        - ``running_episode_details``: one entry per episode whose status is
          ``claimed`` (``None`` when there are none);
        - ``completed_tasks_client_uuids``: ``task_id -> [client_uuid, ...]``
          built from finished-episode records; only overwritten when at least
          one such record carries a task id.

        Returns:
            The populated ``CurrentBatchRolloutPoolInformation``; on any error
            the error is logged and a default instance is returned instead.
        """
        try:
            pool_info = shared_mem_dict.get(
                "current_batch_rollout_pool_information",
                CurrentBatchRolloutPoolInformation(),
            )
            # Fetch additional fields from shared_mem_dict
            pool_info.engine_status = shared_mem_dict.get("engine_status", None)
            pool_info.global_step = shared_mem_dict.get("global_step", None)
            pool_info.booting_start_time = shared_mem_dict.get("booting_start_time", None)

            running_episode_details = {}
            # Map task_id -> list of client_uuids
            completed_tasks_client_uuids = {}
            current_time = time.time()

            # Walk the shared dict once so both result maps are derived from
            # the same view of the data (and the dict is fetched only once).
            # The two key checks stay independent `if`s rather than `elif`,
            # so a key is handled by every predicate that accepts it.
            for k, es in shared_mem_dict.items():
                # Running (claimed) episodes
                if is_key_episode_status(k) and es.episode_status == "claimed":
                    time_since_last_activity = current_time - es.latest_activity_timestamp
                    running_episode_details[es.episode_uuid] = {
                        "episode_status": es.episode_status,
                        "time_since_last_activity": f"{time_since_last_activity:.1f}s",
                        "discard_episode_timeout": f"{es.discard_episode_timeout:.1f}s",
                        "llm_call_count": str(es.llm_call_count),
                        "client_uuid": es.client_uuid,
                        # Records may lack this attribute; fall back to None.
                        "optional_task_id": getattr(es, "optional_task_id", None),
                    }

                # Finished episodes, grouped by task id
                if is_key_finished_episode_status(k):
                    task_id = getattr(es, "optional_task_id", None)
                    if task_id:
                        completed_tasks_client_uuids.setdefault(task_id, []).append(es.client_uuid)

            pool_info.running_episode_details = running_episode_details if running_episode_details else None

            # Only set if we have data, otherwise keep the existing value from pool_info
            if completed_tasks_client_uuids:
                pool_info.completed_tasks_client_uuids = completed_tasks_client_uuids

            return pool_info
        except Exception as e:
            logger.error(f"Error getting current batch rollout pool information: {e}")
            return CurrentBatchRolloutPoolInformation()

    # --------------------------------------------------------------------
    # ------------ get reward history for visualization ------------------
    # --------------------------------------------------------------------
    @app.get("/get_reward_history", response_model=RewardHistoryResponse)
    async def get_reward_history():
        """Return the stored reward history (used to draw reward curves).

        Each item under ``shared_mem_dict["reward_history"]`` is expanded as
        keyword arguments into a ``RewardHistoryEntry``. If anything raises,
        the error is logged and an empty history is returned.
        """
        try:
            raw_history = shared_mem_dict.get("reward_history", [])
            entries = []
            for record in raw_history:
                entries.append(RewardHistoryEntry(**record))
            return RewardHistoryResponse(history=entries)
        except Exception as err:
            logger.error(f"Error getting reward history: {err}")
            return RewardHistoryResponse(history=[])

    # --------------------------------------------------------------------
    # ------------ bring engine back to ENGINE.OFFLINE -------------------
    # --------------------------------------------------------------------
    @app.post("/stop_engine")
    async def stop_engine():
        """
        Stop the training engine and report success.

        All work is delegated to ``kill_process_tree``, called with the shared
        lock and the shared dict. According to the original description of
        this endpoint, that call is expected to:
        - kill the training process and all its subprocesses (forcefully if necessary)
        - set the engine status to OFFLINE
        - remove all episodes (registered, claimed, and unclaimed)
        - clean up shared memory state
        (NOTE(review): these effects live in ``kill_process_tree``; confirm there.)
        """
        kill_process_tree(shared_mem_dict_lock, shared_mem_dict)
        response = BoolResponse(success=True)
        return response

    return app, register_episode_ready_listener()