Move gcs_benchmarks/standalone tools to src/maxtext/utils

Charles Li · Charles Li · commit 6a65d665565a · 2026-04-02T22:38:35.000Z
diff --git a/RESTRUCTURE.md b/RESTRUCTURE.md
@@ -299,9 +299,6 @@ comments, or questions by creating a new
     ├── dev/
     │   ├── code_style.sh
     │   └── unit_test_and_lint.sh
-    ├── gcs_benchmarks/
-    │   ├── standalone_checkpointer.py
-    │   └── standalone_dataloader.py
     ├── orchestration/
     │   ├── gpu_multi_process_run.sh
     │   ├── multihost_job.py
diff --git a/src/maxtext/utils/standalone_checkpointer.py b/src/maxtext/utils/standalone_checkpointer.py
@@ -1,4 +1,4 @@
-# Copyright 2023–2025 Google LLC
+# Copyright 2023–2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -53,13 +53,9 @@ def checkpoint_loop(config, state=None):
   """
   model = from_config(config)
   mesh = model.mesh
-  init_rng, checkpoint_manager, _, tx = train_utils.create_training_tools(
-      config, model, mesh
-  )
+  init_rng, checkpoint_manager, _, tx = train_utils.create_training_tools(config, model, mesh)
 
-  unboxed_abstract_state, _, _ = maxtext_utils.get_abstract_state(
-      model, tx, config, init_rng, mesh, is_training=True
-  )
+  unboxed_abstract_state, _, _ = maxtext_utils.get_abstract_state(model, tx, config, init_rng, mesh, is_training=True)
   # A barrier to sync all hosts before starting to restore checkpoint
   jax.experimental.multihost_utils.sync_global_devices("Barrier before load")
   checkpoint_load_start = datetime.datetime.now()
@@ -82,30 +78,24 @@ def checkpoint_loop(config, state=None):
   if state is not None:  # Checkpoint was available for restore
     if jax.process_index() == 0:
       max_logging.log(
-          "STANDALONE CHECKPOINTER : Checkpoint restored in :"
-          f" {checkpoint_load_end - checkpoint_load_start}"
+          "STANDALONE CHECKPOINTER : Checkpoint restored in :" f" {checkpoint_load_end - checkpoint_load_start}"
       )
   else:  # Checkpoint was unavailable, state needs to be initialized
-    state, _, _, _ = maxtext_utils.setup_training_state(
-        model, None, tx, config, init_rng, mesh, checkpoint_manager
-    )
+    state, _, _, _ = maxtext_utils.setup_training_state(model, None, tx, config, init_rng, mesh, checkpoint_manager)
   state = add_entropy_to_checkpoint(state)
 
   start_step = get_first_step(state)  # this is the start_step for training
   for step in np.arange(start_step, config.steps):
     if checkpoint_manager is not None:
       start_time = datetime.datetime.now()
       # A barrier to sync all hosts before starting to save checkpoint
-      jax.experimental.multihost_utils.sync_global_devices(
-          "Barrier before save"
-      )
+      jax.experimental.multihost_utils.sync_global_devices("Barrier before save")
       if checkpointing.save_checkpoint(checkpoint_manager, int(step), state):
         checkpoint_manager.wait_until_finished()
         end_time = datetime.datetime.now()
         if jax.process_index() == 0:
           max_logging.log(
-              "STANDALONE CHECKPOINTER : Checkpoint saved in"
-              f" {end_time - start_time} ,step {step}, on host 0"
+              "STANDALONE CHECKPOINTER : Checkpoint saved in" f" {end_time - start_time} ,step {step}, on host 0"
           )
 
   return state
@@ -123,12 +113,8 @@ def add_entropy_to_checkpoint(state):
     state: Returns state with entropy added to the optimizer state.
   """
   opt_0 = state.opt_state[0]
-  opt_0 = opt_0._replace(
-      mu=jax.tree_util.tree_map(lambda k: jnp.cos(1000 * k), state.params)
-  )
-  opt_0 = opt_0._replace(
-      nu=jax.tree_util.tree_map(lambda k: jnp.sin(1000 * k), state.params)
-  )
+  opt_0 = opt_0._replace(mu=jax.tree_util.tree_map(lambda k: jnp.cos(1000 * k), state.params))
+  opt_0 = opt_0._replace(nu=jax.tree_util.tree_map(lambda k: jnp.sin(1000 * k), state.params))
   new_opt = [opt_0] + list(state.opt_state[1:])
   state = state.replace(opt_state=new_opt)
   return state
diff --git a/src/maxtext/utils/standalone_dataloader.py b/src/maxtext/utils/standalone_dataloader.py
@@ -1,4 +1,4 @@
-# Copyright 2023–2025 Google LLC
+# Copyright 2023–2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/integration/standalone_dl_ckpt_test.py b/tests/integration/standalone_dl_ckpt_test.py
@@ -15,8 +15,8 @@
 """ Tests for the standalone_checkpointer.py """
 import unittest
 import pytest
-from tools.gcs_benchmarks.standalone_checkpointer import main as sckpt_main
-from tools.gcs_benchmarks.standalone_dataloader import main as sdl_main
+from maxtext.utils.standalone_checkpointer import main as sckpt_main
+from maxtext.utils.standalone_dataloader import main as sdl_main
 from maxtext.utils.globals import MAXTEXT_ASSETS_ROOT
 from maxtext.common.gcloud_stub import is_decoupled
 
@@ -50,6 +50,7 @@ def _get_random_test_name(self, test_name):
 
   @pytest.mark.integration_test
   @pytest.mark.tpu_only
+  @pytest.mark.scheduled_only
   def test_standalone_dataloader(self):
     random_run_name = self._get_random_test_name("standalone_dataloader")
     sdl_main(
@@ -68,6 +69,7 @@ def test_standalone_dataloader(self):
 
   @pytest.mark.integration_test
   @pytest.mark.tpu_only
+  @pytest.mark.scheduled_only
   def test_standalone_checkpointer(self):
     random_run_name = self._get_random_test_name("standalone_checkpointer")
     # checkpoint at 50
diff --git a/tools/gcs_benchmarks/__init__.py b/tools/gcs_benchmarks/__init__.py

@@ -1,4 +1,4 @@
-# Copyright 2023–2025 Google LLC
+# Copyright 2023–2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.