Skip to content

Commit 95fe19e

Browse files
Merge pull request #3170 from gurusai-voleti:ai-gsutil-migration-4779320b60d44c658f872e9b583efb19
PiperOrigin-RevId: 890566995
2 parents 771acbe + ed85f8c commit 95fe19e

10 files changed

Lines changed: 19 additions & 20 deletions

File tree

src/maxtext/checkpoint_conversion/examples/convert_gemma2_to_hf.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ echo "Starting verification for the converted gemma2-2b model..."
4343
echo "Creating local directory for HF checkpoints: ${LOCAL_HF_CHECKPOINT_DIR}"
4444
mkdir -p "${LOCAL_HF_CHECKPOINT_DIR}"
4545
echo "Downloading HF checkpoints from ${HF_CHECKPOINT_GCS_PATH} to ${LOCAL_HF_CHECKPOINT_DIR}..."
46-
gsutil -m cp -r "${HF_CHECKPOINT_GCS_PATH}/*" "${LOCAL_HF_CHECKPOINT_DIR}/"
46+
gcloud storage cp -r "${HF_CHECKPOINT_GCS_PATH}/*" "${LOCAL_HF_CHECKPOINT_DIR}/"
4747
echo "Download complete."
4848

4949
python3 -m tests.utils.forward_pass_logit_checker \

src/maxtext/checkpoint_conversion/examples/convert_gemma3_to_hf.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ echo "Starting verification for the converted gemma3-4b model..."
4343
echo "Creating local directory for HF checkpoints: ${LOCAL_HF_CHECKPOINT_DIR}"
4444
mkdir -p "${LOCAL_HF_CHECKPOINT_DIR}"
4545
echo "Downloading HF checkpoints from ${HF_CHECKPOINT_GCS_PATH} to ${LOCAL_HF_CHECKPOINT_DIR}..."
46-
gsutil -m cp -r "${HF_CHECKPOINT_GCS_PATH}/*" "${LOCAL_HF_CHECKPOINT_DIR}/"
46+
gcloud storage cp --recursive "${HF_CHECKPOINT_GCS_PATH}/*" "${LOCAL_HF_CHECKPOINT_DIR}/"
4747
echo "Download complete."
4848

4949
python3 -m tests.utils.forward_pass_logit_checker \

src/maxtext/common/profiler.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,10 +128,10 @@ def deactivate(self, blocking_object=None):
128128
max_logging.log("WARNING: library for nsys was not loaded \n" "profiler has no effect")
129129
return
130130
# Popen() instead of run() for non-blocking behavior
131-
if shutil.which("gsutil") is not None:
132-
subprocess.Popen(["gsutil", "cp", "*nsys-rep", self.output_path]) # pylint: disable=consider-using-with
131+
if shutil.which("gcloud") is not None:
132+
subprocess.Popen(["gcloud", "storage", "cp", "*nsys-rep", self.output_path]) # pylint: disable=consider-using-with
133133
else:
134-
max_logging.log("WARNING: gsutil is not installed or not found in the system's PATH. Skipping upload...")
134+
max_logging.log("WARNING: gcloud is not installed or not found in the system's PATH. Skipping upload...")
135135
elif self.mode == "xplane":
136136
jax.profiler.stop_trace()
137137

tests/end_to_end/gpu/a3/test_convergence_125m_params.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ if [ "$DATASET_TYPE" == "hf" ]
4343
then
4444
# We use a local copy of tokenizer from https://huggingface.co/meta-llama/Llama-2-7b-hf
4545
# Alternatively, you can set tokenizer_path="meta-llama/Llama-2-7b-hf" and hf_access_token="<your-token>" after gaining access through HF website.
46-
gsutil cp -r gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
46+
gcloud storage cp --recursive gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
4747
CMD_DATA=" hf_path=parquet hf_data_files=gs://maxtext-dataset/hf/c4/c4-train-*.parquet dataset_type=hf tokenizer_path=${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}/llama2-tokenizer"
4848
fi
4949

tests/end_to_end/gpu/a3/test_convergence_1b_params.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ if [ "$DATASET_TYPE" == "hf" ]
4343
then
4444
# We use a local copy of tokenizer from https://huggingface.co/meta-llama/Llama-2-7b-hf
4545
# Alternatively, you can set tokenizer_path="meta-llama/Llama-2-7b-hf" and hf_access_token="<your-token>" after gaining access through HF website.
46-
gsutil cp -r gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
46+
gcloud storage cp --recursive gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
4747
CMD_DATA=" hf_path=parquet hf_data_files=gs://maxtext-dataset/hf/c4/c4-train-*.parquet dataset_type=hf tokenizer_path=${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}/llama2-tokenizer"
4848
fi
4949

tests/end_to_end/tpu/test_convergence_1b_params.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ if [ "$DATASET_TYPE" == "hf" ]
4949
then
5050
# We use a local copy of tokenizer from https://huggingface.co/meta-llama/Llama-2-7b-hf
5151
# Alternatively, you can set tokenizer_path="meta-llama/Llama-2-7b-hf" and hf_access_token="<your-token>" after gaining access through HF website.
52-
gsutil cp -r gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
52+
gcloud storage cp -r gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}"
5353
CMD_DATA=" hf_path=parquet tokenizer_path=${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/assets/tokenizers}}/llama2-tokenizer \
5454
hf_train_files=$DATASET_PATH/hf/c4/c4-train-*.parquet \
5555
hf_eval_split=train \

tests/end_to_end/tpu/test_decode_save_quantized_ckpt.sh

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,5 @@ echo
7171
echo "Output directed to: ${OUTFILE}"
7272
echo
7373
echo "Checkpoint saved at:$SAVE_QUANTIZED_CHECKPOINT_PATH"
74-
${cmd} gsutil ls -lh $SAVE_QUANTIZED_CHECKPOINT_PATH >> ${OUTFILE}
74+
${cmd} gcloud storage ls --long --readable-sizes $SAVE_QUANTIZED_CHECKPOINT_PATH >> ${OUTFILE}
7575
echo
76-
77-

tests/post_training/integration/sft_trainer_correctness_test.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def get_token_log_probs(logits, inputs):
147147
return token_log_probs
148148

149149

150-
@pytest.mark.external_training # setUpClass does gsutil tokenizer
150+
@pytest.mark.external_training # setUpClass does gcloud storage tokenizer
151151
class SFTTrainerCorrectnessTest(unittest.TestCase):
152152

153153
@classmethod
@@ -160,15 +160,16 @@ def setUpClass(cls):
160160

161161
exit_code = subprocess.call(
162162
[
163-
"gsutil",
163+
"gcloud",
164+
"storage",
164165
"cp",
165-
"-r",
166+
"--recursive",
166167
"gs://maxtext-dataset/hf/llama2-chat-tokenizer",
167168
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
168169
]
169170
)
170171
if exit_code != 0:
171-
raise ValueError(f"Download tokenizer with gsutil cp failed with exit code: {exit_code}")
172+
raise ValueError(f"Download tokenizer with gcloud storage cp failed with exit code: {exit_code}")
172173

173174
@pytest.mark.skip(reason="Logit output test fragile, failing on jax upgrade to 0.6.2 b/425997645")
174175
@pytest.mark.integration_test

tests/post_training/unit/distillation_data_processing_test.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
import os
2323
import subprocess
2424
import unittest
25-
import pytest
2625

2726
import transformers
2827

@@ -83,15 +82,16 @@ def setUpClass(cls):
8382
super().setUpClass()
8483
exit_code = subprocess.call(
8584
[
86-
"gsutil",
85+
"gcloud",
86+
"storage",
8787
"cp",
88-
"-r",
88+
"--recursive",
8989
"gs://maxtext-dataset/hf/llama2-chat-tokenizer",
9090
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
9191
]
9292
)
9393
if exit_code != 0:
94-
raise ValueError(f"Download tokenizer with gsutil cp failed with exit code: {exit_code}")
94+
raise ValueError(f"Download tokenizer with gcloud storage cp failed with exit code: {exit_code}")
9595

9696
def setUp(self):
9797
super().setUp()

tools/data_generation/download_dataset.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,4 +26,4 @@ function remove_trailing_slash {
2626
fi
2727
}
2828

29-
gsutil -u $1 -m cp 'gs://allennlp-tensorflow-datasets/c4/en/3.0.1/*' $(remove_trailing_slash $2)/c4/en/3.0.1
29+
gcloud storage cp 'gs://allennlp-tensorflow-datasets/c4/en/3.0.1/*' $(remove_trailing_slash $2)/c4/en/3.0.1

0 commit comments

Comments
 (0)