File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -49,7 +49,7 @@ if [ "$DATASET_TYPE" == "hf" ]
4949then
5050 # We use a local copy of tokenizer from https://huggingface.co/meta-llama/Llama-2-7b-hf
5151 # Alternatively, you can set tokenizer_path="meta-llama/Llama-2-7b-hf" and hf_access_token="<your-token>" after gaining access through the HF website.
52- gcloud storage cp --recursive gs://maxtext-dataset/hf/llama2-tokenizer " ${MAXTEXT_ASSETS_ROOT:- ${MAXTEXT_PKG_DIR:- ${MAXTEXT_REPO_ROOT:- $PWD } / src/ maxtext/ assets/ tokenizers} } "
52+ gcloud storage cp -r gs://maxtext-dataset/hf/llama2-tokenizer " ${MAXTEXT_ASSETS_ROOT:- ${MAXTEXT_PKG_DIR:- ${MAXTEXT_REPO_ROOT:- $PWD } / src/ maxtext/ assets/ tokenizers} } "
5353 CMD_DATA=" hf_path=parquet tokenizer_path=${MAXTEXT_ASSETS_ROOT:- ${MAXTEXT_PKG_DIR:- ${MAXTEXT_REPO_ROOT:- $PWD } / src/ maxtext/ assets/ tokenizers} } /llama2-tokenizer \
5454 hf_train_files=$DATASET_PATH /hf/c4/c4-train-*.parquet \
5555 hf_eval_split=train \
You can’t perform that action at this time.
0 commit comments