
Commit 5cf2c69

update sa folder logic
1 parent 6d26bba commit 5cf2c69

5 files changed

Lines changed: 256 additions & 8 deletions


stage_advantage/README.md

Lines changed: 35 additions & 7 deletions
@@ -141,7 +141,7 @@ TrainConfig(

 ### Usage

-From the **repository root**:
+From the **repository root**, the core training command is:

 ```bash
 # Single GPU (KAI0 or PI06)
@@ -163,6 +163,17 @@ uv run python scripts/train_pytorch.py ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD --exp_n

 Logs and checkpoints go to `experiment/<config_name>/` and `experiment/<config_name>/log/<exp_name>.log`. Redirect to a log file if desired, e.g. `2>&1 | tee experiment/ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD/log/run1.log`.

+For a ready-to-use script with environment setup (conda/venv activation, DDP configuration) and automatic log management, see **`annotation/train_estimator.sh`**:
+
+```bash
+RUNNAME=ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD RUNTIME=run1 bash stage_advantage/annotation/train_estimator.sh
+
+# Multi-GPU
+RUNNAME=ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD RUNTIME=run1 NPROC_PER_NODE=8 bash stage_advantage/annotation/train_estimator.sh
+```
+
+The shell script handles output directory creation, log redirection (via `tee`), and multi-GPU/multi-node dispatch automatically.
+
 ### Training Outputs

 ```
@@ -212,7 +223,7 @@ experiment/ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD/ # or ADVANTAGE_TORCH_PI06_FLATTE

 ### Usage

-From the **repository root** (or ensure Python can import the project and paths are correct):
+From the **repository root**, the core evaluation command is:

 ```bash
 uv run python stage_advantage/annotation/eval.py <model_type> <model_name> <repo_id>
@@ -228,7 +239,13 @@ uv run python stage_advantage/annotation/eval.py Flatten-Fold KAI0 /path/to/data
 uv run python stage_advantage/annotation/eval.py Flatten-Fold PI06 /path/to/dataset
 ```

-`<model_type>` is a key in `eval.py`’s `MODELS_CONFIG_MAP` (e.g. `Flatten-Fold`); `<model_name>` is `PI06` or `KAI0`; `<repo_id>` is the path to the LeRobot dataset. Results are written under `<repo_id>/data_<model_name>_<ckpt_steps>/`.
+`<model_type>` is a key in `eval.py`'s `MODELS_CONFIG_MAP` (e.g. `Flatten-Fold`); `<model_name>` is `PI06` or `KAI0`; `<repo_id>` is the path to the LeRobot dataset. Results are written under `<repo_id>/data_<model_name>_<ckpt_steps>/`.
+
+For a ready-to-use script with environment setup (conda/venv activation, environment variables) and status logging, see **`annotation/eval.sh`**:
+
+```bash
+bash stage_advantage/annotation/eval.sh Flatten-Fold KAI0 /path/to/dataset
+```

 ### Evaluation Outputs

@@ -286,14 +303,22 @@ At **inference** time you must use the **same prompt format** as in training. To

 ### Usage

-From the repository root, run JAX training with the AWBC config and an experiment name:
+From the repository root, the core training command is:

 ```bash
 XLA_PYTHON_CLIENT_MEM_FRACTION=0.9 uv run scripts/train.py pi05_flatten_fold_awbc --exp_name=run1
 XLA_PYTHON_CLIENT_MEM_FRACTION=0.9 uv run scripts/train.py pi05_tee_shirt_sort_awbc --exp_name=run1
 XLA_PYTHON_CLIENT_MEM_FRACTION=0.9 uv run scripts/train.py pi05_hang_cloth_awbc --exp_name=run1
 ```

+For a ready-to-use script with environment setup (venv activation, `XLA_PYTHON_CLIENT_MEM_FRACTION`, `WANDB_MODE`) and automatic log management, see **`awbc/train_awbc.sh`**:
+
+```bash
+RUNNAME=pi05_flatten_fold_awbc RUNTIME=run1 bash stage_advantage/awbc/train_awbc.sh
+```
+
+The shell script handles output directory creation and log redirection (via `tee`) automatically.
+
 ---

 ## Directory Structure
@@ -304,9 +329,12 @@ stage_advantage/
 ├── annotation/              # Stages 0–2: labeling & estimator training
 │   ├── README.md
 │   ├── gt_label.py          # Core labeling script (progress → advantage → task_index)
-│   ├── gt_labeling.sh       # Batch labeling for PI06 / KAI0 variants (only .sh kept here)
+│   ├── gt_labeling.sh       # Batch labeling for PI06 / KAI0 variants
+│   ├── train_estimator.sh   # Shell script for Stage 1 training (env + DDP + logging)
 │   ├── eval.py              # Evaluate trained estimator on datasets
+│   ├── eval.sh              # Shell script for Stage 2 evaluation (env + logging)
 │   └── evaluator.py         # SimpleValueEvaluator: batched GPU inference
-└── awbc/                    # Stage 3: AWBC (commands in README)
-    └── README.md
+└── awbc/                    # Stage 3: AWBC
+    ├── README.md
+    └── train_awbc.sh        # Shell script for Stage 3 AWBC training (env + logging)
 ```
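
The two new scripts compose into the Stage 1 → Stage 2 flow this README describes. A minimal end-to-end sketch, assuming a KAI0 estimator, default single-GPU settings, and a placeholder dataset path of `/path/to/dataset`:

```bash
# Stage 1: train the advantage estimator
# (logs land in experiment/<RUNNAME>/log/<RUNTIME>.log)
RUNNAME=ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD RUNTIME=run1 \
  bash stage_advantage/annotation/train_estimator.sh

# Stage 2: label the dataset with the trained estimator
# (first point MODELS_CONFIG_MAP in eval.py at the run1 checkpoint)
bash stage_advantage/annotation/eval.sh Flatten-Fold KAI0 /path/to/dataset
```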

stage_advantage/annotation/eval.sh

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
#!/bin/bash
###############################################################################
# eval.sh
#
# Use a trained Advantage Estimator to label a dataset with predicted
# advantage values (relative_advantage, absolute_value, absolute_advantage).
#
# This script calls eval.py, which:
#   1. Loads a trained Advantage Estimator checkpoint
#   2. Iterates over all episodes in the LeRobot dataset
#   3. Reads video frames from three camera views (top, left, right)
#   4. Runs batched GPU inference to predict advantage values per frame
#   5. Writes results as new parquet files with advantage columns appended
#
# The output parquets are saved under:
#   <repo_id>/data_<model_name>_<ckpt_steps>/chunk-*/episode_*.parquet
#
# Prerequisites:
#   - A trained Advantage Estimator checkpoint (from Stage 1)
#   - Update MODELS_CONFIG_MAP in eval.py with the correct checkpoint paths
#
# Usage:
#   bash eval.sh <model_type> <model_name> <repo_id>
#
# Examples:
#   bash eval.sh Flatten-Fold KAI0 /path/to/dataset
#   bash eval.sh Flatten-Fold PI06 /path/to/dataset
#
# Arguments:
#   model_type : Flatten-Fold / demo_A / demo_B
#   model_name : PI06 (single-timestep) / KAI0 (two-timestep stage-level)
#   repo_id    : Path to the LeRobot dataset to evaluate
###############################################################################
set -xe
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../" && pwd)"
cd "${PROJECT_ROOT}"
echo "Project root: ${PROJECT_ROOT}"

# ─── Conda / venv activation ─────────────────────────────────────────────────
source /cpfs01/shared/smch/miniconda3/etc/profile.d/conda.sh
conda activate uv_py311
source .venv/bin/activate

export TZ='Asia/Shanghai'

# ─── Other environment variables ─────────────────────────────────────────────
export UV_DEFAULT_INDEX="https://mirrors.aliyun.com/pypi/simple/"
export WANDB_MODE=offline

# ─── Parse arguments ─────────────────────────────────────────────────────────
MODEL_TYPE=${1:?"Usage: bash eval.sh <model_type> <model_name> <repo_id>"}
MODEL_NAME=${2:?"Usage: bash eval.sh <model_type> <model_name> <repo_id>"}
REPO_ID=${3:?"Usage: bash eval.sh <model_type> <model_name> <repo_id>"}

echo "============================================================"
echo " Advantage Estimator Evaluation"
echo "   Model type: ${MODEL_TYPE}"
echo "   Model name: ${MODEL_NAME}"
echo "   Dataset:    ${REPO_ID}"
echo "============================================================"

uv run python "${SCRIPT_DIR}/eval.py" "${MODEL_TYPE}" "${MODEL_NAME}" "${REPO_ID}"

echo "============================================================"
echo " Evaluation complete!"
echo " Results saved under: ${REPO_ID}/data_${MODEL_NAME}_*/"
echo "============================================================"
stage_advantage/annotation/train_estimator.sh

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
#!/bin/bash
###############################################################################
# train_estimator.sh
###############################################################################
set -xe
set -o pipefail

# ─── Navigate to project root ────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../" && pwd)"
cd "${PROJECT_ROOT}"

source .venv/bin/activate

# ─── Training config name ────────────────────────────────────────────────────
# RUNNAME must be one of: ADVANTAGE_TORCH_PI06_FLATTEN_FOLD, ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD
# Default to ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD if RUNNAME is not set
CFG=${RUNNAME:-ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD}

# ─── DDP environment variables ───────────────────────────────────────────────
WORLD_SIZE=${WORLD_SIZE:-1}
MASTER_ADDR=${MASTER_ADDR:-127.0.0.1}
RANK=${RANK:-0}
NPROC_PER_NODE=${NPROC_PER_NODE:-1}
MASTER_PORT=${MASTER_PORT:-12345}

# ─── Validate required environment variables ─────────────────────────────────
if [ -z "${RUNNAME+x}" ]; then
  echo "[WARNING] RUNNAME is not set, using default: ${CFG}"
  export RUNNAME=${CFG}
else
  echo "RUNNAME is set to: ${RUNNAME}"
fi

if [ -z "${RUNTIME+x}" ]; then
  echo "[ERROR] RUNTIME is not set. Please set RUNTIME for the experiment output directory."
  echo "        Example: RUNTIME=run1 bash train_estimator.sh"
  exit 1
else
  echo "RUNTIME is set to: ${RUNTIME}"
fi

# ─── Create output directories ───────────────────────────────────────────────
OUTPUT_DIR="./experiment/${RUNNAME}"
LOG_OUTPUT_DIR="${OUTPUT_DIR}/log"
mkdir -p "${OUTPUT_DIR}" "${LOG_OUTPUT_DIR}"

# Set to "offline" for offline logging; remove or set to "online" for cloud sync
export WANDB_MODE=${WANDB_MODE:-offline}

if [ "${NPROC_PER_NODE}" -gt 1 ] || [ "${WORLD_SIZE}" -gt 1 ]; then
  # Multi-GPU / multi-node training via torchrun
  echo "Launching DDP training with torchrun..."
  uv run torchrun \
    --nnodes=${WORLD_SIZE} \
    --nproc_per_node=${NPROC_PER_NODE} \
    --node_rank=${RANK} \
    --master_addr=${MASTER_ADDR} \
    --master_port=${MASTER_PORT} \
    scripts/train_pytorch.py ${CFG} \
    --exp_name=${RUNTIME} \
    --save_interval 10000 \
    2>&1 | tee "${LOG_OUTPUT_DIR}/${RUNTIME}.log"
else
  # Single-GPU training
  echo "Launching single-GPU training..."
  uv run python scripts/train_pytorch.py ${CFG} \
    --exp_name=${RUNTIME} \
    --save_interval 10000 \
    2>&1 | tee "${LOG_OUTPUT_DIR}/${RUNTIME}.log"
fi
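
All of the DDP variables above are read from the environment, so a multi-node launch is just a matter of exporting them consistently on each machine. A sketch, assuming two 8-GPU nodes and a placeholder master address of `10.0.0.1`:

```bash
# On node 0 (also hosts the rendezvous master)
WORLD_SIZE=2 RANK=0 MASTER_ADDR=10.0.0.1 MASTER_PORT=12345 NPROC_PER_NODE=8 \
  RUNNAME=ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD RUNTIME=run1 \
  bash stage_advantage/annotation/train_estimator.sh

# On node 1: identical invocation except RANK
WORLD_SIZE=2 RANK=1 MASTER_ADDR=10.0.0.1 MASTER_PORT=12345 NPROC_PER_NODE=8 \
  RUNNAME=ADVANTAGE_TORCH_KAI0_FLATTEN_FOLD RUNTIME=run1 \
  bash stage_advantage/annotation/train_estimator.sh
```

Note that the script feeds `WORLD_SIZE` into `--nnodes` and `RANK` into `--node_rank`, so `WORLD_SIZE`, `MASTER_ADDR`, and `MASTER_PORT` must agree across nodes while `RANK` differs per node.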

stage_advantage/awbc/README.md

Lines changed: 9 additions & 1 deletion
@@ -36,7 +36,7 @@ Each uses `base_config=DataConfig(prompt_from_task=True)` so that the dataset’

 ## Usage

-From the **repository root**, run training with the config name and `--exp_name`:
+From the **repository root**, the core training command is:

 ```bash
 XLA_PYTHON_CLIENT_MEM_FRACTION=0.9 uv run scripts/train.py pi05_flatten_fold_awbc --exp_name=run1
@@ -46,6 +46,14 @@ XLA_PYTHON_CLIENT_MEM_FRACTION=0.9 uv run scripts/train.py pi05_hang_cloth_awbc

 Checkpoints and logs are written under `experiment/<config_name>/<exp_name>/` (e.g. `experiment/pi05_flatten_fold_awbc/run1/`).

+For a ready-to-use script with environment setup (venv activation, `XLA_PYTHON_CLIENT_MEM_FRACTION`, `WANDB_MODE`) and automatic log management, see **`train_awbc.sh`**:
+
+```bash
+RUNNAME=pi05_flatten_fold_awbc RUNTIME=run1 bash stage_advantage/awbc/train_awbc.sh
+```
+
+The shell script handles output directory creation and log redirection (via `tee`) automatically.
+
 ## Prompt format (training and inference)

 During **training**, the prompt is taken from **`meta/tasks.jsonl`**: each sample’s `task_index` is mapped to a string (written by `gt_label.py` when creating the advantage dataset).
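
To confirm the training prompts before launching AWBC, it can help to eyeball `meta/tasks.jsonl` directly. A sketch, assuming the advantage dataset lives at the placeholder path `data/FlattenFold/advantage` and that each JSONL row pairs a `task_index` with its prompt string:

```bash
# Show the first few task_index -> prompt mappings written by gt_label.py
head -n 5 data/FlattenFold/advantage/meta/tasks.jsonl
```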

stage_advantage/awbc/train_awbc.sh

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
#!/bin/bash
###############################################################################
# train_awbc.sh
#
# Train a policy with Advantage-Weighted Behavior Cloning (AWBC) using
# advantage-estimator-labeled data. The data must have task_index per frame and
# meta/tasks.jsonl mapping task_index -> prompt string (from Stage 0 + Stage 2).
#
# Configs (see src/openpi/training/config.py):
#   pi05_flatten_fold_awbc
#   pi05_tee_shirt_sort_awbc
#   pi05_hang_cloth_awbc
#
# Prerequisites:
#   - Complete Stage 0 (GT labeling) and Stage 2 (advantage estimation on data),
#     then run gt_label.py with --advantage-source absolute_advantage to produce
#     the "advantage" dataset with task_index and tasks.jsonl.
#   - Set repo_id in the AWBC config to the path of that dataset
#     (e.g. <path_to_repo_root>/data/FlattenFold/advantage).
#   - Run compute_norm_states_fast.py for the chosen config before training.
#   - Set weight_loader in config to your π₀.5 base checkpoint.
#
# Usage:
#   RUNNAME=pi05_flatten_fold_awbc RUNTIME=run1 bash stage_advantage/awbc/train_awbc.sh
###############################################################################
set -xe
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../" && pwd)"
cd "${PROJECT_ROOT}"

source .venv/bin/activate

# ─── Training config name ────────────────────────────────────────────────────
# RUNNAME must be one of: pi05_flatten_fold_awbc, pi05_tee_shirt_sort_awbc, pi05_hang_cloth_awbc
CFG=${RUNNAME:-pi05_flatten_fold_awbc}

# ─── Validate required environment variables ─────────────────────────────────
if [ -z "${RUNNAME+x}" ]; then
  echo "[WARNING] RUNNAME is not set, using default: ${CFG}"
  export RUNNAME=${CFG}
else
  echo "RUNNAME is set to: ${RUNNAME}"
fi

if [ -z "${RUNTIME+x}" ]; then
  echo "[ERROR] RUNTIME is not set. Please set RUNTIME for the experiment output directory."
  echo "        Example: RUNTIME=run1 bash stage_advantage/awbc/train_awbc.sh"
  exit 1
else
  echo "RUNTIME is set to: ${RUNTIME}"
fi

# ─── Output directories ──────────────────────────────────────────────────────
OUTPUT_DIR="./experiment/${RUNNAME}"
LOG_OUTPUT_DIR="${OUTPUT_DIR}/log"
mkdir -p "${OUTPUT_DIR}" "${LOG_OUTPUT_DIR}"

export WANDB_MODE=${WANDB_MODE:-offline}
export XLA_PYTHON_CLIENT_MEM_FRACTION=${XLA_PYTHON_CLIENT_MEM_FRACTION:-0.9}

# ─── Launch JAX training ─────────────────────────────────────────────────────
echo "Launching AWBC training (JAX)..."
uv run scripts/train.py ${CFG} \
  --exp_name=${RUNTIME} \
  2>&1 | tee "${LOG_OUTPUT_DIR}/${RUNTIME}.log"

echo "============================================================"
echo " AWBC training finished. Checkpoints: ${OUTPUT_DIR}/${RUNTIME}/"
echo "============================================================"
