microsoft · romanlutz · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py
@@ -29,76 +29,164 @@
 # %%
 import os
 
-# Enter details of your AML workspace
-subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID")
-resource_group = os.environ.get("AZURE_RESOURCE_GROUP")
+from pyrit.setup.initialization import _load_environment_files
+
+_load_environment_files(env_files=None)  # NOTE(review): private helper; presumably loads default .env files — confirm
+
+subscription_id = os.environ.get("AZURE_ML_SUBSCRIPTION_ID")  # os.environ.get yields None when a variable is unset
+resource_group = os.environ.get("AZURE_ML_RESOURCE_GROUP")
 workspace = os.environ.get("AZURE_ML_WORKSPACE_NAME")
 print(workspace)
 
+# %% [markdown]
+# The Azure ML SDK emits a fair amount of telemetry to stderr that looks
+# alarming but is benign: every operation logs an `ActivityCompleted: ...
+# HowEnded=Failure` line for any expected `UserError` (such as
+# `create_or_update` finding the environment already at the latest version),
+# and every preview / experimental class prints a one-line warning. Quiet
+# all of it so the rest of the notebook output stays focused on what
+# actually matters.
+
+# %%
+import logging
+import warnings
+
+logging.getLogger("azure.ai.ml").setLevel(logging.ERROR)  # SDK logger: only ERROR and above get through
+warnings.filterwarnings("ignore", module=r"azure\.ai\.ml.*")  # ignore warnings attributed to azure.ai.ml modules
+
 # %%
 from azure.ai.ml import MLClient
 from azure.identity import AzureCliCredential
 
-# Get a handle to the workspace
-# For some people DefaultAzureCredential may work better than AzureCliCredential.
 ml_client = MLClient(AzureCliCredential(), subscription_id, resource_group, workspace)
 
 # %% [markdown]
 # ## Create AML Environment
 
 # %% [markdown]
-# To install the dependencies needed to run GCG, we create an AML environment from a [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile).
+# To install the dependencies needed to run GCG, we create an AML environment from a
+# [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). The Dockerfile uses
+# an NVIDIA CUDA base image with Python 3.11 and installs PyRIT with the `gcg` extra.
+
 # %%
 from pathlib import Path
 
-from azure.ai.ml.entities import BuildContext, Environment, JobResourceConfiguration
+from azure.ai.ml.entities import BuildContext, Environment
 
 from pyrit.common.path import HOME_PATH
 
-# Configure the AML environment with path to Dockerfile and dependencies
+# Configure the AML environment — build context is the repo root so the Dockerfile
+# can COPY pyproject.toml and pyrit/ for pip install -e ".[gcg]"
 env_docker_context = Environment(
-    build=BuildContext(path=Path(HOME_PATH) / "pyrit" / "auxiliary_attacks" / "gcg" / "src"),
-    name="pyrit",
-    description="PyRIT environment created from a Docker context.",
+    build=BuildContext(
+        path=Path(HOME_PATH),  # build context: the whole checkout, not just the Dockerfile directory
+        dockerfile_path="pyrit/auxiliary_attacks/gcg/src/Dockerfile",  # given relative to the build context
+    ),
+    name="pyrit-gcg",
+    description="PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]",
+    tags={"Owner": os.environ.get("USER") or os.environ.get("USERNAME") or "unknown"},  # USERNAME covers Windows
 )
 
-# Create or update the AML environment
 ml_client.environments.create_or_update(env_docker_context)
 
-
 # %% [markdown]
 # ## Submit Training Job to AML
 
 # %% [markdown]
-# Finally, we configure the command to run the GCG algorithm. The entry file for the algorithm is [`run.py`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), which takes several command line arguments, as shown below. We also have to specify the compute `instance_type` to run the algorithm on. In our experience, a GPU instance with at least 32GB of vRAM is required. In the example below, we use Standard_NC96ads_A100_v4.
+# Finally, we configure the command to run the GCG algorithm. The entry point is
+# [`pyrit.auxiliary_attacks.gcg.experiments.run`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py),
+# invoked as a module so the uploaded code snapshot takes priority over the
+# Docker-installed package (Python's `-m` flag puts the cwd at the front of `sys.path`).
 #
-# Depending on the compute instance you use, you may encounter "out of memory" errors. In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`.
+# We also have to specify a GPU compute target (`compute` below must name one that exists in your
+# workspace). In our experience, at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4).
+#
+# Depending on the compute instance you use, you may encounter "out of memory" errors.
+# In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`.
 
 # %%
-from azure.ai.ml import command
+from azure.ai.ml import Output, command
 
-# Configure the command
 job = command(
     code=Path(HOME_PATH),
-    command="cd pyrit/auxiliary_attacks/gcg/experiments && python run.py --model_name ${{inputs.model_name}} --setup ${{inputs.setup}} --n_train_data ${{inputs.n_train_data}} --n_test_data ${{inputs.n_test_data}} --n_steps ${{inputs.n_steps}} --batch_size ${{inputs.batch_size}}",
-    inputs={
-        "model_name": "phi_3_mini",
-        "setup": "multiple",
-        "n_train_data": 25,
-        "n_test_data": 0,
-        "n_steps": 500,
-        "batch_size": 256,
-    },
+    command=(
+        "python -m pyrit.auxiliary_attacks.gcg.experiments.run"
+        " --model_name llama_2"
+        " --setup single"
+        " --n_train_data 5"
+        " --n_test_data 0"
+        " --n_steps 5"
+        " --batch_size 64"
+        " --output_dir ${{outputs.results}}"  # placeholder Azure ML resolves to the `results` output path
+    ),
+    inputs={},
+    outputs={"results": Output(type="uri_folder")},
     environment=f"{env_docker_context.name}:{env_docker_context.version}",
     environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]},
-    display_name="suffix_generation",
-    description="Generate a suffix for attacking LLMs.",
-    resources=JobResourceConfiguration(
-        instance_type="Standard_NC96ads_A100_v4",
-        instance_count=1,
-    ),
+    compute="gcg-gpu-a100",  # compute target name; replace with one that exists in your workspace
+    display_name="gcg_suffix_generation",
+    description="Generate adversarial suffixes using GCG on Llama-2.",
+    tags={"Owner": os.environ.get("USER") or os.environ.get("USERNAME") or "unknown"},  # USERNAME covers Windows
 )
 
 # %%
-# Submit the command
 returned_job = ml_client.create_or_update(job)
+print(f"Job: {returned_job.name}")  # this name is what the polling cell passes to ml_client.jobs.get
+print(f"Status: {returned_job.status}")
+print(f"Studio URL: {returned_job.studio_url}")
+
+# %% [markdown]
+# ## Wait for the Job to Complete and Inspect the Generated Suffix
+#
+# The next cell polls the job once a minute until it reaches a terminal state
+# (~20-30 minutes for the small 5-step baseline above), then downloads the job's
+# logs and named outputs (`all=True`) and prints the final suffix. The runner writes its
+# result file as `individual_behaviors_<model>_gcg_<timestamp>.json` into
+# the directory Azure ML mounted for the `results` output, so it ends up
+# under `<download_dir>/named-outputs/results/` once we download. The
+# `controls` array in that file contains one entry per training step, and
+# the last entry is the final adversarial suffix that, appended to the user
+# prompt, was optimized to elicit the target response.
+
+# %%
+import json
+import tempfile
+import time
+from pathlib import Path
+
+_TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"}  # statuses that end the polling loop
+
+last_status = None
+while True:
+    current_status = ml_client.jobs.get(returned_job.name).status
+    if current_status != last_status:  # report transitions only, not every poll
+        print(f"Job status: {current_status}", flush=True)
+        last_status = current_status
+    if current_status in _TERMINAL_STATES:
+        break
+    time.sleep(60)
+
+if current_status != "Completed":  # explicit raise: `assert` is stripped under `python -O`
+    raise RuntimeError(f"Job did not complete successfully: {current_status}")
+
+download_dir = Path(tempfile.mkdtemp(prefix="gcg-aml-"))
+ml_client.jobs.download(name=returned_job.name, download_path=str(download_dir), all=True)
+
+result_files = sorted(download_dir.rglob("individual_behaviors_*_gcg_*.json"))  # sorted: deterministic pick
+if not result_files:
+    print(f"No GCG result file found under {download_dir}. Files captured:")
+    for p in sorted(download_dir.rglob("*")):
+        if p.is_file():
+            print(f"  {p.relative_to(download_dir)}")
+    raise FileNotFoundError("Result JSON not in downloaded artifacts")
+
+result_file = result_files[0]
+log = json.loads(result_file.read_text(encoding="utf-8"))  # explicit encoding: don't depend on the locale default
+
+final_suffix = log["controls"][-1] if log["controls"] else None  # last entry = suffix after the final step
+final_loss = log["losses"][-1] if log["losses"] else None
+
+print(f"Result file: {result_file.name}")
+print(f"Steps run: {len(log['controls'])}")
+print(f"Final loss: {final_loss}")
+print(f"Generated suffix: {final_suffix!r}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -112,10 +112,12 @@ huggingface = [
 ]
 gcg = [
     "accelerate>=1.7.0",
-    "azure-ai-ml>=1.27.1",
-    "azureml-mlflow>=1.60.0",
-    "mlflow>=3.11.1",
+    "azure-ai-ml>=1.32.0",
     "ml-collections>=1.1.0",
+    # pyarrow is a transitive dep of `datasets`. With the gcg extra installed,
+    # the resolver picks a pyarrow version that lacks cp314 wheels and fails
+    # to build from source on Python 3.14; require a minimum version that ships them.
+    "pyarrow>=22.0.0; python_version >= '3.14'",
     "sentencepiece>=0.2.0",
     "torch>=2.7.0",
 ]
@@ -139,13 +141,11 @@ speech = [
 all = [
     "accelerate>=1.7.0",
     "av>=14.0.0",
-    "azure-ai-ml>=1.27.1",
+    "azure-ai-ml>=1.32.0",
     "azure-cognitiveservices-speech>=1.44.0",
-    "azureml-mlflow>=1.60.0",
     "flask>=3.1.3",
     "ipykernel>=6.29.5",
     "jupyter>=1.1.1",
-    "mlflow>=3.11.1",
     "ml-collections>=1.1.0",
     "ollama>=0.5.1",
     "opencv-python>=4.11.0.86",