diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb index 416b27353e..cdc2bc24d2 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb @@ -47,38 +47,78 @@ "name": "stdout", "output_type": "stream", "text": [ - "romanlutz\n" + "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", + "Loaded environment file: ./.pyrit/.env\n", + "Loaded environment file: ./.pyrit/.env.local\n", + "gcg-romanlutz\n" ] } ], "source": [ "import os\n", "\n", - "# Enter details of your AML workspace\n", - "subscription_id = os.environ.get(\"AZURE_SUBSCRIPTION_ID\")\n", - "resource_group = os.environ.get(\"AZURE_RESOURCE_GROUP\")\n", + "from pyrit.setup.initialization import _load_environment_files\n", + "\n", + "_load_environment_files(env_files=None)\n", + "\n", + "subscription_id = os.environ.get(\"AZURE_ML_SUBSCRIPTION_ID\")\n", + "resource_group = os.environ.get(\"AZURE_ML_RESOURCE_GROUP\")\n", "workspace = os.environ.get(\"AZURE_ML_WORKSPACE_NAME\")\n", "print(workspace)" ] }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "The Azure ML SDK emits a fair amount of telemetry to stderr that looks\n", + "alarming but is benign: every operation logs an `ActivityCompleted: ...\n", + "HowEnded=Failure` line for any expected `UserError` (such as\n", + "`create_or_update` finding the environment already at the latest version),\n", + "and every preview / experimental class prints a one-line warning. Quiet\n", + "all of it so the rest of the notebook output stays focused on what\n", + "actually matters." 
+ ] + }, { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": {}, "outputs": [], + "source": [ + "import logging\n", + "import warnings\n", + "\n", + "logging.getLogger(\"azure.ai.ml\").setLevel(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\", module=r\"azure\\.ai\\.ml.*\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + } + ], "source": [ "from azure.ai.ml import MLClient\n", "from azure.identity import AzureCliCredential\n", "\n", - "# Get a handle to the workspace\n", - "# For some people DefaultAzureCredential may work better than AzureCliCredential.\n", "ml_client = MLClient(AzureCliCredential(), subscription_id, resource_group, workspace)" ] }, { "cell_type": "markdown", - "id": "6", + "id": "8", "metadata": {}, "source": [ "## Create AML Environment" @@ -86,26 +126,98 @@ }, { "cell_type": "markdown", - "id": "7", - "metadata": { - "lines_to_next_cell": 0 - }, + "id": "9", + "metadata": {}, "source": [ - "To install the dependencies needed to run GCG, we create an AML environment from a [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile)." + "To install the dependencies needed to run GCG, we create an AML environment from a\n", + "[Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). The Dockerfile uses\n", + "an NVIDIA CUDA base image with Python 3.11 and installs PyRIT with the `gcg` extra." 
] }, { "cell_type": "code", "execution_count": null, - "id": "8", - "metadata": { - "lines_to_next_cell": 2 - }, + "id": "10", + "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=731.02 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " \"operation\": \"d83f8c4d225dee5d56c301c18e298f59\",\n", + " \"request\": \"c537217eb2b56149\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-09T12:49:18.18528+00:00\"\n", + "}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\u001b[39m\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Environment.CreateOrUpdate, HowEnded=Failure, Duration=33839.37 [ms], Exception=ResourceExistsError, ErrorCategory=UserError, ErrorMessage=(UserError) Environment pyrit-gcg with version 10 is already registered and cannot be changed.\n", + "Code: UserError\n", + "Message: Environment pyrit-gcg with version 10 is already registered and cannot be changed.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=348.1 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional 
Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " \"operation\": \"66a3d036ffde9abfa617b61d00bd6214\",\n", + " \"request\": \"139566989f2c3f74\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-09T12:49:49.3263735+00:00\"\n", + "}\n" + ] + }, { "data": { "text/plain": [ - "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit', 'description': 'PyRIT environment created from a Docker context.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/romanlutz/providers/Microsoft.MachineLearningServices/workspaces/romanlutz/environments/pyrit/versions/5', 'Resource__source_path': '', 'base_path': './git/PyRIT/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '5', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" + "Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'pyrit-gcg', 'description': 'PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]', 'tags': {'Owner': 'unknown'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': 
'/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourceGroups/gcg-romanlutz/providers/Microsoft.MachineLearningServices/workspaces/gcg-romanlutz/environments/pyrit-gcg/versions/11', 'Resource__source_path': '', 'base_path': './git/PyRIT-wt-gcg-refactor/doc/code/auxiliary_attacks', 'creation_context': , 'serialize': , 'version': '11', 'conda_file': None, 'build': , 'inference_config': None, 'os_type': 'Linux', 'conda_file_path': None, 'path': None, 'datastore': None, 'upload_hash': None, 'translated_conda_file': None})" ] }, "execution_count": null, @@ -116,24 +228,28 @@ "source": [ "from pathlib import Path\n", "\n", - "from azure.ai.ml.entities import BuildContext, Environment, JobResourceConfiguration\n", + "from azure.ai.ml.entities import BuildContext, Environment\n", "\n", "from pyrit.common.path import HOME_PATH\n", "\n", - "# Configure the AML environment with path to Dockerfile and dependencies\n", + "# Configure the AML environment — build context is the repo root so the Dockerfile\n", + "# can COPY pyproject.toml and pyrit/ for pip install -e \".[gcg]\"\n", "env_docker_context = Environment(\n", - " build=BuildContext(path=Path(HOME_PATH) / \"pyrit\" / \"auxiliary_attacks\" / \"gcg\" / \"src\"),\n", - " name=\"pyrit\",\n", - " description=\"PyRIT environment created from a Docker context.\",\n", + " build=BuildContext(\n", + " path=Path(HOME_PATH),\n", + " dockerfile_path=\"pyrit/auxiliary_attacks/gcg/src/Dockerfile\",\n", + " ),\n", + " name=\"pyrit-gcg\",\n", + " description=\"PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]\",\n", + " tags={\"Owner\": os.environ.get(\"USER\", \"unknown\")},\n", ")\n", "\n", - "# Create or update the AML environment\n", "ml_client.environments.create_or_update(env_docker_context)" ] }, { "cell_type": "markdown", - "id": "9", + "id": "11", "metadata": {}, "source": [ "## Submit Training Job to AML" @@ -141,75 +257,281 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "12", "metadata": {}, 
"source": [ - "Finally, we configure the command to run the GCG algorithm. The entry file for the algorithm is [`run.py`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), which takes several command line arguments, as shown below. We also have to specify the compute `instance_type` to run the algorithm on. In our experience, a GPU instance with at least 32GB of vRAM is required. In the example below, we use Standard_NC96ads_A100_v4.\n", + "Finally, we configure the command to run the GCG algorithm. The entry point is\n", + "[`pyrit.auxiliary_attacks.gcg.experiments.run`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py),\n", + "invoked as a module so the uploaded code snapshot takes priority over the\n", + "Docker-installed package (Python's `-m` flag puts the cwd at the front of `sys.path`).\n", + "\n", + "We also have to specify a GPU compute target. In our experience, a GPU instance with\n", + "at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4).\n", "\n", - "Depending on the compute instance you use, you may encounter \"out of memory\" errors. In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`." + "Depending on the compute instance you use, you may encounter \"out of memory\" errors.\n", + "In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`." 
] }, { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "13", "metadata": {}, "outputs": [], "source": [ - "from azure.ai.ml import command\n", + "from azure.ai.ml import Output, command\n", "\n", - "# Configure the command\n", "job = command(\n", " code=Path(HOME_PATH),\n", - " command=\"cd pyrit/auxiliary_attacks/gcg/experiments && python run.py --model_name ${{inputs.model_name}} --setup ${{inputs.setup}} --n_train_data ${{inputs.n_train_data}} --n_test_data ${{inputs.n_test_data}} --n_steps ${{inputs.n_steps}} --batch_size ${{inputs.batch_size}}\",\n", - " inputs={\n", - " \"model_name\": \"phi_3_mini\",\n", - " \"setup\": \"multiple\",\n", - " \"n_train_data\": 25,\n", - " \"n_test_data\": 0,\n", - " \"n_steps\": 500,\n", - " \"batch_size\": 256,\n", - " },\n", + " command=(\n", + " \"python -m pyrit.auxiliary_attacks.gcg.experiments.run\"\n", + " \" --model_name llama_2\"\n", + " \" --setup single\"\n", + " \" --n_train_data 5\"\n", + " \" --n_test_data 0\"\n", + " \" --n_steps 5\"\n", + " \" --batch_size 64\"\n", + " \" --output_dir ${{outputs.results}}\"\n", + " ),\n", + " inputs={},\n", + " outputs={\"results\": Output(type=\"uri_folder\")},\n", " environment=f\"{env_docker_context.name}:{env_docker_context.version}\",\n", " environment_variables={\"HUGGINGFACE_TOKEN\": os.environ[\"HUGGINGFACE_TOKEN\"]},\n", - " display_name=\"suffix_generation\",\n", - " description=\"Generate a suffix for attacking LLMs.\",\n", - " resources=JobResourceConfiguration(\n", - " instance_type=\"Standard_NC96ads_A100_v4\",\n", - " instance_count=1,\n", - " ),\n", + " compute=\"gcg-gpu-a100\",\n", + " display_name=\"gcg_suffix_generation\",\n", + " description=\"Generate adversarial suffixes using GCG on Llama-2.\",\n", + " tags={\"Owner\": os.environ.get(\"USER\", \"unknown\")},\n", ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "14", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - 
"Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", - "Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.\n", - "\n", - "Example: azcopy copy './git/PyRIT' 'https://romanlutz0437468309.blob.core.windows.net/3f52e8b9-0bac-4c48-9e4a-a92e85a582c4-10s61nn9uso4b2p89xjypawyc7/PyRIT' \n", + "Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. 
Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "\n", - "See https://learn.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.\n", "\u001b[39m\n", "\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "pathOnCompute is not a known attribute of class and will be ignored\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job: stoic_parcel_6clfs67hp9\n", + "Status: Starting\n", + "Studio URL: https://ml.azure.com/runs/stoic_parcel_6clfs67hp9?wsid=/subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz&tid=72f988bf-86f1-41af-91ab-2d7cd011db47\n" + ] } ], "source": [ - "# Submit the command\n", - "returned_job = ml_client.create_or_update(job)" + "returned_job = ml_client.create_or_update(job)\n", + "print(f\"Job: {returned_job.name}\")\n", + "print(f\"Status: {returned_job.status}\")\n", + "print(f\"Studio URL: {returned_job.studio_url}\")" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Wait for the Job to Complete and Inspect the Generated Suffix\n", + "\n", + "The next cell polls the job until it 
reaches a terminal state (~20-30\n", + "minutes for the small 5-step baseline above), then downloads the named\n", + "`results` output and prints the final suffix. The runner writes its\n", + "result file as `individual_behaviors__gcg_.json` into\n", + "the directory Azure ML mounted for the `results` output, so it ends up\n", + "under `/named-outputs/results/` once we download. The\n", + "`controls` array in that file contains one entry per training step, and\n", + "the last entry is the final adversarial suffix that, appended to the user\n", + "prompt, was optimized to elicit the target response." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Preparing\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Queued\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Running\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job status: Completed\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading artifact azureml://subscriptions/db1ba766-2ca3-42c6-a19a-0f0d43134a8c/resourcegroups/gcg-romanlutz/workspaces/gcg-romanlutz/datastores/workspaceblobstore/paths/azureml/stoic_parcel_6clfs67hp9/results/ to ./AppData/Local/Temp/gcg-aml-e4x7zwr5/named-outputs/results\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ActivityCompleted: Activity=Datastore.ListSecrets, HowEnded=Failure, Duration=350.53 [ms], Exception=HttpResponseError, ErrorCategory=UserError, ErrorMessage=(UserError) No secrets for credentials of type None.\n", + "Code: UserError\n", + "Message: No secrets for credentials of type None.\n", + "Additional Information:Type: ComponentName\n", + "Info: {\n", + " \"value\": \"managementfrontend\"\n", + "}Type: Correlation\n", + "Info: {\n", + " \"value\": {\n", + " 
\"operation\": \"c6de26a5821f768ace8e59f5b75601d7\",\n", + " \"request\": \"f84ff3aa7b1f0557\"\n", + " }\n", + "}Type: Environment\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Location\n", + "Info: {\n", + " \"value\": \"westus3\"\n", + "}Type: Time\n", + "Info: {\n", + " \"value\": \"2026-05-09T13:18:13.3919233+00:00\"\n", + "}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading artifact azureml://datastores/workspaceartifactstore/ExperimentRun/dcid.stoic_parcel_6clfs67hp9 to ./AppData/Local/Temp/gcg-aml-e4x7zwr5/artifacts\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result file: individual_behaviors_llama_2_gcg_20260509-131606.json\n", + "Steps run: 30\n", + "Final loss: 0.87353515625\n", + "Generated suffix: '! ! }; Kingdom ! ! ! Move ! ! [former [ [ [ [ [ [ [ ['\n" + ] + } + ], + "source": [ + "import json\n", + "import tempfile\n", + "import time\n", + "from pathlib import Path\n", + "\n", + "_TERMINAL_STATES = {\"Completed\", \"Failed\", \"Canceled\", \"CancelRequested\"}\n", + "\n", + "last_status = None\n", + "while True:\n", + " current_status = ml_client.jobs.get(returned_job.name).status\n", + " if current_status != last_status:\n", + " print(f\"Job status: {current_status}\", flush=True)\n", + " last_status = current_status\n", + " if current_status in _TERMINAL_STATES:\n", + " break\n", + " time.sleep(60)\n", + "\n", + "assert current_status == \"Completed\", f\"Job did not complete successfully: {current_status}\"\n", + "\n", + "download_dir = Path(tempfile.mkdtemp(prefix=\"gcg-aml-\"))\n", + "ml_client.jobs.download(name=returned_job.name, download_path=str(download_dir), all=True)\n", + "\n", + "result_files = list(download_dir.rglob(\"individual_behaviors_*_gcg_*.json\"))\n", + "if not result_files:\n", + " print(f\"No GCG result file found under {download_dir}. 
Files captured:\")\n", + " for p in sorted(download_dir.rglob(\"*\")):\n", + " if p.is_file():\n", + " print(f\" {p.relative_to(download_dir)}\")\n", + " raise FileNotFoundError(\"Result JSON not in downloaded artifacts\")\n", + "\n", + "result_file = result_files[0]\n", + "with open(result_file) as f:\n", + " log = json.load(f)\n", + "\n", + "final_suffix = log[\"controls\"][-1] if log[\"controls\"] else None\n", + "final_loss = log[\"losses\"][-1] if log[\"losses\"] else None\n", + "\n", + "print(f\"Result file: {result_file.name}\")\n", + "print(f\"Steps run: {len(log['controls'])}\")\n", + "print(f\"Final loss: {final_loss}\")\n", + "print(f\"Generated suffix: {final_suffix!r}\")" ] } ], @@ -227,7 +549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.2" + "version": "3.14.4" } }, "nbformat": 4, diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py index db324926bf..ad35ae28e7 100644 --- a/doc/code/auxiliary_attacks/1_gcg_azure_ml.py +++ b/doc/code/auxiliary_attacks/1_gcg_azure_ml.py @@ -29,76 +29,164 @@ # %% import os -# Enter details of your AML workspace -subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID") -resource_group = os.environ.get("AZURE_RESOURCE_GROUP") +from pyrit.setup.initialization import _load_environment_files + +_load_environment_files(env_files=None) + +subscription_id = os.environ.get("AZURE_ML_SUBSCRIPTION_ID") +resource_group = os.environ.get("AZURE_ML_RESOURCE_GROUP") workspace = os.environ.get("AZURE_ML_WORKSPACE_NAME") print(workspace) +# %% [markdown] +# The Azure ML SDK emits a fair amount of telemetry to stderr that looks +# alarming but is benign: every operation logs an `ActivityCompleted: ... +# HowEnded=Failure` line for any expected `UserError` (such as +# `create_or_update` finding the environment already at the latest version), +# and every preview / experimental class prints a one-line warning. 
Quiet +# all of it so the rest of the notebook output stays focused on what +# actually matters. + +# %% +import logging +import warnings + +logging.getLogger("azure.ai.ml").setLevel(logging.ERROR) +warnings.filterwarnings("ignore", module=r"azure\.ai\.ml.*") + # %% from azure.ai.ml import MLClient from azure.identity import AzureCliCredential -# Get a handle to the workspace -# For some people DefaultAzureCredential may work better than AzureCliCredential. ml_client = MLClient(AzureCliCredential(), subscription_id, resource_group, workspace) # %% [markdown] # ## Create AML Environment # %% [markdown] -# To install the dependencies needed to run GCG, we create an AML environment from a [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). +# To install the dependencies needed to run GCG, we create an AML environment from a +# [Dockerfile](../../../pyrit/auxiliary_attacks/gcg/src/Dockerfile). The Dockerfile uses +# an NVIDIA CUDA base image with Python 3.11 and installs PyRIT with the `gcg` extra. 
+ # %% from pathlib import Path -from azure.ai.ml.entities import BuildContext, Environment, JobResourceConfiguration +from azure.ai.ml.entities import BuildContext, Environment from pyrit.common.path import HOME_PATH -# Configure the AML environment with path to Dockerfile and dependencies +# Configure the AML environment — build context is the repo root so the Dockerfile +# can COPY pyproject.toml and pyrit/ for pip install -e ".[gcg]" env_docker_context = Environment( - build=BuildContext(path=Path(HOME_PATH) / "pyrit" / "auxiliary_attacks" / "gcg" / "src"), - name="pyrit", - description="PyRIT environment created from a Docker context.", + build=BuildContext( + path=Path(HOME_PATH), + dockerfile_path="pyrit/auxiliary_attacks/gcg/src/Dockerfile", + ), + name="pyrit-gcg", + description="PyRIT GCG environment: CUDA 12.1 + Python 3.11 + pip install -e .[gcg]", + tags={"Owner": os.environ.get("USER", "unknown")}, ) -# Create or update the AML environment ml_client.environments.create_or_update(env_docker_context) - # %% [markdown] # ## Submit Training Job to AML # %% [markdown] -# Finally, we configure the command to run the GCG algorithm. The entry file for the algorithm is [`run.py`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), which takes several command line arguments, as shown below. We also have to specify the compute `instance_type` to run the algorithm on. In our experience, a GPU instance with at least 32GB of vRAM is required. In the example below, we use Standard_NC96ads_A100_v4. +# Finally, we configure the command to run the GCG algorithm. The entry point is +# [`pyrit.auxiliary_attacks.gcg.experiments.run`](../../../pyrit/auxiliary_attacks/gcg/experiments/run.py), +# invoked as a module so the uploaded code snapshot takes priority over the +# Docker-installed package (Python's `-m` flag puts the cwd at the front of `sys.path`). # -# Depending on the compute instance you use, you may encounter "out of memory" errors. 
In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`. +# We also have to specify a GPU compute target. In our experience, a GPU instance with +# at least 24GB of vRAM is required (e.g., Standard_NC24ads_A100_v4). +# +# Depending on the compute instance you use, you may encounter "out of memory" errors. +# In this case, we recommend training on a smaller model or lowering `n_train_data` or `batch_size`. # %% -from azure.ai.ml import command +from azure.ai.ml import Output, command -# Configure the command job = command( code=Path(HOME_PATH), - command="cd pyrit/auxiliary_attacks/gcg/experiments && python run.py --model_name ${{inputs.model_name}} --setup ${{inputs.setup}} --n_train_data ${{inputs.n_train_data}} --n_test_data ${{inputs.n_test_data}} --n_steps ${{inputs.n_steps}} --batch_size ${{inputs.batch_size}}", - inputs={ - "model_name": "phi_3_mini", - "setup": "multiple", - "n_train_data": 25, - "n_test_data": 0, - "n_steps": 500, - "batch_size": 256, - }, + command=( + "python -m pyrit.auxiliary_attacks.gcg.experiments.run" + " --model_name llama_2" + " --setup single" + " --n_train_data 5" + " --n_test_data 0" + " --n_steps 5" + " --batch_size 64" + " --output_dir ${{outputs.results}}" + ), + inputs={}, + outputs={"results": Output(type="uri_folder")}, environment=f"{env_docker_context.name}:{env_docker_context.version}", environment_variables={"HUGGINGFACE_TOKEN": os.environ["HUGGINGFACE_TOKEN"]}, - display_name="suffix_generation", - description="Generate a suffix for attacking LLMs.", - resources=JobResourceConfiguration( - instance_type="Standard_NC96ads_A100_v4", - instance_count=1, - ), + compute="gcg-gpu-a100", + display_name="gcg_suffix_generation", + description="Generate adversarial suffixes using GCG on Llama-2.", + tags={"Owner": os.environ.get("USER", "unknown")}, ) # %% -# Submit the command returned_job = ml_client.create_or_update(job) +print(f"Job: {returned_job.name}") +print(f"Status: 
{returned_job.status}")
+print(f"Studio URL: {returned_job.studio_url}")
+
+# %% [markdown]
+# ## Wait for the Job to Complete and Inspect the Generated Suffix
+#
+# The next cell polls the job until it reaches a terminal state (~20-30
+# minutes for the small 5-step baseline above), then downloads the named
+# `results` output and prints the final suffix. The runner writes its
+# result file as `individual_behaviors_<model_name>_gcg_<timestamp>.json` into
+# the directory Azure ML mounted for the `results` output, so it ends up
+# under `<download_dir>/named-outputs/results/` once we download. The
+# `controls` array in that file contains one entry per training step, and
+# the last entry is the final adversarial suffix that, appended to the user
+# prompt, was optimized to elicit the target response.
+
+# %%
+import json
+import tempfile
+import time
+from pathlib import Path
+
+_TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"}
+
+last_status = None
+while True:
+    current_status = ml_client.jobs.get(returned_job.name).status
+    if current_status != last_status:
+        print(f"Job status: {current_status}", flush=True)
+        last_status = current_status
+    if current_status in _TERMINAL_STATES:
+        break
+    time.sleep(60)
+
+assert current_status == "Completed", f"Job did not complete successfully: {current_status}"
+
+download_dir = Path(tempfile.mkdtemp(prefix="gcg-aml-"))
+ml_client.jobs.download(name=returned_job.name, download_path=str(download_dir), all=True)
+
+result_files = list(download_dir.rglob("individual_behaviors_*_gcg_*.json"))
+if not result_files:
+    print(f"No GCG result file found under {download_dir}. 
Files captured:") + for p in sorted(download_dir.rglob("*")): + if p.is_file(): + print(f" {p.relative_to(download_dir)}") + raise FileNotFoundError("Result JSON not in downloaded artifacts") + +result_file = result_files[0] +with open(result_file) as f: + log = json.load(f) + +final_suffix = log["controls"][-1] if log["controls"] else None +final_loss = log["losses"][-1] if log["losses"] else None + +print(f"Result file: {result_file.name}") +print(f"Steps run: {len(log['controls'])}") +print(f"Final loss: {final_loss}") +print(f"Generated suffix: {final_suffix!r}") diff --git a/pyproject.toml b/pyproject.toml index 4105fc0e8b..5cf393448c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,10 +112,12 @@ huggingface = [ ] gcg = [ "accelerate>=1.7.0", - "azure-ai-ml>=1.27.1", - "azureml-mlflow>=1.60.0", - "mlflow>=3.11.1", + "azure-ai-ml>=1.32.0", "ml-collections>=1.1.0", + # pyarrow is a transitive dep of `datasets`. With the gcg extra installed, + # the resolver picks a pyarrow version that lacks cp314 wheels and fails + # to build from source on Python 3.14; pin to a version that ships them. 
+ "pyarrow>=22.0.0; python_version >= '3.14'", "sentencepiece>=0.2.0", "torch>=2.7.0", ] @@ -139,13 +141,11 @@ speech = [ all = [ "accelerate>=1.7.0", "av>=14.0.0", - "azure-ai-ml>=1.27.1", + "azure-ai-ml>=1.32.0", "azure-cognitiveservices-speech>=1.44.0", - "azureml-mlflow>=1.60.0", "flask>=3.1.3", "ipykernel>=6.29.5", "jupyter>=1.1.1", - "mlflow>=3.11.1", "ml-collections>=1.1.0", "ollama>=0.5.1", "opencv-python>=4.11.0.86", diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py index eae2663a4e..6e7991ea30 100644 --- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py +++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py @@ -12,14 +12,11 @@ from copy import deepcopy from typing import Any, Optional -import mlflow import numpy as np import pandas as pd import torch import torch.multiprocessing as mp import torch.nn as nn -from fastchat.conversation import Conversation, SeparatorStyle -from fastchat.model import get_conversation_template from transformers import ( AutoModelForCausalLM, AutoTokenizer, @@ -134,7 +131,6 @@ def __init__( goal: str, target: str, tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, ) -> None: @@ -147,9 +143,9 @@ def __init__( target (str): The target of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a configured chat template + (i.e., ``tokenizer.chat_template`` is not ``None``); ``apply_chat_template`` is used + to render the user/assistant exchange instead of model-specific fastchat templates. control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
!") test_prefixes (list, optional): @@ -161,11 +157,8 @@ def __init__( self.target = target self.control = control_init self.tokenizer = tokenizer - self.conv_template = conv_template self.test_prefixes = test_prefixes - self.conv_template.messages = [] - self.test_new_toks = len(self.tokenizer(self.target).input_ids) + 2 # buffer for prefix in self.test_prefixes: self.test_new_toks = max(self.test_new_toks, len(self.tokenizer(prefix).input_ids)) @@ -173,103 +166,68 @@ def __init__( self._update_ids() def _update_ids(self) -> None: - self.conv_template.append_message(self.conv_template.roles[0], f"{self.goal} {self.control}") - self.conv_template.append_message(self.conv_template.roles[1], f"{self.target}") - prompt = self.conv_template.get_prompt() + # Render the goal+control as the user turn and the target as the assistant turn using the + # tokenizer's built-in chat template. This replaces fastchat's per-model Conversation logic + # and works for any HuggingFace chat-tuned model (issue #965). 
+ messages = [ + {"role": "user", "content": f"{self.goal} {self.control}"}, + {"role": "assistant", "content": f"{self.target}"}, + ] + prompt = self.tokenizer.apply_chat_template(messages, tokenize=False) encoding = self.tokenizer(prompt) toks = encoding.input_ids - if self.conv_template.name == "llama-2" or self.conv_template.name == "llama-3": - self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks))) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks)) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 2) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 3) + # Locate goal/control/target substrings in the rendered prompt. + goal_start = prompt.find(self.goal) + control_start = prompt.find(self.control) + target_start = prompt.find(self.target) + if goal_start == -1 or control_start == -1 or target_start == -1: + raise ValueError( + "Could not locate goal/control/target in chat-templated prompt. 
" + f"prompt={prompt!r}, goal={self.goal!r}, " + f"control={self.control!r}, target={self.target!r}" + ) - else: - python_tokenizer = False or self.conv_template.name == "oasst_pythia" - try: - encoding.char_to_token(len(prompt) - 1) - except Exception: - python_tokenizer = True - if python_tokenizer: - # This is specific to the vicuna and pythia tokenizer and conversation prompt. - # It will not work with other tokenizers or prompts. - self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks) - 1)) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks) - 1) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 1) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 2) - else: - self._system_slice = slice(None, encoding.char_to_token(len(self.conv_template.system))) - self._user_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[0])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[0]) + len(self.conv_template.roles[0]) + 1 - ), - ) - self._goal_slice = slice( - 
encoding.char_to_token(prompt.find(self.goal)), - encoding.char_to_token(prompt.find(self.goal) + len(self.goal)), - ) - self._control_slice = slice( - encoding.char_to_token(prompt.find(self.control)), - encoding.char_to_token(prompt.find(self.control) + len(self.control)), - ) - self._assistant_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[1])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[1]) + len(self.conv_template.roles[1]) + 1 - ), - ) - self._target_slice = slice( - encoding.char_to_token(prompt.find(self.target)), - encoding.char_to_token(prompt.find(self.target) + len(self.target)), - ) - self._loss_slice = slice( - encoding.char_to_token(prompt.find(self.target)) - 1, - encoding.char_to_token(prompt.find(self.target) + len(self.target)) - 1, - ) + # ``char_to_token`` returns None when the character index has no + # corresponding token (e.g. when the substring ends exactly at the end + # of the prompt or lands on whitespace squashed into a neighbouring + # token). For end positions we clamp to ``len(toks)``; for start + # positions we walk forward to the next character that does map to a + # token. Both are necessary for the slice arithmetic to remain valid + # across tokenizers/templates. 
+ def end_tok(char_pos: int) -> int: + tok = encoding.char_to_token(char_pos) + return len(toks) if tok is None else tok + + def start_tok(char_pos: int) -> int: + limit = len(prompt) + cur = char_pos + while cur < limit: + tok = encoding.char_to_token(cur) + if tok is not None: + return tok + cur += 1 + return len(toks) + + self._goal_slice = slice( + start_tok(goal_start), + end_tok(goal_start + len(self.goal)), + ) + self._control_slice = slice( + start_tok(control_start), + end_tok(control_start + len(self.control)), + ) + target_start_tok = start_tok(target_start) + target_end_tok = end_tok(target_start + len(self.target)) + self._target_slice = slice(target_start_tok, target_end_tok) + self._loss_slice = slice(target_start_tok - 1, target_end_tok - 1) + # Assistant role tokens are everything between the control end and the target start. + # This works for any chat template (e.g. llama-2 "[/INST]", phi-3 "<|assistant|>", etc.) + # without us needing to know the literal marker text. + self._assistant_role_slice = slice(self._control_slice.stop, self._target_slice.start) self.input_ids = torch.tensor(toks[: self._target_slice.stop], device="cpu") - self.conv_template.messages = [] @torch.no_grad() # type: ignore[misc, untyped-decorator, unused-ignore] def generate(self, model: Any, gen_config: Any = None) -> torch.Tensor: @@ -458,7 +416,6 @@ def __init__( goals: list[str], targets: list[str], tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, managers: Optional[dict[str, type[AttackPrompt]]] = None, @@ -472,9 +429,7 @@ def __init__( targets (List[str]): The list of targets of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a chat template configured. 
control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !") test_prefixes (list, optional): @@ -492,8 +447,7 @@ def __init__( self.tokenizer = tokenizer self._prompts = [ - managers["AP"](goal, target, tokenizer, conv_template, control_init, test_prefixes) - for goal, target in zip(goals, targets) + managers["AP"](goal, target, tokenizer, control_init, test_prefixes) for goal, target in zip(goals, targets) ] self._nonascii_toks = get_nonascii_toks(tokenizer, device="cpu") @@ -635,9 +589,7 @@ def __init__( self.models = [worker.model for worker in workers] self.logfile = logfile self.prompts = [ - managers["PM"]( - goals, targets, worker.tokenizer, worker.conv_template, control_init, test_prefixes, managers - ) + managers["PM"](goals, targets, worker.tokenizer, control_init, test_prefixes, managers) for worker in workers ] self.managers = managers @@ -820,7 +772,6 @@ def test_all(self) -> tuple[list[list[bool]], list[list[int]], list[list[float]] self.goals + self.test_goals, self.targets + self.test_targets, worker.tokenizer, - worker.conv_template, self.control_str, self.test_prefixes, self.managers, @@ -902,14 +853,13 @@ def log( f"====================================================\n" ) - # Log to mlflow + # Log loss and GPU memory log_loss(step=step_num, loss=loss) log_gpu_memory(step=step_num) - # Log results table to mlflow + # Log results table at end of training if step_num == n_steps: log_table_summary(losses=log["losses"], controls=log["controls"], n_steps=n_steps) - mlflow.end_run() class ProgressiveMultiPromptAttack: @@ -999,7 +949,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1007,7 +957,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": 
worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1113,7 +1063,6 @@ def run( self.test_goals, self.test_targets, self.test_workers, - **self.mpa_kwargs, ) if num_goals == len(self.goals) and num_workers == len(self.workers): stop_inner_on_success = False @@ -1239,7 +1188,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1247,7 +1196,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1351,7 +1300,6 @@ def run( self.test_goals, self.test_targets, self.test_workers, - **self.mpa_kwargs, ) attack.run( n_steps=n_steps, @@ -1454,7 +1402,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1462,7 +1410,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1523,7 +1471,6 @@ def run( self.test_prefixes, self.logfile, self.managers, - **self.mpa_kwargs, ) all_inputs = [p.eval_str for p in attack.prompts[0]._prompts] max_new_tokens = [p.test_new_toks for p in attack.prompts[0]._prompts] @@ -1590,7 +1537,6 @@ def __init__( token: str, model_kwargs: dict[str, Any], tokenizer: Any, - conv_template: Conversation, device: str, ) -> None: self.model = ( @@ -1601,7 +1547,6 @@ def __init__( .eval() ) 
self.tokenizer = tokenizer - self.conv_template = conv_template self.tasks: mp.JoinableQueue[Any] = mp.JoinableQueue() self.results: mp.JoinableQueue[Any] = mp.JoinableQueue() self.process: Optional[mp.Process] = None @@ -1672,45 +1617,23 @@ def get_workers(params: Any, eval: bool = False) -> tuple[list[ModelWorker], lis tokenizer.padding_side = "left" if not tokenizer.pad_token: tokenizer.pad_token = tokenizer.eos_token + if tokenizer.chat_template is None: + raise ValueError( + f"Tokenizer {params.tokenizer_paths[i]!r} has no chat_template configured. GCG uses " + "tokenizer.apply_chat_template() to render prompts (see issue #965); without a chat " + "template the attack cannot be set up. Pick a chat-tuned model or set " + "tokenizer.chat_template explicitly." + ) tokenizers.append(tokenizer) logger.info(f"Loaded {len(tokenizers)} tokenizers") - raw_conv_templates = [] - for template in params.conversation_templates: - if template in ["llama-2", "mistral", "llama-3-8b", "vicuna"]: - raw_conv_templates.append(get_conversation_template(template)) - elif template in ["phi-3-mini"]: - conv_template = Conversation( - name="phi-3-mini", - system_template="<|system|>\n{system_message}", - system_message="", - roles=("<|user|>", "<|assistant|>"), - sep_style=SeparatorStyle.CHATML, - sep="<|end|>", - stop_token_ids=[32000, 32001, 32007], - ) - raw_conv_templates.append(conv_template) - else: - raise ValueError("Conversation template not recognized") - - conv_templates = [] - for conv in raw_conv_templates: - if conv.name == "zero_shot": - conv.roles = tuple("### " + r for r in conv.roles) - conv.sep = "\n" - elif conv.name == "llama-2": - conv.sep2 = conv.sep2.strip() - conv_templates.append(conv) - - logger.info(f"Loaded {len(conv_templates)} conversation templates") workers = [ ModelWorker( params.model_paths[i], params.token, params.model_kwargs[i], tokenizers[i], - conv_templates[i], params.devices[i], ) for i in range(len(params.model_paths)) diff --git 
a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml index 841ea4be21..504fb3dd4b 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] train_data: "" test_data: "" @@ -28,4 +27,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml index 2fbc77a85c..a8b60c3926 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] train_data: "" test_data: "" @@ -28,4 +27,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml index a897403d0a..31e562c226 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: 
["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] train_data: "" test_data: "" @@ -28,4 +27,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml index 3f3b466678..d3547152d3 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] train_data: "" test_data: "" @@ -28,4 +27,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml new file mode 100644 index 0000000000..d080430016 --- /dev/null +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml @@ -0,0 +1,29 @@ +transfer: False +target_weight: 1.0 +control_weight: 0.0 +progressive_goals: False +progressive_models: False +anneal: False +incr_control: False +stop_on_success: False +verbose: True +allow_non_ascii: False +num_train_models: 1 +result_prefix: "results/individual_phi4" +tokenizer_paths: ["microsoft/phi-4"] +tokenizer_kwargs: [{"use_fast": True}] +model_paths: ["microsoft/phi-4"] +model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] +devices: ["cuda:0"] +train_data: "" +test_data: "" +n_train_data: 50 +n_test_data: 0 +control_init: "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !" 
+n_steps: 500 +test_steps: 50 +batch_size: 512 +learning_rate: 0.01 +topk: 256 +temp: 1 +filter_cand: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml index 91fe68a563..286cb4a013 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["lmsys/vicuna-13b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-13b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] train_data: "" test_data: "" @@ -28,4 +27,3 @@ learning_rate: 0.01 topk: 256 temp: 1 filter_cand: True -gbda_deterministic: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml index f6f3ae3d58..351622dba3 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruc tokenizer_kwargs: [{"use_fast": False}, {"use_fast": False}, {"use_fast": False}, {"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruct-v0.1", "meta-llama/Meta-Llama-3-8B-Instruct", "lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2", "mistral", "llama-3-8b", "vicuna"] devices: ["cuda:0", "cuda:1", "cuda:2", "cuda:3"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml 
b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml index d7200b1776..fc3c824124 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml index 5e538824c8..0b353b8769 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml index c2f808c185..4c6438a6af 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml index 679556c3e4..35316c5d98 100644 --- 
a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml index 2bbfe0a965..3850c95a21 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["lmsys/vicuna-7b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/log.py b/pyrit/auxiliary_attacks/gcg/experiments/log.py index 1bbf65ac20..bdd96c1ca4 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/log.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/log.py @@ -3,11 +3,8 @@ import logging import subprocess as sp -import time from typing import Any, Optional -import mlflow - logger = logging.getLogger(__name__) _DEFAULT_PARAM_KEYS: list[str] = [ @@ -26,7 +23,7 @@ def log_params( param_keys: Optional[list[str]] = None, ) -> None: """ - Log selected parameters to MLflow. + Log selected parameters via Python logging. Args: params (Any): A config object with a `to_dict()` method containing all parameters. 
@@ -34,20 +31,18 @@ def log_params( """ if param_keys is None: param_keys = _DEFAULT_PARAM_KEYS - mlflow_params = {key: params.to_dict()[key] for key in param_keys} - mlflow.log_params(mlflow_params) + logged_params = {key: params.to_dict()[key] for key in param_keys} + logger.info(f"Training parameters: {logged_params}") def log_train_goals(*, train_goals: list[str]) -> None: """ - Log training goals as a text artifact to MLflow. + Log training goals via Python logging. Args: train_goals (list[str]): The list of training goal strings to log. """ - timestamp = time.strftime("%Y%m%d-%H%M%S") - train_goals_str = "\n".join(train_goals) - mlflow.log_text(train_goals_str, f"train_goals_{timestamp}.txt") + logger.info(f"Training goals ({len(train_goals)}): {train_goals}") def get_gpu_memory() -> dict[str, int]: @@ -65,46 +60,40 @@ def get_gpu_memory() -> dict[str, int]: return memory_free_values -def log_gpu_memory(*, step: int, synchronous: bool = False) -> None: +def log_gpu_memory(*, step: int) -> None: """ - Log free GPU memory metrics to MLflow. + Log free GPU memory via Python logging. Args: step (int): The current training step number. - synchronous (bool): Whether to log synchronously. Defaults to False. """ - memory_values = get_gpu_memory() - for gpu, val in memory_values.items(): - mlflow.log_metric(gpu, val, step=step, synchronous=synchronous) + try: + memory_values = get_gpu_memory() + logger.info(f"Step {step} GPU memory: {memory_values}") + except Exception: + logger.debug("Could not query GPU memory (nvidia-smi not available)") -def log_loss(*, step: int, loss: float, synchronous: bool = False) -> None: +def log_loss(*, step: int, loss: float) -> None: """ - Log training loss to MLflow. + Log training loss via Python logging. Args: step (int): The current training step number. loss (float): The loss value to log. - synchronous (bool): Whether to log synchronously. Defaults to False. 
""" - mlflow.log_metric("loss", loss, step=step, synchronous=synchronous) + logger.info(f"Step {step} loss: {loss}") def log_table_summary(*, losses: list[float], controls: list[str], n_steps: int) -> None: """ - Log a summary table of losses and controls to MLflow. + Log a summary of losses and controls via Python logging. Args: losses (list[float]): Loss values for each step. controls (list[str]): Control strings for each step. n_steps (int): Total number of steps. """ - timestamp = time.strftime("%Y%m%d-%H%M%S") - mlflow.log_table( - { - "step": [i + 1 for i in range(n_steps)], - "loss": losses, - "control": controls, - }, - artifact_file=f"gcg_results_{timestamp}.json", - ) + logger.info(f"Training complete ({n_steps} steps). Final loss: {losses[-1] if losses else 'N/A'}") + if controls: + logger.info(f"Final control: {controls[-1]}") diff --git a/pyrit/auxiliary_attacks/gcg/experiments/run.py b/pyrit/auxiliary_attacks/gcg/experiments/run.py index 342db0a67a..f78b6e220d 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/run.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/run.py @@ -3,6 +3,7 @@ import argparse import os +from pathlib import Path from typing import Any, Union import yaml @@ -10,7 +11,7 @@ from pyrit.auxiliary_attacks.gcg.experiments.train import GreedyCoordinateGradientAdversarialSuffixGenerator from pyrit.setup.initialization import _load_environment_files -_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini"] +_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini", "phi_4"] _ALL_MODELS: str = "all_models" @@ -29,7 +30,9 @@ def _load_yaml_to_dict(config_path: str) -> dict[str, Any]: return data -def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parameters: Any) -> None: +def run_trainer( + *, model_name: str, setup: str = "single", output_dir: str = "outputs", **extra_config_parameters: Any +) -> None: """ Trains and generates adversarial suffix - single model 
single prompt. @@ -39,6 +42,10 @@ def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parame setup (str): Identifier for the setup, currently supports - "single": one prompt one model - "multiple": multiple prompts one model or multiple prompts multiple models + output_dir (str): Directory (created if missing) to write the result JSON file to. + Defaults to "outputs". For Azure ML jobs, pass the path AML provides for the + named output (typically expanded from ``${{outputs.<name>}}``) so the result + is uploaded to the job's artifact store. + **extra_config_parameters: Additional parameters to override config values. Raises: @@ -56,7 +63,7 @@ def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parame "train_data": ( "https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv" ), - "result_prefix": f"results/individual_behaviors_{model_name}_gcg", + "result_prefix": f"{output_dir}/individual_behaviors_{model_name}_gcg", "token": hf_token, } if setup != "single": @@ -73,8 +80,8 @@ def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parame config["model_name"] = model_name trainer = GreedyCoordinateGradientAdversarialSuffixGenerator() - if not os.path.exists("results"): - os.makedirs("results") + if not os.path.exists(output_dir): + os.makedirs(output_dir) trainer.generate_suffix(**config) @@ -100,11 +107,28 @@ def _parse_arguments() -> argparse.Namespace: parser.add_argument("--n_steps", type=int, default=100, help="Number of steps") parser.add_argument("--batch_size", type=int, default=512, help="Batch size") parser.add_argument("--random_seed", type=int, default=None, help="Random seed") + parser.add_argument( + "--output_dir", + type=str, + default="outputs", + help=( + "Directory to write the result JSON to. Pass the path Azure ML " + "expands ${{outputs.<name>}} to so the result is uploaded as a " + "named output artifact." 
+ ), + ) return parser.parse_args() if __name__ == "__main__": + # Resolve relative paths (configs/) against this file's directory so the + # script works regardless of where it is invoked from -- including + # `python -m pyrit.auxiliary_attacks.gcg.experiments.run` from any cwd. + # output_dir is left untouched so callers can point it at an absolute path + # (e.g. AML's ${{outputs.results}} expansion). + os.chdir(Path(__file__).resolve().parent) + args = _parse_arguments() run_trainer( model_name=args.model_name, @@ -116,4 +140,5 @@ def _parse_arguments() -> argparse.Namespace: batch_size=args.batch_size, test_steps=1, random_seed=args.random_seed, + output_dir=args.output_dir, ) diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py index 1eac86dc56..d5815f7bab 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/train.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py @@ -5,7 +5,6 @@ import time from typing import Any, Optional, Union -import mlflow import numpy as np import torch.multiprocessing as mp from ml_collections import config_dict @@ -42,7 +41,6 @@ def generate_suffix( tokenizer_paths: Optional[list[str]] = None, model_name: str = "", model_paths: Optional[list[str]] = None, - conversation_templates: Optional[list[str]] = None, result_prefix: str = "", train_data: str = "", control_init: str = _DEFAULT_CONTROL_INIT, @@ -82,7 +80,6 @@ def generate_suffix( tokenizer_paths (Optional[list[str]]): Paths to tokenizer models. model_name (str): Name identifier for the model. model_paths (Optional[list[str]]): Paths to model weights. - conversation_templates (Optional[list[str]]): Conversation template names. result_prefix (str): Prefix for result file paths. train_data (str): URL or path to training data CSV. control_init (str): Initial control string for optimization. @@ -110,7 +107,7 @@ def generate_suffix( topk (int): Number of top candidates to consider. Defaults to 256. 
temp (int): Temperature for sampling. Defaults to 1. filter_cand (bool): Whether to filter invalid candidates. Defaults to True. - gbda_deterministic (bool): Whether to use deterministic mode. Defaults to True. + gbda_deterministic (bool): Unused, kept for config compatibility. Defaults to True. logfile (str): Path to log file. Defaults to "". random_seed (int): Random seed for reproducibility. Defaults to 42. """ @@ -118,8 +115,6 @@ def generate_suffix( tokenizer_paths = [] if model_paths is None: model_paths = [] - if conversation_templates is None: - conversation_templates = [] if devices is None: devices = ["cuda:0"] if model_kwargs is None: @@ -132,7 +127,6 @@ def generate_suffix( tokenizer_paths=tokenizer_paths, model_name=model_name, model_paths=model_paths, - conversation_templates=conversation_templates, result_prefix=result_prefix, train_data=train_data, control_init=control_init, @@ -166,8 +160,6 @@ def generate_suffix( ) logger.info(f"Parameters: {params}") - # Start mlflow logging - mlflow.start_run() log_gpu_memory(step=0) log_params(params=params) @@ -300,7 +292,6 @@ def _create_attack( test_goals=test_goals, test_targets=test_targets, test_workers=test_workers, - mpa_deterministic=params.gbda_deterministic, mpa_lr=params.learning_rate, mpa_batch_size=params.batch_size, mpa_n_steps=params.n_steps, @@ -315,7 +306,6 @@ def _create_attack( test_goals=getattr(params, "test_goals", []), test_targets=getattr(params, "test_targets", []), test_workers=test_workers, - mpa_deterministic=params.gbda_deterministic, mpa_lr=params.learning_rate, mpa_batch_size=params.batch_size, mpa_n_steps=params.n_steps, diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile index 37163d3365..085d286b9d 100644 --- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile +++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile @@ -1,19 +1,22 @@ -FROM mcr.microsoft.com/azureml/curated/acpt-pytorch-2.2-cuda12.1:37 +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 
-RUN apt-get update +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.11 python3.11-venv python3.11-dev python3-pip \ + curl git build-essential && \ + rm -rf /var/lib/apt/lists/* -ENV UV_ENVIRONMENT_PATH /azureml-envs/pyrit +# Install uv +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:$PATH" -# Prepend path to AzureML environment -ENV PATH $UV_ENVIRONMENT_PATH/bin:$PATH +# Create venv +RUN uv venv /opt/venv --python 3.11 +ENV PATH="/opt/venv/bin:$PATH" +ENV VIRTUAL_ENV="/opt/venv" -# Create uv environment -RUN uv venv --python 3.11 && \ - uv pip cache purge +WORKDIR /app -RUN uv pip list - -RUN git clone https://github.com/microsoft/PyRIT.git - -RUN cd PyRIT && uv sync --extra gcg && uv pip install git+https://github.com/lm-sys/FastChat.git@2c68a13bfe10b86f40e3eefc3fcfacb32c00b02a -RUN uv pip freeze +# Install PyRIT with GCG extras to get all dependencies +COPY pyproject.toml MANIFEST.in README.md LICENSE /app/ +COPY pyrit/ /app/pyrit/ +RUN uv pip install -e ".[gcg]" diff --git a/tests/end_to_end/auxiliary_attacks/__init__.py b/tests/end_to_end/auxiliary_attacks/__init__.py new file mode 100644 index 0000000000..acd459977f --- /dev/null +++ b/tests/end_to_end/auxiliary_attacks/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""End-to-end tests for PyRIT auxiliary attacks (GCG).""" diff --git a/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py new file mode 100644 index 0000000000..bf761f9ee8 --- /dev/null +++ b/tests/end_to_end/auxiliary_attacks/test_gcg_aml_e2e.py @@ -0,0 +1,111 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""End-to-end test for the GCG Azure ML pipeline. 
+ +Executes `doc/code/auxiliary_attacks/1_gcg_azure_ml.py` directly as a Python +script (the jupytext percent-format `# %%` markers are plain comments, so the +file is valid Python). After the notebook submits the AML job, this test +polls until the job reaches a terminal state and asserts success. + +Running the notebook itself keeps the submission logic in one place: the +tutorial people read is the same code we test. Anything the user can do +manually with the notebook, this test verifies works end-to-end. + +Skipped unless `RUN_ALL_TESTS=true`. Per-test skip also applies when the +required Azure ML or HuggingFace credentials are missing, since this submits +real (paid) compute. On test failure or interruption, the submitted job is +cancelled so it does not continue burning compute. + +Required environment variables when `RUN_ALL_TESTS=true`: + - AZURE_ML_SUBSCRIPTION_ID + - AZURE_ML_RESOURCE_GROUP + - AZURE_ML_WORKSPACE_NAME + - HUGGINGFACE_TOKEN (must have access to meta-llama/Llama-2-7b-chat-hf) + +Optional: + - GCG_E2E_MAX_WAIT_SECONDS (defaults to 5400 -- 90 minutes) +""" + +import contextlib +import os +import runpy +import time +from pathlib import Path + +import pytest + +# Skip the entire module unless RUN_ALL_TESTS=true; this test submits real +# paid Azure ML compute so it should never run in default CI. 
+pytestmark = pytest.mark.skipif( + os.getenv("RUN_ALL_TESTS", "").lower() != "true", + reason="RUN_ALL_TESTS is not set to true", +) + +# Heavy imports deferred until skip check passes +pytest.importorskip("azure.ai.ml", reason="azure-ai-ml not installed") +pytest.importorskip("azure.identity", reason="azure-identity not installed") + +from pyrit.common.path import HOME_PATH # noqa: E402 +from pyrit.setup.initialization import _load_environment_files # noqa: E402 + +_REQUIRED_ENV_VARS = ( + "AZURE_ML_SUBSCRIPTION_ID", + "AZURE_ML_RESOURCE_GROUP", + "AZURE_ML_WORKSPACE_NAME", + "HUGGINGFACE_TOKEN", +) +_NOTEBOOK_PATH = Path(HOME_PATH) / "doc" / "code" / "auxiliary_attacks" / "1_gcg_azure_ml.py" +_DEFAULT_MAX_WAIT_SECONDS = 5400 # 90 minutes +_POLL_INTERVAL_SECONDS = 30 +_TERMINAL_STATES = {"Completed", "Failed", "Canceled", "CancelRequested"} + + +@pytest.mark.timeout(_DEFAULT_MAX_WAIT_SECONDS + 600) +def test_gcg_aml_notebook_runs_to_completion() -> None: + """Execute the AML notebook end-to-end and verify the submitted job completes. + + The notebook is the single source of truth for how a GCG job is submitted + to Azure ML. This test loads it via runpy, extracts the submitted job + + MLClient from its namespace, then polls until the job reaches a terminal + state and asserts ``Completed``. 
+ """ + _load_environment_files(env_files=None, silent=True) + missing = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)] + if missing: + pytest.skip(f"Missing required env vars for GCG AML e2e test: {', '.join(missing)}") + + max_wait = int(os.environ.get("GCG_E2E_MAX_WAIT_SECONDS", _DEFAULT_MAX_WAIT_SECONDS)) + + notebook_globals = runpy.run_path(str(_NOTEBOOK_PATH), run_name="__main__") + submitted_job = notebook_globals["returned_job"] + ml_client = notebook_globals["ml_client"] + job_name = submitted_job.name + + final_status: str | None = None + status: str | None = None + try: + deadline = time.monotonic() + max_wait + while time.monotonic() < deadline: + status = ml_client.jobs.get(job_name).status + if status in _TERMINAL_STATES: + final_status = status + break + time.sleep(_POLL_INTERVAL_SECONDS) + else: + pytest.fail( + f"GCG job '{job_name}' did not reach a terminal state within " + f"{max_wait}s (last status: {status!r}). Studio URL: {submitted_job.studio_url}" + ) + + assert final_status == "Completed", ( + f"GCG job '{job_name}' finished with status {final_status!r}, expected 'Completed'. " + f"Studio URL: {submitted_job.studio_url}" + ) + finally: + # Always try to cancel a non-terminal job so we never leak paid compute + # (e.g., if pytest is interrupted or the assertion fires before a + # terminal state is reached). + if final_status is None or final_status not in _TERMINAL_STATES: + with contextlib.suppress(Exception): + ml_client.jobs.begin_cancel(job_name) diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index d2e32d5974..f2944bf480 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -5,9 +5,15 @@ These tests validate that the GCG attack pipeline works end-to-end with a real (tiny) model. 
They use GPT-2 (~124M params) which can run on CPU, paired with -the llama-2 conversation template (which has explicit handling in _update_ids). +explicit chat templates set on the tokenizer (since GPT-2 has no default +chat template). -Requires: torch, transformers, fastchat, mlflow (GCG optional deps). +After PR #965 dropped fastchat, ``AttackPrompt._update_ids`` uses +``tokenizer.apply_chat_template()`` exclusively, so we exercise that code path +with two distinct template shapes (llama-2 style and ChatML/phi-3 style) to +catch template-specific regressions. + +Requires: torch, transformers (GCG optional deps). Skipped via importorskip when deps are not installed. """ @@ -15,10 +21,8 @@ torch = pytest.importorskip("torch", reason="torch not installed") transformers = pytest.importorskip("transformers", reason="transformers not installed") -pytest.importorskip("fastchat", reason="fastchat not installed") -from fastchat.model import get_conversation_template # noqa: E402 from transformers import AutoTokenizer, GPT2LMHeadModel # noqa: E402 from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( # noqa: E402 @@ -33,6 +37,31 @@ token_gradients, ) +# Minimal Jinja chat templates that exercise the two structural variants we care about: +# (1) Inline role markers ("[INST]"/"[/INST]") used by llama-2. +# (2) Distinct role tokens ("<|user|>"/"<|assistant|>") used by phi-3 / ChatML. +# Both must produce findable goal/control/target substrings for the new +# apply_chat_template-based _update_ids to compute correct slices. 
+_LLAMA_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST]" + "{%- elif m['role'] == 'assistant' -%}" + " {{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" +) + +_CHATML_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "<|user|>\n{{ m['content'] }}<|end|>\n<|assistant|>\n" + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}<|end|>" + "{%- endif -%}" + "{%- endfor -%}" +) + @pytest.fixture(scope="module") def gpt2_model() -> GPT2LMHeadModel: @@ -40,21 +69,33 @@ def gpt2_model() -> GPT2LMHeadModel: return GPT2LMHeadModel.from_pretrained("gpt2").eval() -@pytest.fixture(scope="module") -def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: - """Load GPT-2 tokenizer once for all tests in this module.""" +def _make_tokenizer(chat_template: str) -> transformers.PreTrainedTokenizer: + """Build a fresh GPT-2 tokenizer with the given chat template attached.""" tokenizer = AutoTokenizer.from_pretrained("gpt2") tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" + tokenizer.chat_template = chat_template return tokenizer @pytest.fixture() -def conv_template(): - """Create a fresh llama-2 conversation template for each test.""" - conv = get_conversation_template("llama-2") - conv.sep2 = conv.sep2.strip() - return conv +def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a llama-2-style chat template attached.""" + return _make_tokenizer(_LLAMA_STYLE_TEMPLATE) + + +@pytest.fixture() +def gpt2_chatml_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a ChatML / phi-3-style chat template attached. + + Exercises the second structural variant we care about: distinct role tokens + (``<|user|>``/``<|assistant|>``) versus llama-2's inline ``[INST]`` markers. 
+ Before PR #965 this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes; after the apply_chat_template rewrite both + template shapes share a single code path. + """ + return _make_tokenizer(_CHATML_STYLE_TEMPLATE) class TestTokenGradientsIntegration: @@ -90,20 +131,18 @@ def test_gradient_is_finite_and_nonzero( class TestGCGAttackPromptIntegration: - """Integration tests for GCGAttackPrompt with real GPT-2.""" + """Integration tests for GCGAttackPrompt with real GPT-2 + llama-style template.""" def test_prompt_initializes_with_valid_slices( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """AttackPrompt should initialize with non-empty, non-overlapping slices.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -116,14 +155,12 @@ def test_grad_returns_valid_gradient( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """GCGAttackPrompt.grad should return a finite, non-zero gradient tensor.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -138,14 +175,12 @@ def test_target_loss_is_finite_scalar( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Target loss from real model logits should be a finite positive number.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! 
!", ) @@ -162,14 +197,12 @@ def test_sample_control_produces_valid_candidates( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Sampled control tokens should be decodable by the tokenizer.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -214,3 +247,55 @@ def test_get_nonascii_toks_returns_nonempty_tensor(self, gpt2_tokenizer: transfo toks = get_nonascii_toks(gpt2_tokenizer, device="cpu") assert isinstance(toks, torch.Tensor) assert len(toks) > 0 + + +class TestGCGAttackPromptChatMLTemplate: + """Integration tests covering ChatML / phi-3 style templates. + + These exercise the second structural variant of chat templates (distinct + role tokens like ``<|user|>``/``<|assistant|>`` separated from content, + versus llama-2's inline ``[INST]`` markers). Before PR #965 dropped + fastchat, this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes (the same Phi-3 ``AttributeError`` we hit on + Azure ML). After the apply_chat_template rewrite both shapes share a single + code path, so these tests should pass alongside the llama-style ones above. + """ + + def test_prompt_initializes_with_chatml_template( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, + ) -> None: + """GCGAttackPrompt should construct successfully with a ChatML template.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_chatml_tokenizer, + control_init="! ! ! ! 
!", + ) + + assert prompt._control_slice.start < prompt._control_slice.stop + assert prompt._target_slice.start < prompt._target_slice.stop + assert prompt._control_slice.stop <= prompt._target_slice.start + assert prompt.input_ids.shape[0] > 0 + + def test_grad_returns_valid_gradient_with_chatml_template( + self, + gpt2_model: GPT2LMHeadModel, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, + ) -> None: + """gradient computation should work end-to-end with a ChatML template.""" + prompt = GCGAttackPrompt( + goal="Tell me how", + target="Sure here is", + tokenizer=gpt2_chatml_tokenizer, + control_init="! ! ! ! !", + ) + + grad = prompt.grad(gpt2_model) + + n_control = prompt._control_slice.stop - prompt._control_slice.start + assert grad.shape[0] == n_control + assert grad.shape[1] == gpt2_chatml_tokenizer.vocab_size + assert torch.isfinite(grad).all() diff --git a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py new file mode 100644 index 0000000000..abb64786de --- /dev/null +++ b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py @@ -0,0 +1,198 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Tests that exercise the full attack class wiring without mocking manager classes. + +These tests catch kwarg mismatches between IndividualPromptAttack/ProgressiveMultiPromptAttack +and MultiPromptAttack.__init__(), and template compatibility issues in _update_ids(). +""" + +from unittest.mock import MagicMock, patch + +import pytest + +attack_manager_mod = pytest.importorskip( + "pyrit.auxiliary_attacks.gcg.attack.base.attack_manager", + reason="GCG optional dependencies (torch, mlflow, etc.) 
not installed",
+)
+torch = pytest.importorskip("torch", reason="torch not installed")
+
+gcg_attack_mod = pytest.importorskip(
+    "pyrit.auxiliary_attacks.gcg.attack.gcg.gcg_attack",
+    reason="GCG optional dependencies not installed",
+)
+
+IndividualPromptAttack = attack_manager_mod.IndividualPromptAttack
+ProgressiveMultiPromptAttack = attack_manager_mod.ProgressiveMultiPromptAttack
+MultiPromptAttack = attack_manager_mod.MultiPromptAttack
+GCGAttackPrompt = gcg_attack_mod.GCGAttackPrompt
+GCGPromptManager = gcg_attack_mod.GCGPromptManager
+GCGMultiPromptAttack = gcg_attack_mod.GCGMultiPromptAttack
+
+train_mod = pytest.importorskip(
+    "pyrit.auxiliary_attacks.gcg.experiments.train",
+    reason="GCG train module not available",
+)
+Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator
+
+MANAGERS = {
+    "AP": GCGAttackPrompt,
+    "PM": GCGPromptManager,
+    "MPA": GCGMultiPromptAttack,
+}
+
+
+def _make_mock_worker() -> MagicMock:
+    """Create a mock worker whose tokenizer can stand in for a real chat tokenizer.
+
+    The wiring tests construct real ``GCGAttackPrompt`` instances which call
+    ``tokenizer.apply_chat_template`` and then walk character positions in the
+    rendered prompt. We need a real string + a tokenizer that can answer
+    ``char_to_token`` queries on it, so we back the mock with a real
+    gpt2 tokenizer (a small transformers tokenizer that
+    ships with all the methods we touch).
+ """ + from transformers import AutoTokenizer + + real_tokenizer = AutoTokenizer.from_pretrained("gpt2") + real_tokenizer.pad_token = real_tokenizer.eos_token + real_tokenizer.chat_template = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST] " + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" + ) + + worker = MagicMock() + worker.model.name_or_path = "test-model" + worker.tokenizer = real_tokenizer + return worker + + +class TestAttackClassWiring: + """Tests that verify attack classes can be constructed with real manager classes. + + These catch kwarg mismatches that mocked tests miss. + """ + + def test_individual_attack_creates_mpa_without_error(self) -> None: + """IndividualPromptAttack.run() should create MultiPromptAttack without TypeError. + + This catches the mpa_kwargs bug where dead kwargs (deterministic, lr, etc.) + were passed to MultiPromptAttack.__init__() which didn't accept them. + """ + worker = _make_mock_worker() + + # Create IndividualPromptAttack with the real GCG manager classes + attack = IndividualPromptAttack( + goals=["test goal"], + targets=["test target"], + workers=[worker], + control_init="! ! !", + managers=MANAGERS, + mpa_lr=0.01, + mpa_batch_size=64, + mpa_n_steps=5, + ) + + # The run() method creates MultiPromptAttack internally. + # Patch the MPA's run() to avoid actually running the attack, + # but let __init__ execute with real classes to catch kwarg issues. 
+ with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) + + def test_progressive_attack_creates_mpa_without_error(self) -> None: + """ProgressiveMultiPromptAttack.run() should create MultiPromptAttack without TypeError.""" + worker = _make_mock_worker() + + attack = ProgressiveMultiPromptAttack( + goals=["test goal"], + targets=["test target"], + workers=[worker], + progressive_goals=False, + progressive_models=False, + control_init="! ! !", + managers=MANAGERS, + mpa_lr=0.01, + mpa_batch_size=64, + mpa_n_steps=5, + ) + + with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) + + def test_create_attack_individual_wires_correctly(self) -> None: + """_create_attack with transfer=False should produce an IndividualPromptAttack + that can create internal MPA instances without error.""" + worker = _make_mock_worker() + + params = Generator._build_params( + transfer=False, + control_init="! ! 
!", + result_prefix="test", + learning_rate=0.01, + batch_size=64, + n_steps=5, + ) + + attack = Generator._create_attack( + params=params, + managers=MANAGERS, + train_goals=["test goal"], + train_targets=["test target"], + test_goals=[], + test_targets=[], + workers=[worker], + test_workers=[], + ) + + assert isinstance(attack, IndividualPromptAttack) + + # Verify internal MPA creation works + with patch.object(GCGMultiPromptAttack, "run", return_value=("control", 0.5, 1)): + attack.run( + n_steps=1, + batch_size=64, + topk=256, + temp=1, + allow_non_ascii=False, + target_weight=1.0, + control_weight=0.0, + anneal=False, + test_steps=1, + incr_control=False, + stop_on_success=False, + verbose=False, + filter_cand=True, + ) diff --git a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py index 2777bb1eea..ef5cc709c5 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py +++ b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py @@ -94,7 +94,6 @@ def test_all_configs_have_required_keys(self, config_files: list[str]) -> None: required_keys = { "tokenizer_paths", "model_paths", - "conversation_templates", "devices", } for path in config_files: diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py index e1568e1a0f..79dcbf2231 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -390,7 +390,7 @@ def test_transfer_true_creates_progressive(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -430,7 +430,7 @@ def test_transfer_false_creates_individual(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path 
= "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -484,7 +484,6 @@ def test_raises_on_mismatched_goals_targets(self) -> None: goals=["goal1", "goal2"], targets=["target1"], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -494,7 +493,6 @@ def test_raises_on_empty_goals(self) -> None: goals=[], targets=[], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -506,11 +504,11 @@ def test_raises_with_multiple_workers(self) -> None: mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "m1" mock_worker1.tokenizer.name_or_path = "t1" - mock_worker1.conv_template.name = "c1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "m2" mock_worker2.tokenizer.name_or_path = "t2" - mock_worker2.conv_template.name = "c2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" with pytest.raises(ValueError, match="exactly 1 worker"): EvaluateAttack( @@ -519,3 +517,149 @@ def test_raises_with_multiple_workers(self) -> None: workers=[mock_worker1, mock_worker2], managers={"AP": MagicMock(), "PM": MagicMock(), "MPA": MagicMock()}, ) + + +class TestUpdateIdsErrorPaths: + """Tests covering the error / fallback paths in AttackPrompt._update_ids.""" + + def test_raises_when_substring_not_in_rendered_prompt(self) -> None: + """If the chat template strips/transforms goal/control/target so they don't appear + verbatim in the rendered prompt, _update_ids must raise a clear ValueError.""" + tokenizer = MagicMock() + # Chat template that drops the user content entirely — goal/control won't appear in prompt + tokenizer.apply_chat_template.return_value = "[INST] [/INST] hello" + # tokenizer(...) 
returns an encoding-like object + encoding = MagicMock() + encoding.input_ids = [1, 2, 3, 4] + encoding.char_to_token.return_value = 1 + tokenizer.return_value = encoding + + with pytest.raises(ValueError, match="Could not locate goal/control/target"): + AttackPrompt( + goal="this-goal-is-missing", + target="this-target-is-missing", + tokenizer=tokenizer, + control_init="this-control-is-missing", + ) + + def test_start_tok_walks_forward_when_initial_position_has_no_token(self) -> None: + """char_to_token returns None for the start position (e.g., whitespace squashed + into the previous token); start_tok must walk forward to the next mappable + character. Slices should still be valid.""" + # Use a fully mocked tokenizer so we can deterministically force char_to_token + # to return None at specific positions, otherwise real tokenizers usually map + # every byte and never trigger the fallback. + prompt_text = "USER hello !! ASSISTANT world" + toks = list(range(15)) + + def char_to_token(pos: int) -> int | None: + # Positions of "h" and "w" both return None; the next char does map. This + # exercises the cur += 1 walk-forward branch in start_tok. + char = prompt_text[pos] if 0 <= pos < len(prompt_text) else "" + if char in ("h", "w"): + return None + # Map remaining positions in a way that preserves slice ordering + return min(pos // 2, len(toks) - 1) + + encoding = MagicMock() + encoding.input_ids = toks + encoding.char_to_token.side_effect = char_to_token + + tokenizer = MagicMock() + tokenizer.apply_chat_template.return_value = prompt_text + tokenizer.return_value = encoding + + # Construction must succeed even though char_to_token returns None at goal/target + # start positions ("h" / "w"). 
+ prompt = AttackPrompt( + goal="hello", + target="world", + tokenizer=tokenizer, + control_init="!!", + ) + assert isinstance(prompt._goal_slice.start, int) + assert isinstance(prompt._target_slice.start, int) + + def test_start_tok_returns_len_toks_when_no_position_maps(self) -> None: + """If char_to_token returns None for every position from char_pos to end-of-prompt, + start_tok must return len(toks) as a safe fallback (line 211).""" + prompt_text = "USER hello !! ASSISTANT world tail" + toks = list(range(20)) + + def char_to_token(pos: int) -> int | None: + char = prompt_text[pos] if 0 <= pos < len(prompt_text) else "" + # "tail" sits at end and never maps to a token (forces start_tok to exhaust + # the loop and hit `return len(toks)`); other content maps normally. + tail_start = prompt_text.find("tail") + if pos >= tail_start: + return None + return min(pos // 2, len(toks) - 1) + + encoding = MagicMock() + encoding.input_ids = toks + encoding.char_to_token.side_effect = char_to_token + + tokenizer = MagicMock() + tokenizer.apply_chat_template.return_value = prompt_text + tokenizer.return_value = encoding + + # "tail" as the target — its start position and every position after it returns + # None, so start_tok exits the while loop and returns len(toks). 
+ prompt = AttackPrompt( + goal="hello", + target="tail", + tokenizer=tokenizer, + control_init="!!", + ) + assert prompt._target_slice.start == len(toks) + + def test_end_tok_returns_len_toks_when_target_is_at_prompt_end(self) -> None: + """If the target sits at the very end of the rendered prompt, + char_to_token(end_pos) returns None — end_tok must clamp to len(toks).""" + from transformers import AutoTokenizer + + tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer.pad_token = tokenizer.eos_token + tokenizer.chat_template = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST]" + "{%- elif m['role'] == 'assistant' -%}" + " {{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" + ) + + prompt = AttackPrompt( + goal="hello", + target="world", # this sits at end of rendered prompt with no trailing tokens + tokenizer=tokenizer, + control_init="! ! !", + ) + # _target_slice.stop should be len(toks), not None or NoneType arithmetic + assert isinstance(prompt._target_slice.stop, int) + assert prompt._target_slice.stop > prompt._target_slice.start + + +class TestGetWorkersChatTemplateValidation: + """Tests for the chat-template precondition in get_workers.""" + + def test_raises_when_tokenizer_has_no_chat_template(self) -> None: + """Models without a chat_template cannot be used with apply_chat_template-based + GCG; get_workers should raise a clear ValueError pointing to the cause.""" + from unittest.mock import patch + + get_workers = attack_manager_mod.get_workers + + params = MagicMock() + params.tokenizer_paths = ["fake/no-chat-template-model"] + params.token = "" + params.tokenizer_kwargs = [{}] + + bare_tokenizer = MagicMock() + bare_tokenizer.chat_template = None + bare_tokenizer.pad_token = "" + + with patch.object(attack_manager_mod.AutoTokenizer, "from_pretrained", return_value=bare_tokenizer): + with pytest.raises(ValueError, match="no chat_template configured"): + get_workers(params) diff --git 
a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py index 7792c98224..ab42b5d961 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py +++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py @@ -1,17 +1,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -import subprocess from unittest.mock import MagicMock, patch import pytest +# Note: GPU-memory tests live in test_log.py since they only need the log +# module (stdlib imports). Anything that touches the train module needs +# the gcg extra installed (ml_collections, torch, etc.) so we skip the +# whole module when those imports fail. log_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.log", reason="GCG optional dependencies (mlflow, etc.) not installed", ) -log_gpu_memory = log_mod.log_gpu_memory -get_gpu_memory = log_mod.get_gpu_memory train_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.train", @@ -20,101 +21,18 @@ Generator = train_mod.GreedyCoordinateGradientAdversarialSuffixGenerator -class TestGpuMemoryLogging: - """Tests for GPU memory query and logging.""" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_parses_nvidia_smi(self, mock_sp: MagicMock) -> None: - """Should parse nvidia-smi output into a dict of GPU -> free memory.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" - result = get_gpu_memory() - assert result == {"gpu1_free_memory": 8000, "gpu2_free_memory": 16000} - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: - """Should handle single GPU output.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n24000 MiB\n" - result = get_gpu_memory() - assert result == {"gpu1_free_memory": 24000} - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - 
@patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_log_gpu_memory_logs_to_mlflow(self, mock_sp: MagicMock, mock_mlflow: MagicMock) -> None: - """Should log each GPU's free memory as an MLflow metric.""" - mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" - log_gpu_memory(step=5) - - assert mock_mlflow.log_metric.call_count == 2 - calls = mock_mlflow.log_metric.call_args_list - assert calls[0].args == ("gpu1_free_memory", 8000) - assert calls[0].kwargs["step"] == 5 - assert calls[1].args == ("gpu2_free_memory", 16000) - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") - def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: - """Should propagate exception when nvidia-smi is not available.""" - mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") - with pytest.raises(subprocess.CalledProcessError): - get_gpu_memory() - - class TestGenerateSuffixLifecycle: - """Tests for generate_suffix MLflow and worker lifecycle management.""" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") - def test_mlflow_run_started_before_training( - self, - mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, - mock_log_train_goals: MagicMock, - mock_log_params: MagicMock, - mock_log_gpu_memory: MagicMock, - mock_get_goals: MagicMock, - mock_get_workers: MagicMock, - ) -> None: - """MLflow run should be started before any training begins.""" - mock_get_goals.return_value = (["goal1"], ["target1"], [], []) - mock_worker = MagicMock() 
- mock_worker.model.name_or_path = "test-model" - mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" - mock_get_workers.return_value = ([mock_worker], []) - - mock_attack_instance = MagicMock() - mock_attack_lib.GCGAttackPrompt = MagicMock - mock_attack_lib.GCGPromptManager = MagicMock - mock_attack_lib.GCGMultiPromptAttack = MagicMock - - # Patch _create_attack to avoid IndividualPromptAttack's logfile writing - with patch.object(Generator, "_create_attack", return_value=mock_attack_instance): - generator = Generator.__new__(Generator) - generator.generate_suffix( - tokenizer_paths=["test/path"], - model_paths=["test/path"], - conversation_templates=["llama-2"], - train_data="", - n_steps=1, - ) - - mock_mlflow.start_run.assert_called_once() + """Tests for generate_suffix worker lifecycle management.""" @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_workers") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.get_goals_and_targets") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") def test_workers_stopped_after_training( self, mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, mock_log_train_goals: MagicMock, mock_log_params: MagicMock, mock_log_gpu_memory: MagicMock, @@ -126,11 +44,11 @@ def test_workers_stopped_after_training( mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "test-model-1" mock_worker1.tokenizer.name_or_path = "test-tokenizer-1" - mock_worker1.conv_template.name = "test-template-1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "test-model-2" mock_worker2.tokenizer.name_or_path = 
"test-tokenizer-2" - mock_worker2.conv_template.name = "test-template-2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker1], [mock_worker2]) mock_attack_instance = MagicMock() @@ -143,7 +61,6 @@ def test_workers_stopped_after_training( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, ) @@ -156,12 +73,10 @@ def test_workers_stopped_after_training( @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_gpu_memory") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_params") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.log_train_goals") - @patch("pyrit.auxiliary_attacks.gcg.experiments.train.mlflow") @patch("pyrit.auxiliary_attacks.gcg.experiments.train.attack_lib") def test_workers_not_stopped_on_training_failure( self, mock_attack_lib: MagicMock, - mock_mlflow: MagicMock, mock_log_train_goals: MagicMock, mock_log_params: MagicMock, mock_log_gpu_memory: MagicMock, @@ -177,7 +92,7 @@ def test_workers_not_stopped_on_training_failure( mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker], []) mock_attack_instance = MagicMock() @@ -192,7 +107,6 @@ def test_workers_not_stopped_on_training_failure( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, ) diff --git a/tests/unit/auxiliary_attacks/gcg/test_log.py b/tests/unit/auxiliary_attacks/gcg/test_log.py index e20b5a7c13..c225b5aeba 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_log.py +++ b/tests/unit/auxiliary_attacks/gcg/test_log.py @@ -1,14 +1,17 @@ # Copyright (c) Microsoft Corporation. 
# Licensed under the MIT license. +import subprocess from unittest.mock import MagicMock, patch import pytest log_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.log", - reason="GCG optional dependencies (mlflow, etc.) not installed", + reason="GCG optional dependencies not installed", ) +get_gpu_memory = log_mod.get_gpu_memory +log_gpu_memory = log_mod.log_gpu_memory log_loss = log_mod.log_loss log_params = log_mod.log_params log_table_summary = log_mod.log_table_summary @@ -18,9 +21,8 @@ class TestLogParams: """Tests for the log_params function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_default_param_keys(self, mock_mlflow: MagicMock) -> None: - """Should log the default parameter keys to MLflow.""" + def test_logs_default_param_keys(self) -> None: + """Should extract default parameter keys without error.""" params = MagicMock() params.to_dict.return_value = { "model_name": "test_model", @@ -32,89 +34,99 @@ def test_logs_default_param_keys(self, mock_mlflow: MagicMock) -> None: "extra_param": "ignored", } + # Should not raise log_params(params=params) - mock_mlflow.log_params.assert_called_once() - logged_params = mock_mlflow.log_params.call_args[0][0] - assert logged_params == { - "model_name": "test_model", - "transfer": False, - "n_train_data": 50, - "n_test_data": 10, - "n_steps": 100, - "batch_size": 512, - } - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_custom_param_keys(self, mock_mlflow: MagicMock) -> None: - """Should log only the specified parameter keys.""" + def test_logs_custom_param_keys(self) -> None: + """Should accept custom parameter keys.""" params = MagicMock() params.to_dict.return_value = { "model_name": "test_model", "batch_size": 256, } + # Should not raise log_params(params=params, param_keys=["model_name", "batch_size"]) - logged_params = mock_mlflow.log_params.call_args[0][0] - assert logged_params == {"model_name": "test_model", "batch_size": 
256} - class TestLogTrainGoals: """Tests for the log_train_goals function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_goals_as_text(self, mock_mlflow: MagicMock) -> None: - """Should log training goals joined by newlines.""" - goals = ["goal1", "goal2", "goal3"] + def test_logs_goals(self) -> None: + """Should log training goals without error.""" + log_train_goals(train_goals=["goal1", "goal2", "goal3"]) - log_train_goals(train_goals=goals) - - mock_mlflow.log_text.assert_called_once() - logged_text = mock_mlflow.log_text.call_args[0][0] - assert logged_text == "goal1\ngoal2\ngoal3" - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_empty_goals(self, mock_mlflow: MagicMock) -> None: + def test_logs_empty_goals(self) -> None: """Should handle empty goals list.""" log_train_goals(train_goals=[]) - mock_mlflow.log_text.assert_called_once() - logged_text = mock_mlflow.log_text.call_args[0][0] - assert logged_text == "" - class TestLogLoss: """Tests for the log_loss function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_loss_metric(self, mock_mlflow: MagicMock) -> None: - """Should log loss as an MLflow metric.""" + def test_logs_loss(self) -> None: + """Should log loss without error.""" log_loss(step=5, loss=0.123) - mock_mlflow.log_metric.assert_called_once_with("loss", 0.123, step=5, synchronous=False) - - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_loss_synchronously(self, mock_mlflow: MagicMock) -> None: - """Should support synchronous logging.""" - log_loss(step=1, loss=0.5, synchronous=True) - - mock_mlflow.log_metric.assert_called_once_with("loss", 0.5, step=1, synchronous=True) - class TestLogTableSummary: """Tests for the log_table_summary function.""" - @patch("pyrit.auxiliary_attacks.gcg.experiments.log.mlflow") - def test_logs_table_with_correct_data(self, mock_mlflow: MagicMock) -> None: - """Should log a table with 
step numbers, losses, and controls.""" - losses = [0.5, 0.3, 0.1] - controls = ["ctrl1", "ctrl2", "ctrl3"] - - log_table_summary(losses=losses, controls=controls, n_steps=3) - - mock_mlflow.log_table.assert_called_once() - logged_data = mock_mlflow.log_table.call_args[0][0] - assert logged_data["step"] == [1, 2, 3] - assert logged_data["loss"] == [0.5, 0.3, 0.1] - assert logged_data["control"] == ["ctrl1", "ctrl2", "ctrl3"] + def test_logs_table_summary(self) -> None: + """Should log summary without error.""" + log_table_summary(losses=[0.5, 0.3, 0.1], controls=["ctrl1", "ctrl2", "ctrl3"], n_steps=3) + + def test_logs_empty_summary(self) -> None: + """Should handle empty losses and controls.""" + log_table_summary(losses=[], controls=[], n_steps=0) + + +class TestGpuMemoryLogging: + """Tests for GPU memory query and logging. + + Lives here (not test_lifecycle.py) so the tests don't transitively + depend on the GCG `train` module (which requires `ml_collections`, + only installed with the `gcg` extra). The log module itself only + uses stdlib imports, so these tests run in any CI environment. 
+ """ + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_parses_nvidia_smi(self, mock_sp: MagicMock) -> None: + """Should parse nvidia-smi output into a dict of GPU -> free memory.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 8000, "gpu2_free_memory": 16000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_single_gpu(self, mock_sp: MagicMock) -> None: + """Should handle single GPU output.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n24000 MiB\n" + result = get_gpu_memory() + assert result == {"gpu1_free_memory": 24000} + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_log_gpu_memory_logs_via_logging(self, mock_sp: MagicMock) -> None: + """Should log GPU memory info without error on the success path.""" + mock_sp.check_output.return_value = b"memory.free [MiB]\n8000 MiB\n16000 MiB\n" + # Should not raise + log_gpu_memory(step=5) + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_log_gpu_memory_swallows_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: + """Should swallow exceptions when nvidia-smi is not available. + + Covers the except branch of `log_gpu_memory` -- callers (like the + train loop) should never crash because the runtime happens not to + have nvidia-smi. 
+ """ + mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") + # Must not raise + log_gpu_memory(step=5) + + @patch("pyrit.auxiliary_attacks.gcg.experiments.log.sp") + def test_get_gpu_memory_handles_nvidia_smi_failure(self, mock_sp: MagicMock) -> None: + """`get_gpu_memory` itself should propagate the exception (only + `log_gpu_memory` is expected to swallow it).""" + mock_sp.check_output.side_effect = subprocess.CalledProcessError(1, "nvidia-smi") + with pytest.raises(subprocess.CalledProcessError): + get_gpu_memory() diff --git a/uv.lock b/uv.lock index bff1fbd05b..5bc96d73ff 100644 --- a/uv.lock +++ b/uv.lock @@ -448,7 +448,7 @@ wheels = [ [[package]] name = "azure-ai-ml" -version = "1.31.0" +version = "1.32.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-common" }, @@ -469,9 +469,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/74/472494706149d6b63dd22b1389d6b3756f586d98b1c074f120d971e2180f/azure_ai_ml-1.31.0.tar.gz", hash = "sha256:ba3f00df768292e197c47a03eb8eeb36d70bc865fca3fcf67738e0f124cc0d1c", size = 9456188, upload-time = "2025-12-30T18:31:00.529Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/2c/3087f02752886fea59c5f0453a0647b6f7970db8571d581a45a746b21df2/azure_ai_ml-1.32.0.tar.gz", hash = "sha256:edb90219520412132fac100a31a40264a2145150dc4a60fb884acdacc8ded7d5", size = 9442010, upload-time = "2026-03-16T17:04:34.915Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/bf/5b3df818fc6993f9b54bf88c21a462076e8c11ca7052556181e804de8e2e/azure_ai_ml-1.31.0-py3-none-any.whl", hash = "sha256:e2eb928660b10245349dfd1b8e92a598bd0df8af492d48edfcaf0b2bd621702d", size = 13186666, upload-time = "2025-12-30T18:31:03.833Z" }, + { url = "https://files.pythonhosted.org/packages/a9/cd/89930cb0ee2101ca67c250eaf302b61a86a0e6dd90fa8d7e89ed6cb5644d/azure_ai_ml-1.32.0-py3-none-any.whl", hash = 
"sha256:710cce6f706b64a65860218edc757ef1b9699ba9b284defe42926a9dc6116e15", size = 13169553, upload-time = "2026-03-16T17:04:38.897Z" }, ] [[package]] @@ -638,27 +638,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/41/f03fca7144b4a20419cdec03feb2020d3a44fe815de9ee52f089dc59ba95/azure_storage_file_share-12.23.1-py3-none-any.whl", hash = "sha256:10f83e82e68fb071bed3302614e35027ac9e03b63d0d850b81671e08936b0134", size = 307616, upload-time = "2025-10-29T13:39:46.896Z" }, ] -[[package]] -name = "azureml-mlflow" -version = "1.60.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "azure-common" }, - { name = "azure-core" }, - { name = "azure-identity" }, - { name = "azure-mgmt-core" }, - { name = "azure-storage-blob" }, - { name = "cryptography" }, - { name = "jsonpickle" }, - { name = "mlflow-skinny" }, - { name = "msrest" }, - { name = "python-dateutil" }, - { name = "pytz" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/cb/b1f1ac36949958a15707f0d0f361e0a610523dc44cf82e64a404acb9797c/azureml_mlflow-1.60.0-py3-none-any.whl", hash = "sha256:9074fa389cf24f16f3aff7d7cda62a658c93b65a4aecc3dd50a5f1e45909687f", size = 1020403, upload-time = "2025-04-11T20:16:27.116Z" }, -] - [[package]] name = "babel" version = "2.17.0" @@ -774,15 +753,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/de/acae8e9f9a1f4bb393d41c8265898b0f29772e38eac14e9f69d191e2c006/blis-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:9e5fdf4211b1972400f8ff6dafe87cb689c5d84f046b4a76b207c0bd2270faaf", size = 6324695, upload-time = "2025-11-17T12:28:28.401Z" }, ] -[[package]] -name = "cachetools" -version = "6.2.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bc/1d/ede8680603f6016887c062a2cf4fc8fdba905866a3ab8831aa8aa651320c/cachetools-6.2.4.tar.gz", hash = "sha256:82c5c05585e70b6ba2d3ae09ea60b79548872185d2f24ae1f2709d37299fd607", size = 31731, upload-time = 
"2025-12-15T18:24:53.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/fc/1d7b80d0eb7b714984ce40efc78859c022cd930e402f599d8ca9e39c78a4/cachetools-6.2.4-py3-none-any.whl", hash = "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", size = 11551, upload-time = "2025-12-15T18:24:52.332Z" }, -] - [[package]] name = "catalogue" version = "2.0.10" @@ -1048,15 +1018,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/8a/c4bb04426d608be4a3171efa2e233d2c59a5c8937850c10d098e126df18e/cloudpathlib-0.23.0-py3-none-any.whl", hash = "sha256:8520b3b01468fee77de37ab5d50b1b524ea6b4a8731c35d1b7407ac0cd716002", size = 62755, upload-time = "2025-10-07T22:47:54.905Z" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -1508,20 +1469,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/66/150e406a2db5535533aa3c946de58f0371f2e412e23f050c704588023e6e/cymem-2.0.13-cp314-cp314t-win_arm64.whl", hash = "sha256:e9027764dc5f1999fb4b4cabee1d0322c59e330c0a6485b436a68275f614277f", size = 39715, upload-time = "2025-11-14T14:58:24.773Z" }, ] -[[package]] -name = "databricks-sdk" -version = "0.76.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-auth" }, - { name = "protobuf" }, - { name 
= "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/82/5efcfdca8779c84b5c6f61cc110d0938c9818e422f55c36a68d96b98c61f/databricks_sdk-0.76.0.tar.gz", hash = "sha256:fcfce4561b090b3c8e9cac2101f549766d9fb3bece31bb5720571919fa37d210", size = 822376, upload-time = "2025-12-17T17:11:31.907Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/96/ee7742b94f996560c57d6fb8d2e10eab3c489e8a72187369ed0917baf8aa/databricks_sdk-0.76.0-py3-none-any.whl", hash = "sha256:6696dda22bc52c8f50a50d24e6ccd1c855f92c0f68f5afe4eb2e77d5b1b1a65f", size = 774688, upload-time = "2025-12-17T17:11:29.925Z" }, -] - [[package]] name = "datasets" version = "4.8.5" @@ -1638,20 +1585,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] -[[package]] -name = "docker" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "requests" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, -] - [[package]] name = "ecoji" version = "0.1.1" @@ -1757,19 +1690,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" }, ] -[[package]] -name = "flask-cors" -version = "6.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flask" }, - { name = "werkzeug" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/74/0fc0fa68d62f21daef41017dafab19ef4b36551521260987eb3a5394c7ba/flask_cors-6.0.2.tar.gz", hash = "sha256:6e118f3698249ae33e429760db98ce032a8bf9913638d085ca0f4c5534ad2423", size = 13472, upload-time = "2025-12-12T20:31:42.861Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/af/72ad54402e599152de6d067324c46fe6a4f531c7c65baf7e96c63db55eaf/flask_cors-6.0.2-py3-none-any.whl", hash = "sha256:e57544d415dfd7da89a9564e1e3a9e515042df76e12130641ca6f3f2f03b699a", size = 13257, upload-time = "2025-12-12T20:31:41.3Z" }, -] - [[package]] name = "fonttools" version = "4.61.1" @@ -1971,80 +1891,6 @@ http = [ { name = "aiohttp" }, ] -[[package]] -name = "gitdb" -version = "4.0.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "smmap" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, -] - -[[package]] -name = "gitpython" -version = "3.1.50" -source = { registry = "https://pypi.org/simple" } 
-dependencies = [ - { name = "gitdb" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/33/f6/354ae6491228b5eb40e10d89c4d13c651fe1cf7556e35ebdded50cff57ce/gitpython-3.1.50.tar.gz", hash = "sha256:80da2d12504d52e1f998772dc5baf6e553f8d2fcfe1fcc226c9d9a2ee3372dcc", size = 219798, upload-time = "2026-05-06T04:01:26.571Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" }, -] - -[[package]] -name = "google-auth" -version = "2.48.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cryptography" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0c/41/242044323fbd746615884b1c16639749e73665b718209946ebad7ba8a813/google_auth-2.48.0.tar.gz", hash = "sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce", size = 326522, upload-time = "2026-01-26T19:22:47.157Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" }, -] - -[[package]] -name = "graphene" -version = "3.4.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "graphql-core" }, - { name = "graphql-relay" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cc/f6/bf62ff950c317ed03e77f3f6ddd7e34aaa98fe89d79ebd660c55343d8054/graphene-3.4.3.tar.gz", hash = "sha256:2a3786948ce75fe7e078443d37f609cbe5bb36ad8d6b828740ad3b95ed1a0aaa", size = 44739, upload-time = "2024-11-09T20:44:25.757Z" } 
-wheels = [ - { url = "https://files.pythonhosted.org/packages/66/e0/61d8e98007182e6b2aca7cf65904721fb2e4bce0192272ab9cb6f69d8812/graphene-3.4.3-py2.py3-none-any.whl", hash = "sha256:820db6289754c181007a150db1f7fff544b94142b556d12e3ebc777a7bf36c71", size = 114894, upload-time = "2024-11-09T20:44:23.851Z" }, -] - -[[package]] -name = "graphql-core" -version = "3.2.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ac/9b/037a640a2983b09aed4a823f9cf1729e6d780b0671f854efa4727a7affbe/graphql_core-3.2.7.tar.gz", hash = "sha256:27b6904bdd3b43f2a0556dad5d579bdfdeab1f38e8e8788e555bdcb586a6f62c", size = 513484, upload-time = "2025-11-01T22:30:40.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/14/933037032608787fb92e365883ad6a741c235e0ff992865ec5d904a38f1e/graphql_core-3.2.7-py3-none-any.whl", hash = "sha256:17fc8f3ca4a42913d8e24d9ac9f08deddf0a0b2483076575757f6c412ead2ec0", size = 207262, upload-time = "2025-11-01T22:30:38.912Z" }, -] - -[[package]] -name = "graphql-relay" -version = "3.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "graphql-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/13/98fbf8d67552f102488ffc16c6f559ce71ea15f6294728d33928ab5ff14d/graphql-relay-3.2.0.tar.gz", hash = "sha256:1ff1c51298356e481a0be009ccdff249832ce53f30559c1338f22a0e0d17250c", size = 50027, upload-time = "2022-04-16T11:03:45.447Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/74/16/a4cf06adbc711bd364a73ce043b0b08d8fa5aae3df11b6ee4248bcdad2e0/graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5", size = 16940, upload-time = "2022-04-16T11:03:43.895Z" }, -] - [[package]] name = "greenlet" version = "3.3.0" @@ -2135,18 +1981,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/4d/51/c936033e16d12b627ea334aaaaf42229c37620d0f15593456ab69ab48161/griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f", size = 142004, upload-time = "2026-02-09T19:09:40.561Z" }, ] -[[package]] -name = "gunicorn" -version = "23.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -2286,15 +2120,6 @@ http2 = [ { name = "h2" }, ] -[[package]] -name = "huey" -version = "2.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/29/3428d52eb8e85025e264a291641a9f9d6407cc1e51d1b630f6ac5815999a/huey-2.6.0.tar.gz", hash = "sha256:8d11f8688999d65266af1425b831f6e3773e99415027177b8734b0ffd5e251f6", size = 221068, upload-time = "2026-01-06T03:01:02.055Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/34/fae9ac8f1c3a552fd3f7ff652b94c78d219dedc5fce0c0a4232457760a00/huey-2.6.0-py3-none-any.whl", hash = "sha256:1b9df9d370b49c6d5721ba8a01ac9a787cf86b3bdc584e4679de27b920395c3f", size = 76951, upload-time = "2026-01-06T03:01:00.808Z" }, -] - [[package]] name = "huggingface-hub" version = "1.13.0" @@ -2628,15 +2453,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, ] -[[package]] -name = "joblib" -version = "1.5.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, -] - [[package]] name = "json5" version = "0.13.0" @@ -2646,15 +2462,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/9e/038522f50ceb7e74f1f991bf1b699f24b0c2bbe7c390dd36ad69f4582258/json5-0.13.0-py3-none-any.whl", hash = "sha256:9a08e1dd65f6a4d4c6fa82d216cf2477349ec2346a38fd70cc11d2557499fbcc", size = 36163, upload-time = "2026-01-01T19:42:13.962Z" }, ] -[[package]] -name = "jsonpickle" -version = "4.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/a6/d07afcfdef402900229bcca795f80506b207af13a838d4d99ad45abf530c/jsonpickle-4.1.1.tar.gz", hash = "sha256:f86e18f13e2b96c1c1eede0b7b90095bbb61d99fedc14813c44dc2f361dbbae1", size = 316885, upload-time = "2025-06-02T20:36:11.57Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/73/04df8a6fa66d43a9fd45c30f283cc4afff17da671886e451d52af60bdc7e/jsonpickle-4.1.1-py3-none-any.whl", hash = 
"sha256:bb141da6057898aa2438ff268362b126826c812a1721e31cf08a6e142910dc91", size = 47125, upload-time = "2025-06-02T20:36:08.647Z" }, -] - [[package]] name = "jsonpointer" version = "3.0.0" @@ -3430,90 +3237,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/8a/18d4ff2c7bd83f30d6924bd4ad97abf418488c3f908dea228d6f0961ad68/ml_collections-1.1.0-py3-none-any.whl", hash = "sha256:23b6fa4772aac1ae745a96044b925a5746145a70734f087eaca6626e92c05cbc", size = 76707, upload-time = "2025-04-17T08:24:59.038Z" }, ] -[[package]] -name = "mlflow" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "alembic" }, - { name = "cryptography" }, - { name = "docker" }, - { name = "flask" }, - { name = "flask-cors" }, - { name = "graphene" }, - { name = "gunicorn", marker = "sys_platform != 'win32'" }, - { name = "huey" }, - { name = "matplotlib" }, - { name = "mlflow-skinny" }, - { name = "mlflow-tracing" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "pandas" }, - { name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = 
"1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "skops" }, - { name = "sqlalchemy" }, - { name = "waitress", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/34/e328c073cd32c186fb242a957e5bade82433c06bc45b7d1695bf4d02f166/mlflow-3.11.1.tar.gz", hash = "sha256:84e54c4be91b5b2a19039a2673fe688b1d7307ceddacc08af51f8df05b19ee56", size = 9797469, upload-time = "2026-04-07T14:26:58.463Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/62/96826c340354638dfedcbdbcd35d67754566bd45f6592300e0c215c80e30/mlflow-3.11.1-py3-none-any.whl", hash = "sha256:8f6bf1238ac04f97664c229dd480380c5c254a78bdb3c0e433e3a0397508b1af", size = 10479141, upload-time = "2026-04-07T14:26:55.709Z" }, -] - -[[package]] -name = "mlflow-skinny" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "click" }, - { name = "cloudpickle" }, - { name = "databricks-sdk" }, - { name = "fastapi" }, - { name = "gitpython" }, - { name = "importlib-metadata" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sqlparse" }, - { name = "typing-extensions" }, - { name = "uvicorn" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/40/77/fe2027ddad9e52ed1ac360fbc262169e6366f6678632e350cbd0d901bb9b/mlflow_skinny-3.11.1.tar.gz", hash = "sha256:86ce63491349f6713afc8a4ef0bf77a8314d0e79e03753cb150d6c860a0b0475", size = 2642799, upload-time = "2026-04-07T14:26:43.818Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/a7/e61ec397b34dc3c9e91572f45e41617f429d5c524d38a4e1aa2316ee1b5e/mlflow_skinny-3.11.1-py3-none-any.whl", hash = 
"sha256:82ffd5f6980320b4ac19f741e7a754faa1d01707e632b002ea68e04fd25a0535", size = 3171551, upload-time = "2026-04-07T14:26:41.762Z" }, -] - -[[package]] -name = "mlflow-tracing" -version = "3.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "databricks-sdk" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1b/77/73af163432f3c66e2d213045250972e504a6683c76f63dd1abfba441a16a/mlflow_tracing-3.11.1.tar.gz", hash = "sha256:cb63cee16385d081467ec5bee4807fe1af59ddfdf04be4c79e7a7813b1002193", size = 1314550, upload-time = "2026-04-07T14:26:32.785Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ab/d980c84e7df4224ab8db2457afbe135b430f371ca081a37cf89f8ef18ca1/mlflow_tracing-3.11.1-py3-none-any.whl", hash = "sha256:fa82df64dacf8293b714ae666440fe7c1902c6470c024df389bb91e9de3106d9", size = 1575790, upload-time = "2026-04-07T14:26:30.804Z" }, -] - [[package]] name = "mock-alchemy" version = "0.2.6" @@ -4482,18 +4205,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/98/c637d9e5cab1355d6765de2304199a1d79a43aa94c33d8eddb475327d81a/opentelemetry_instrumentation_wsgi-0.60b1-py3-none-any.whl", hash = "sha256:5e7b432778ebf5a39af136227884a6ab2efc3c4e73e2dbb1d05ecf03ea196705", size = 14583, upload-time = "2025-12-11T13:36:33.164Z" }, ] -[[package]] -name = "opentelemetry-proto" -version = "1.39.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } -wheels = [ 
- { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, -] - [[package]] name = "opentelemetry-resource-detector-azure" version = "0.1.5" @@ -4863,18 +4574,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/0d/431bb85252119f5d2260417fa7d164619b31eed8f1725b364dc0ade43a8e/preshed-3.0.12-cp314-cp314t-win_arm64.whl", hash = "sha256:c0c0d3b66b4c1e40aa6042721492f7b07fc9679ab6c361bc121aa54a1c3ef63f", size = 114839, upload-time = "2025-11-17T13:00:19.513Z" }, ] -[[package]] -name = "prettytable" -version = "3.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/79/45/b0847d88d6cfeb4413566738c8bbf1e1995fad3d42515327ff32cc1eb578/prettytable-3.17.0.tar.gz", hash = "sha256:59f2590776527f3c9e8cf9fe7b66dd215837cca96a9c39567414cbc632e8ddb0", size = 67892, upload-time = "2025-11-14T17:33:20.212Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/8c/83087ebc47ab0396ce092363001fa37c17153119ee282700c0713a195853/prettytable-3.17.0-py3-none-any.whl", hash = "sha256:aad69b294ddbe3e1f95ef8886a060ed1666a0b83018bbf56295f6f226c43d287", size = 34433, upload-time = "2025-11-14T17:33:19.093Z" }, -] - [[package]] name = "prometheus-client" version = "0.23.1" @@ -5010,21 +4709,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] -[[package]] -name = "protobuf" -version = "6.33.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/ba/25/7c72c307aafc96fa87062aa6291d9f7c94836e43214d43722e86037aac02/protobuf-6.33.5.tar.gz", hash = "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", size = 444465, upload-time = "2026-01-29T21:51:33.494Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/79/af92d0a8369732b027e6d6084251dd8e782c685c72da161bd4a2e00fbabb/protobuf-6.33.5-cp310-abi3-win32.whl", hash = "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b", size = 425769, upload-time = "2026-01-29T21:51:21.751Z" }, - { url = "https://files.pythonhosted.org/packages/55/75/bb9bc917d10e9ee13dee8607eb9ab963b7cf8be607c46e7862c748aa2af7/protobuf-6.33.5-cp310-abi3-win_amd64.whl", hash = "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", size = 437118, upload-time = "2026-01-29T21:51:24.022Z" }, - { url = "https://files.pythonhosted.org/packages/a2/6b/e48dfc1191bc5b52950246275bf4089773e91cb5ba3592621723cdddca62/protobuf-6.33.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", size = 427766, upload-time = "2026-01-29T21:51:25.413Z" }, - { url = "https://files.pythonhosted.org/packages/4e/b1/c79468184310de09d75095ed1314b839eb2f72df71097db9d1404a1b2717/protobuf-6.33.5-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", size = 324638, upload-time = "2026-01-29T21:51:26.423Z" }, - { url = "https://files.pythonhosted.org/packages/c5/f5/65d838092fd01c44d16037953fd4c2cc851e783de9b8f02b27ec4ffd906f/protobuf-6.33.5-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", size = 339411, upload-time = "2026-01-29T21:51:27.446Z" }, - { url = "https://files.pythonhosted.org/packages/9b/53/a9443aa3ca9ba8724fdfa02dd1887c1bcd8e89556b715cfbacca6b63dbec/protobuf-6.33.5-cp39-abi3-manylinux2014_x86_64.whl", hash 
= "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", size = 323465, upload-time = "2026-01-29T21:51:28.925Z" }, - { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, -] - [[package]] name = "psutil" version = "7.2.1" @@ -5187,27 +4871,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, ] -[[package]] -name = "pyasn1" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, -] - -[[package]] -name = "pyasn1-modules" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, -] - [[package]] name = "pycparser" version = "2.23" @@ -5538,12 +5201,10 @@ all = [ { name = "av" }, { name = "azure-ai-ml" }, { name = "azure-cognitiveservices-speech" }, - { name = "azureml-mlflow" }, { name = "flask" }, { name = "ipykernel" }, { name = "jupyter" }, { name = "ml-collections" }, - { name = "mlflow" }, { name = "ollama" }, { name = "opencv-python" }, { name = "playwright" }, @@ -5558,9 +5219,8 @@ fairness-bias = [ gcg = [ { name = "accelerate" }, { name = "azure-ai-ml" }, - { name = "azureml-mlflow" }, { name = "ml-collections" }, - { name = "mlflow" }, + { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "sentencepiece" }, { name = "torch" }, ] @@ -5625,15 +5285,13 @@ requires-dist = [ { name = "av", specifier = ">=14.0.0" }, { name = "av", marker = "extra == 'all'", specifier = ">=14.0.0" }, { name = "azure-ai-contentsafety", specifier = ">=1.0.0" }, - { name = "azure-ai-ml", marker = "extra == 'all'", specifier = ">=1.27.1" }, - { name = "azure-ai-ml", marker = "extra == 'gcg'", specifier = ">=1.27.1" }, + { name = "azure-ai-ml", marker = "extra == 'all'", specifier = ">=1.32.0" }, + { name = "azure-ai-ml", marker = "extra == 'gcg'", specifier = ">=1.32.0" }, { name = "azure-cognitiveservices-speech", marker = "extra == 'all'", specifier = ">=1.44.0" }, { name = "azure-cognitiveservices-speech", marker = "extra == 'speech'", specifier = ">=1.46.0" }, { name = "azure-core", specifier = ">=1.38.0" }, { name = "azure-identity", specifier = ">=1.19.0" }, { name = "azure-storage-blob", specifier = ">=12.19.0" }, - { name = "azureml-mlflow", marker = "extra == 'all'", 
specifier = ">=1.60.0" }, - { name = "azureml-mlflow", marker = "extra == 'gcg'", specifier = ">=1.60.0" }, { name = "base2048", specifier = ">=0.1.3" }, { name = "colorama", specifier = ">=0.4.6" }, { name = "confusable-homoglyphs", specifier = ">=3.3.1" }, @@ -5649,8 +5307,6 @@ requires-dist = [ { name = "jupyter", marker = "extra == 'all'", specifier = ">=1.1.1" }, { name = "ml-collections", marker = "extra == 'all'", specifier = ">=1.1.0" }, { name = "ml-collections", marker = "extra == 'gcg'", specifier = ">=1.1.0" }, - { name = "mlflow", marker = "extra == 'all'", specifier = ">=3.11.1" }, - { name = "mlflow", marker = "extra == 'gcg'", specifier = ">=3.11.1" }, { name = "numpy", marker = "python_full_version < '3.14'", specifier = ">=1.26.0" }, { name = "numpy", marker = "python_full_version >= '3.14'", specifier = ">=2.3.0" }, { name = "ollama", marker = "extra == 'all'", specifier = ">=0.5.1" }, @@ -5662,6 +5318,7 @@ requires-dist = [ { name = "pillow", specifier = ">=12.2.0" }, { name = "playwright", marker = "extra == 'all'", specifier = ">=1.49.0" }, { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.49.0" }, + { name = "pyarrow", marker = "python_full_version >= '3.14' and extra == 'gcg'", specifier = ">=22.0.0" }, { name = "pydantic", specifier = ">=2.11.5" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.8.0" }, { name = "pyodbc", specifier = ">=5.1.0" }, @@ -5849,28 +5506,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] -[[package]] -name = "pywin32" -version = "311" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7b/40/44efbb0dfbd33aca6a6483191dae0716070ed99e2ecb0c53683f400a0b4f/pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3", size = 8760432, upload-time = "2025-07-14T20:13:05.9Z" }, - { url = "https://files.pythonhosted.org/packages/5e/bf/360243b1e953bd254a82f12653974be395ba880e7ec23e3731d9f73921cc/pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b", size = 9590103, upload-time = "2025-07-14T20:13:07.698Z" }, - { url = "https://files.pythonhosted.org/packages/57/38/d290720e6f138086fb3d5ffe0b6caa019a791dd57866940c82e4eeaf2012/pywin32-311-cp310-cp310-win_arm64.whl", hash = "sha256:0502d1facf1fed4839a9a51ccbcc63d952cf318f78ffc00a7e78528ac27d7a2b", size = 8778557, upload-time = "2025-07-14T20:13:11.11Z" }, - { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, - { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, - { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = 
"sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, - { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, - { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, - { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, - { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, - { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, - { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, -] - [[package]] name = "pywinpty" version = "3.0.2" @@ -6395,18 +6030,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - [[package]] name = "ruff" version = "0.14.10" @@ -6459,117 +6082,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/58/5b/632a58724221ef03d78ab65062e82a1010e1bef8e8e0b9d7c6d7b8044841/safetensors-0.7.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:473b32699f4200e69801bf5abf93f1a4ecd432a70984df164fc22ccf39c4a6f3", size = 531885, upload-time = "2025-11-19T15:18:27.146Z" }, ] -[[package]] -name = "scikit-learn" -version = "1.7.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11' and sys_platform == 'darwin'", - "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "joblib", marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, - { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, - { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, - { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, - { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, - { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, - { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, - { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, - { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, - { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, - { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, - { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, - { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, - { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, - { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, - { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, - { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, - { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, - { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, - { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, - { url = 
"https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, - { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, - { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, - { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, - { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, -] - -[[package]] -name = "scikit-learn" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform == 'darwin'", - "python_full_version >= '3.14' and platform_machine == 'aarch64' and sys_platform == 
'linux'", - "python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and python_full_version < '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "joblib", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/c9/92/53ea2181da8ac6bf27170191028aee7251f8f841f8d3edbfdcaf2008fde9/scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da", size = 8595835, upload-time = "2025-12-10T07:07:39.385Z" }, - { url = "https://files.pythonhosted.org/packages/01/18/d154dc1638803adf987910cdd07097d9c526663a55666a97c124d09fb96a/scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1", size = 8080381, upload-time = "2025-12-10T07:07:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/8a/44/226142fcb7b7101e64fdee5f49dbe6288d4c7af8abf593237b70fca080a4/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b", size = 8799632, upload-time = "2025-12-10T07:07:43.899Z" }, - { url = "https://files.pythonhosted.org/packages/36/4d/4a67f30778a45d542bbea5db2dbfa1e9e100bf9ba64aefe34215ba9f11f6/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1", size = 9103788, upload-time = "2025-12-10T07:07:45.982Z" }, - { url = "https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b", size = 8081706, upload-time = "2025-12-10T07:07:48.111Z" }, - { url = "https://files.pythonhosted.org/packages/3d/46/5416595bb395757f754feb20c3d776553a386b661658fb21b7c814e89efe/scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961", size = 7688451, upload-time = "2025-12-10T07:07:49.873Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" }, - { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" }, - { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" }, - { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" }, - { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" }, - { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" }, - { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" }, - { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" }, - { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" }, - { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" }, - { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" }, - { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" }, - { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" }, - { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" }, - { url = 
"https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" }, - { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" }, - { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" }, - { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" }, - { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" }, - { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" }, - { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" }, - { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" }, - { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" }, - { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" }, - { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, -] - [[package]] name = "scipy" version = "1.15.3" @@ -6823,25 +6335,6 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] -[[package]] -name = "skops" -version = "0.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, - { name = "packaging" }, - { name = "prettytable" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/0c/5ec987633e077dd0076178ea6ade2d6e57780b34afea0b497fb507d7a1ed/skops-0.13.0.tar.gz", hash = "sha256:66949fd3c95cbb5c80270fbe40293c0fe1e46cb4a921860e42584dd9c20ebeb1", size = 581312, upload-time = "2025-08-06T09:48:14.916Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/e8/6a2b2030f0689f894432b9c2f0357f2f3286b2a00474827e04b8fe9eea13/skops-0.13.0-py3-none-any.whl", hash = "sha256:55e2cccb18c86f5916e4cfe5acf55ed7b0eecddf08a151906414c092fa5926dc", size = 131200, upload-time = "2025-08-06T09:48:13.356Z" }, -] - [[package]] name = "smart-open" version = "7.5.0" @@ -6854,15 +6347,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = "2025-11-08T21:38:39.024Z" }, ] -[[package]] -name = "smmap" -version = "5.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, -] - [[package]] name = "sniffio" version = "1.3.1" @@ -7017,15 +6501,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, ] -[[package]] -name = "sqlparse" -version = "0.5.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/76/437d71068094df0726366574cf3432a4ed754217b436eb7429415cf2d480/sqlparse-0.5.5.tar.gz", hash = "sha256:e20d4a9b0b8585fdf63b10d30066c7c94c5d7a7ec47c889a2d83a3caa93ff28e", size = 120815, upload-time = "2025-12-19T07:17:45.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/4b/359f28a903c13438ef59ebeee215fb25da53066db67b305c125f1c6d2a25/sqlparse-0.5.5-py3-none-any.whl", hash = "sha256:12a08b3bf3eec877c519589833aed092e2444e68240a3577e8e26148acc7b1ba", size = 
46138, upload-time = "2025-12-19T07:17:46.573Z" }, -] - [[package]] name = "srsly" version = "2.5.3" @@ -7230,15 +6705,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9f/ef/1648fda54e9689058335ff54f650a7a314db2a42e21af1b83949b2dc748e/thinc-8.3.13-cp314-cp314-win_arm64.whl", hash = "sha256:11754fada9ad5ba2e02d5f3f234f940e24015b82333db58372f4a6aedad9b43f", size = 1667687, upload-time = "2026-03-23T07:22:34.967Z" }, ] -[[package]] -name = "threadpoolctl" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, -] - [[package]] name = "tinycss2" version = "1.4.0" @@ -7770,15 +7236,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258, upload-time = "2026-01-09T18:20:59.425Z" }, ] -[[package]] -name = "waitress" -version = "3.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bf/cb/04ddb054f45faa306a230769e868c28b8065ea196891f09004ebace5b184/waitress-3.0.2.tar.gz", hash = "sha256:682aaaf2af0c44ada4abfb70ded36393f0e307f4ab9456a215ce0020baefc31f", size = 179901, upload-time = "2024-11-16T20:02:35.195Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8d/57/a27182528c90ef38d82b636a11f606b0cbb0e17588ed205435f8affe3368/waitress-3.0.2-py3-none-any.whl", hash = "sha256:c56d67fd6e87c2ee598b76abdd4e96cfad1f24cacdea5078d382b1f9d7b5ed2e", size = 56232, upload-time = "2024-11-16T20:02:33.858Z" }, -] - [[package]] name = "wasabi" version = "1.1.3"