diff --git a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py index 460200d2e..6e7991ea3 100644 --- a/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py +++ b/pyrit/auxiliary_attacks/gcg/attack/base/attack_manager.py @@ -17,8 +17,6 @@ import torch import torch.multiprocessing as mp import torch.nn as nn -from fastchat.conversation import Conversation, SeparatorStyle -from fastchat.model import get_conversation_template from transformers import ( AutoModelForCausalLM, AutoTokenizer, @@ -133,7 +131,6 @@ def __init__( goal: str, target: str, tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, ) -> None: @@ -146,9 +143,9 @@ def __init__( target (str): The target of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a configured chat template + (i.e., ``tokenizer.chat_template`` is not ``None``); ``apply_chat_template`` is used + to render the user/assistant exchange instead of model-specific fastchat templates. control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !") test_prefixes (list, optional): @@ -160,11 +157,8 @@ def __init__( self.target = target self.control = control_init self.tokenizer = tokenizer - self.conv_template = conv_template self.test_prefixes = test_prefixes - self.conv_template.messages = [] - self.test_new_toks = len(self.tokenizer(self.target).input_ids) + 2 # buffer for prefix in self.test_prefixes: self.test_new_toks = max(self.test_new_toks, len(self.tokenizer(prefix).input_ids)) @@ -172,103 +166,68 @@ def __init__( self._update_ids() def _update_ids(self) -> None: - self.conv_template.append_message(self.conv_template.roles[0], f"{self.goal} {self.control}") - self.conv_template.append_message(self.conv_template.roles[1], f"{self.target}") - prompt = self.conv_template.get_prompt() + # Render the goal+control as the user turn and the target as the assistant turn using the + # tokenizer's built-in chat template. This replaces fastchat's per-model Conversation logic + # and works for any HuggingFace chat-tuned model (issue #965). 
+ messages = [ + {"role": "user", "content": f"{self.goal} {self.control}"}, + {"role": "assistant", "content": f"{self.target}"}, + ] + prompt = self.tokenizer.apply_chat_template(messages, tokenize=False) encoding = self.tokenizer(prompt) toks = encoding.input_ids - if self.conv_template.name == "llama-2" or self.conv_template.name == "llama-3": - self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks))) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks)) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 2) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 3) + # Locate goal/control/target substrings in the rendered prompt. + goal_start = prompt.find(self.goal) + control_start = prompt.find(self.control) + target_start = prompt.find(self.target) + if goal_start == -1 or control_start == -1 or target_start == -1: + raise ValueError( + "Could not locate goal/control/target in chat-templated prompt. " + f"prompt={prompt!r}, goal={self.goal!r}, " + f"control={self.control!r}, target={self.target!r}" + ) - else: - python_tokenizer = False or self.conv_template.name == "oasst_pythia" - try: - encoding.char_to_token(len(prompt) - 1) - except Exception: - python_tokenizer = True - if python_tokenizer: - # This is specific to the vicuna and pythia tokenizer and conversation prompt. - # It will not work with other tokenizers or prompts. 
- self.conv_template.messages = [] - - self.conv_template.append_message(self.conv_template.roles[0], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._user_role_slice = slice(None, len(toks)) - - self.conv_template.update_last_message(f"{self.goal}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._goal_slice = slice(self._user_role_slice.stop, max(self._user_role_slice.stop, len(toks) - 1)) - - separator = " " if self.goal else "" - self.conv_template.update_last_message(f"{self.goal}{separator}{self.control}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._control_slice = slice(self._goal_slice.stop, len(toks) - 1) - - self.conv_template.append_message(self.conv_template.roles[1], None) - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._assistant_role_slice = slice(self._control_slice.stop, len(toks)) - - self.conv_template.update_last_message(f"{self.target}") - toks = self.tokenizer(self.conv_template.get_prompt()).input_ids - self._target_slice = slice(self._assistant_role_slice.stop, len(toks) - 1) - self._loss_slice = slice(self._assistant_role_slice.stop - 1, len(toks) - 2) - else: - self._system_slice = slice(None, encoding.char_to_token(len(self.conv_template.system))) - self._user_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[0])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[0]) + len(self.conv_template.roles[0]) + 1 - ), - ) - self._goal_slice = slice( - encoding.char_to_token(prompt.find(self.goal)), - encoding.char_to_token(prompt.find(self.goal) + len(self.goal)), - ) - self._control_slice = slice( - encoding.char_to_token(prompt.find(self.control)), - encoding.char_to_token(prompt.find(self.control) + len(self.control)), - ) - self._assistant_role_slice = slice( - encoding.char_to_token(prompt.find(self.conv_template.roles[1])), - encoding.char_to_token( - prompt.find(self.conv_template.roles[1]) + len(self.conv_template.roles[1]) + 1 - ), - ) - self._target_slice = slice( - encoding.char_to_token(prompt.find(self.target)), - encoding.char_to_token(prompt.find(self.target) + len(self.target)), - ) - self._loss_slice = slice( - encoding.char_to_token(prompt.find(self.target)) - 1, - encoding.char_to_token(prompt.find(self.target) + len(self.target)) - 1, - ) + # ``char_to_token`` returns None when the character index has no + # corresponding token (e.g. when the substring ends exactly at the end + # of the prompt or lands on whitespace squashed into a neighbouring + # token). For end positions we clamp to ``len(toks)``; for start + # positions we walk forward to the next character that does map to a + # token. Both are necessary for the slice arithmetic to remain valid + # across tokenizers/templates. 
+ def end_tok(char_pos: int) -> int: + tok = encoding.char_to_token(char_pos) + return len(toks) if tok is None else tok + + def start_tok(char_pos: int) -> int: + limit = len(prompt) + cur = char_pos + while cur < limit: + tok = encoding.char_to_token(cur) + if tok is not None: + return tok + cur += 1 + return len(toks) + + self._goal_slice = slice( + start_tok(goal_start), + end_tok(goal_start + len(self.goal)), + ) + self._control_slice = slice( + start_tok(control_start), + end_tok(control_start + len(self.control)), + ) + target_start_tok = start_tok(target_start) + target_end_tok = end_tok(target_start + len(self.target)) + self._target_slice = slice(target_start_tok, target_end_tok) + self._loss_slice = slice(target_start_tok - 1, target_end_tok - 1) + # Assistant role tokens are everything between the control end and the target start. + # This works for any chat template (e.g. llama-2 "[/INST]", phi-3 "<|assistant|>", etc.) + # without us needing to know the literal marker text. + self._assistant_role_slice = slice(self._control_slice.stop, self._target_slice.start) self.input_ids = torch.tensor(toks[: self._target_slice.stop], device="cpu") - self.conv_template.messages = [] @torch.no_grad() # type: ignore[misc, untyped-decorator, unused-ignore] def generate(self, model: Any, gen_config: Any = None) -> torch.Tensor: @@ -457,7 +416,6 @@ def __init__( goals: list[str], targets: list[str], tokenizer: Any, - conv_template: Conversation, control_init: str = "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !", test_prefixes: Optional[list[str]] = None, managers: Optional[dict[str, type[AttackPrompt]]] = None, @@ -471,9 +429,7 @@ def __init__( targets (List[str]): The list of targets of the attack tokenizer (Transformer Tokenizer): - The tokenizer used to convert text into tokens - conv_template (Template): - The conversation template used for the attack + The tokenizer used to convert text into tokens. Must have a chat template configured. control_init (str, optional): A string used to control the attack (default is "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! 
!") test_prefixes (list, optional): @@ -491,8 +447,7 @@ def __init__( self.tokenizer = tokenizer self._prompts = [ - managers["AP"](goal, target, tokenizer, conv_template, control_init, test_prefixes) - for goal, target in zip(goals, targets) + managers["AP"](goal, target, tokenizer, control_init, test_prefixes) for goal, target in zip(goals, targets) ] self._nonascii_toks = get_nonascii_toks(tokenizer, device="cpu") @@ -634,9 +589,7 @@ def __init__( self.models = [worker.model for worker in workers] self.logfile = logfile self.prompts = [ - managers["PM"]( - goals, targets, worker.tokenizer, worker.conv_template, control_init, test_prefixes, managers - ) + managers["PM"](goals, targets, worker.tokenizer, control_init, test_prefixes, managers) for worker in workers ] self.managers = managers @@ -819,7 +772,6 @@ def test_all(self) -> tuple[list[list[bool]], list[list[int]], list[list[float]] self.goals + self.test_goals, self.targets + self.test_targets, worker.tokenizer, - worker.conv_template, self.control_str, self.test_prefixes, self.managers, @@ -997,7 +949,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1005,7 +957,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1236,7 +1188,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1244,7 +1196,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1450,7 +1402,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.workers ], @@ -1458,7 +1410,7 @@ def __init__( { "model_path": worker.model.name_or_path, "tokenizer_path": worker.tokenizer.name_or_path, - "conv_template": worker.conv_template.name, + "chat_template": worker.tokenizer.chat_template, } for worker in self.test_workers ], @@ -1585,7 +1537,6 @@ def __init__( token: str, model_kwargs: dict[str, Any], tokenizer: Any, - conv_template: Conversation, device: str, ) -> None: self.model = ( @@ -1596,7 +1547,6 @@ def __init__( .eval() ) self.tokenizer = tokenizer - self.conv_template = conv_template self.tasks: mp.JoinableQueue[Any] = mp.JoinableQueue() self.results: mp.JoinableQueue[Any] = mp.JoinableQueue() self.process: Optional[mp.Process] = None @@ -1667,45 +1617,23 @@ def get_workers(params: Any, eval: bool = False) -> tuple[list[ModelWorker], lis tokenizer.padding_side = "left" if not tokenizer.pad_token: tokenizer.pad_token = tokenizer.eos_token + if tokenizer.chat_template is None: + raise ValueError( + f"Tokenizer {params.tokenizer_paths[i]!r} has no chat_template configured. GCG uses " + "tokenizer.apply_chat_template() to render prompts (see issue #965); without a chat " + "template the attack cannot be set up. 
Pick a chat-tuned model or set " + "tokenizer.chat_template explicitly." + ) tokenizers.append(tokenizer) logger.info(f"Loaded {len(tokenizers)} tokenizers") - raw_conv_templates = [] - for template in params.conversation_templates: - if template in ["llama-2", "mistral", "llama-3-8b", "vicuna"]: - raw_conv_templates.append(get_conversation_template(template)) - elif template in ["phi-3-mini"]: - conv_template = Conversation( - name="phi-3-mini", - system_template="<|system|>\n{system_message}", - system_message="", - roles=("<|user|>", "<|assistant|>"), - sep_style=SeparatorStyle.CHATML, - sep="<|end|>", - stop_token_ids=[32000, 32001, 32007], - ) - raw_conv_templates.append(conv_template) - else: - raise ValueError("Conversation template not recognized") - - conv_templates = [] - for conv in raw_conv_templates: - if conv.name == "zero_shot": - conv.roles = tuple("### " + r for r in conv.roles) - conv.sep = "\n" - elif conv.name == "llama-2": - conv.sep2 = conv.sep2.strip() - conv_templates.append(conv) - - logger.info(f"Loaded {len(conv_templates)} conversation templates") workers = [ ModelWorker( params.model_paths[i], params.token, params.model_kwargs[i], tokenizers[i], - conv_templates[i], params.devices[i], ) for i in range(len(params.model_paths)) diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml index 10ef5504c..504fb3dd4 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_2.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml index 4eaf9c987..a8b60c392 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_llama_3.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml index 640f4c32f..31e562c22 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_mistral.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml index e3accd3c2..d3547152d 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml +++ 
b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_3_mini.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml new file mode 100644 index 000000000..d08043001 --- /dev/null +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_phi_4.yaml @@ -0,0 +1,29 @@ +transfer: False +target_weight: 1.0 +control_weight: 0.0 +progressive_goals: False +progressive_models: False +anneal: False +incr_control: False +stop_on_success: False +verbose: True +allow_non_ascii: False +num_train_models: 1 +result_prefix: "results/individual_phi4" +tokenizer_paths: ["microsoft/phi-4"] +tokenizer_kwargs: [{"use_fast": True}] +model_paths: ["microsoft/phi-4"] +model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] +devices: ["cuda:0"] +train_data: "" +test_data: "" +n_train_data: 50 +n_test_data: 0 +control_init: "! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !" +n_steps: 500 +test_steps: 50 +batch_size: 512 +learning_rate: 0.01 +topk: 256 +temp: 1 +filter_cand: True diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml index 3fc3537e4..286cb4a01 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/individual_vicuna.yaml @@ -14,7 +14,6 @@ tokenizer_paths: ["lmsys/vicuna-13b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-13b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] train_data: "" test_data: "" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml index f6f3ae3d5..351622dba 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_all_models.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruc tokenizer_kwargs: [{"use_fast": False}, {"use_fast": False}, {"use_fast": False}, {"use_fast": False}] model_paths: ["meta-llama/Llama-2-7b-chat-hf", "mistralai/Mistral-7B-Instruct-v0.1", "meta-llama/Meta-Llama-3-8B-Instruct", "lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}, {"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2", "mistral", "llama-3-8b", "vicuna"] devices: ["cuda:0", "cuda:1", "cuda:2", "cuda:3"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml index d7200b177..fc3c82412 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_2.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Llama-2-7b-chat-hf"] tokenizer_kwargs: [{"use_fast": False}] model_paths: 
["meta-llama/Llama-2-7b-chat-hf"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-2"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml index 5e538824c..0b353b876 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_llama_3.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["meta-llama/Meta-Llama-3-8B-Instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["llama-3-8b"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml index c2f808c18..4c6438a6a 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_mistral.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["mistralai/Mistral-7B-Instruct-v0.1"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["mistral"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml index 679556c3e..35316c5d9 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_phi_3_mini.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["microsoft/Phi-3-mini-4k-instruct"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["microsoft/Phi-3-mini-4k-instruct"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["phi-3-mini"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml index 2bbfe0a96..3850c95a2 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml +++ b/pyrit/auxiliary_attacks/gcg/experiments/configs/transfer_vicuna.yaml @@ -6,5 +6,4 @@ tokenizer_paths: ["lmsys/vicuna-7b-v1.5"] tokenizer_kwargs: [{"use_fast": False}] model_paths: ["lmsys/vicuna-7b-v1.5"] model_kwargs: [{"low_cpu_mem_usage": True, "use_cache": False}] -conversation_templates: ["vicuna"] devices: ["cuda:0"] diff --git a/pyrit/auxiliary_attacks/gcg/experiments/run.py b/pyrit/auxiliary_attacks/gcg/experiments/run.py index 7558388e1..f78b6e220 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/run.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/run.py @@ -11,7 +11,7 @@ from pyrit.auxiliary_attacks.gcg.experiments.train import GreedyCoordinateGradientAdversarialSuffixGenerator from pyrit.setup.initialization import _load_environment_files -_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini"] +_MODEL_NAMES: list[str] = ["mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini", "phi_4"] _ALL_MODELS: str = "all_models" diff --git a/pyrit/auxiliary_attacks/gcg/experiments/train.py b/pyrit/auxiliary_attacks/gcg/experiments/train.py index 4adf633d7..d5815f7ba 100644 --- a/pyrit/auxiliary_attacks/gcg/experiments/train.py +++ b/pyrit/auxiliary_attacks/gcg/experiments/train.py @@ 
-41,7 +41,6 @@ def generate_suffix( tokenizer_paths: Optional[list[str]] = None, model_name: str = "", model_paths: Optional[list[str]] = None, - conversation_templates: Optional[list[str]] = None, result_prefix: str = "", train_data: str = "", control_init: str = _DEFAULT_CONTROL_INIT, @@ -81,7 +80,6 @@ def generate_suffix( tokenizer_paths (Optional[list[str]]): Paths to tokenizer models. model_name (str): Name identifier for the model. model_paths (Optional[list[str]]): Paths to model weights. - conversation_templates (Optional[list[str]]): Conversation template names. result_prefix (str): Prefix for result file paths. train_data (str): URL or path to training data CSV. control_init (str): Initial control string for optimization. @@ -117,8 +115,6 @@ def generate_suffix( tokenizer_paths = [] if model_paths is None: model_paths = [] - if conversation_templates is None: - conversation_templates = [] if devices is None: devices = ["cuda:0"] if model_kwargs is None: @@ -131,7 +127,6 @@ def generate_suffix( tokenizer_paths=tokenizer_paths, model_name=model_name, model_paths=model_paths, - conversation_templates=conversation_templates, result_prefix=result_prefix, train_data=train_data, control_init=control_init, diff --git a/pyrit/auxiliary_attacks/gcg/src/Dockerfile b/pyrit/auxiliary_attacks/gcg/src/Dockerfile index e820fbaaf..085d286b9 100644 --- a/pyrit/auxiliary_attacks/gcg/src/Dockerfile +++ b/pyrit/auxiliary_attacks/gcg/src/Dockerfile @@ -19,5 +19,4 @@ WORKDIR /app # Install PyRIT with GCG extras to get all dependencies COPY pyproject.toml MANIFEST.in README.md LICENSE /app/ COPY pyrit/ /app/pyrit/ -RUN uv pip install -e ".[gcg]" && \ - uv pip install "fschat @ git+https://github.com/lm-sys/FastChat.git@2c68a13bfe10b86f40e3eefc3fcfacb32c00b02a" +RUN uv pip install -e ".[gcg]" diff --git a/tests/integration/auxiliary_attacks/test_gcg_integration.py b/tests/integration/auxiliary_attacks/test_gcg_integration.py index 8474fd0f3..f2944bf48 100644 --- a/tests/integration/auxiliary_attacks/test_gcg_integration.py +++ b/tests/integration/auxiliary_attacks/test_gcg_integration.py @@ -5,9 +5,15 @@ These tests validate that the GCG attack pipeline works end-to-end with a real (tiny) model. They use GPT-2 (~124M params) which can run on CPU, paired with -the llama-2 conversation template (which has explicit handling in _update_ids). +explicit chat templates set on the tokenizer (since GPT-2 has no default +chat template). -Requires: torch, transformers, fastchat, mlflow (GCG optional deps). +After PR #965 dropped fastchat, ``AttackPrompt._update_ids`` uses +``tokenizer.apply_chat_template()`` exclusively, so we exercise that code path +with two distinct template shapes (llama-2 style and ChatML/phi-3 style) to +catch template-specific regressions. + +Requires: torch, transformers (GCG optional deps). Skipped via importorskip when deps are not installed. """ @@ -15,10 +21,8 @@ torch = pytest.importorskip("torch", reason="torch not installed") transformers = pytest.importorskip("transformers", reason="transformers not installed") -pytest.importorskip("fastchat", reason="fastchat not installed") -from fastchat.model import get_conversation_template # noqa: E402 from transformers import AutoTokenizer, GPT2LMHeadModel # noqa: E402 from pyrit.auxiliary_attacks.gcg.attack.base.attack_manager import ( # noqa: E402 @@ -33,6 +37,31 @@ token_gradients, ) +# Minimal Jinja chat templates that exercise the two structural variants we care about: +# (1) Inline role markers ("[INST]"/"[/INST]") used by llama-2. 
+# (2) Distinct role tokens ("<|user|>"/"<|assistant|>") used by phi-3 / ChatML. +# Both must produce findable goal/control/target substrings for the new +# apply_chat_template-based _update_ids to compute correct slices. +_LLAMA_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST]" + "{%- elif m['role'] == 'assistant' -%}" + " {{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" +) + +_CHATML_STYLE_TEMPLATE = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "<|user|>\n{{ m['content'] }}<|end|>\n<|assistant|>\n" + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}<|end|>" + "{%- endif -%}" + "{%- endfor -%}" +) + @pytest.fixture(scope="module") def gpt2_model() -> GPT2LMHeadModel: @@ -40,33 +69,33 @@ def gpt2_model() -> GPT2LMHeadModel: return GPT2LMHeadModel.from_pretrained("gpt2").eval() -@pytest.fixture(scope="module") -def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: - """Load GPT-2 tokenizer once for all tests in this module.""" +def _make_tokenizer(chat_template: str) -> transformers.PreTrainedTokenizer: + """Build a fresh GPT-2 tokenizer with the given chat template attached.""" tokenizer = AutoTokenizer.from_pretrained("gpt2") tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "left" + tokenizer.chat_template = chat_template return tokenizer @pytest.fixture() -def conv_template(): - """Create a fresh llama-2 conversation template for each test.""" - conv = get_conversation_template("llama-2") - conv.sep2 = conv.sep2.strip() - return conv +def gpt2_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a llama-2-style chat template attached.""" + return _make_tokenizer(_LLAMA_STYLE_TEMPLATE) @pytest.fixture() -def vicuna_conv_template(): - """Create a fresh vicuna conversation template for each test. - - Vicuna exercises the non-llama branch of `_update_ids` (the path that - references `conv_template.system` and uses `encoding.char_to_token`). - A bug in that branch — like the Phi-3 `.system` AttributeError we hit - on Azure (#965) — would never be caught by llama-2-only tests. +def gpt2_chatml_tokenizer() -> transformers.PreTrainedTokenizer: + """GPT-2 tokenizer with a ChatML / phi-3-style chat template attached. + + Exercises the second structural variant we care about: distinct role tokens + (``<|user|>``/``<|assistant|>``) versus llama-2's inline ``[INST]`` markers. + Before PR #965 this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes; after the apply_chat_template rewrite both + template shapes share a single code path. """ - return get_conversation_template("vicuna_v1.1") + return _make_tokenizer(_CHATML_STYLE_TEMPLATE) class TestTokenGradientsIntegration: @@ -102,20 +131,18 @@ def test_gradient_is_finite_and_nonzero( class TestGCGAttackPromptIntegration: - """Integration tests for GCGAttackPrompt with real GPT-2.""" + """Integration tests for GCGAttackPrompt with real GPT-2 + llama-style template.""" def test_prompt_initializes_with_valid_slices( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """AttackPrompt should initialize with non-empty, non-overlapping slices.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! 
!", ) @@ -128,14 +155,12 @@ def test_grad_returns_valid_gradient( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """GCGAttackPrompt.grad should return a finite, non-zero gradient tensor.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -150,14 +175,12 @@ def test_target_loss_is_finite_scalar( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Target loss from real model logits should be a finite positive number.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -174,14 +197,12 @@ def test_sample_control_produces_valid_candidates( self, gpt2_model: GPT2LMHeadModel, gpt2_tokenizer: transformers.PreTrainedTokenizer, - conv_template: object, ) -> None: """Sampled control tokens should be decodable by the tokenizer.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", tokenizer=gpt2_tokenizer, - conv_template=conv_template, control_init="! ! ! ! !", ) @@ -228,39 +249,29 @@ def test_get_nonascii_toks_returns_nonempty_tensor(self, gpt2_tokenizer: transfo assert len(toks) > 0 -class TestGCGAttackPromptNonLlamaTemplate: - """Integration tests covering the non-llama branch of `AttackPrompt._update_ids`. - - The llama-2/llama-3 path is well-exercised above. The `else` branch contains - distinct logic that touches `conv_template.system`, `char_to_token`, and - different slice arithmetic. A bug here — like the Phi-3 `conv_template.system` - AttributeError we hit on Azure (#965) — would only surface with a - non-llama template, so we exercise it explicitly with vicuna. +class TestGCGAttackPromptChatMLTemplate: + """Integration tests covering ChatML / phi-3 style templates. - Both tests are currently `xfail` because vicuna (and any other modern - fastchat template that lacks a `.system` attribute) reproduces the same - AttributeError as Phi-3 — a known bug tracked in #965 that PR replacing - fastchat with `tokenizer.apply_chat_template()` will fix. Once that lands, - the xfail will flip to "unexpectedly passed" and the marker can be removed. + These exercise the second structural variant of chat templates (distinct + role tokens like ``<|user|>``/``<|assistant|>`` separated from content, + versus llama-2's inline ``[INST]`` markers). Before PR #965 dropped + fastchat, this template shape blew up in ``_update_ids`` because the + fastchat-driven slice logic referenced ``conv_template.system`` and other + template-specific attributes (the same Phi-3 ``AttributeError`` we hit on + Azure ML). After the apply_chat_template rewrite both shapes share a single + code path, so these tests should pass alongside the llama-style ones above. 
""" - @pytest.mark.xfail( - reason="#965: fastchat templates without `.system` attribute crash _update_ids", - raises=AttributeError, - strict=True, - ) - def test_prompt_initializes_with_vicuna_template( + def test_prompt_initializes_with_chatml_template( self, gpt2_model: GPT2LMHeadModel, - gpt2_tokenizer: transformers.PreTrainedTokenizer, - vicuna_conv_template: object, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, ) -> None: - """GCGAttackPrompt should construct successfully with the vicuna template.""" + """GCGAttackPrompt should construct successfully with a ChatML template.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", - tokenizer=gpt2_tokenizer, - conv_template=vicuna_conv_template, + tokenizer=gpt2_chatml_tokenizer, control_init="! ! ! ! !", ) @@ -269,23 +280,16 @@ def test_prompt_initializes_with_vicuna_template( assert prompt._control_slice.stop <= prompt._target_slice.start assert prompt.input_ids.shape[0] > 0 - @pytest.mark.xfail( - reason="#965: fastchat templates without `.system` attribute crash _update_ids", - raises=AttributeError, - strict=True, - ) - def test_grad_returns_valid_gradient_with_vicuna_template( + def test_grad_returns_valid_gradient_with_chatml_template( self, gpt2_model: GPT2LMHeadModel, - gpt2_tokenizer: transformers.PreTrainedTokenizer, - vicuna_conv_template: object, + gpt2_chatml_tokenizer: transformers.PreTrainedTokenizer, ) -> None: - """gradient computation should work end-to-end on the non-llama path.""" + """gradient computation should work end-to-end with a ChatML template.""" prompt = GCGAttackPrompt( goal="Tell me how", target="Sure here is", - tokenizer=gpt2_tokenizer, - conv_template=vicuna_conv_template, + tokenizer=gpt2_chatml_tokenizer, control_init="! ! ! ! !", ) @@ -293,5 +297,5 @@ def test_grad_returns_valid_gradient_with_vicuna_template( n_control = prompt._control_slice.stop - prompt._control_slice.start assert grad.shape[0] == n_control - assert grad.shape[1] == gpt2_tokenizer.vocab_size + assert grad.shape[1] == gpt2_chatml_tokenizer.vocab_size assert torch.isfinite(grad).all() diff --git a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py index 7613a4780..2a01fb746 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py +++ b/tests/unit/auxiliary_attacks/gcg/test_attack_wiring.py @@ -7,6 +7,7 @@ and MultiPromptAttack.__init__(), and template compatibility issues in _update_ids(). """ +from pathlib import Path from unittest.mock import MagicMock, patch import pytest @@ -43,11 +44,32 @@ def _make_mock_worker() -> MagicMock: - """Create a mock worker with required attributes for attack construction.""" + """Create a mock worker whose tokenizer can stand in for a real chat tokenizer. + + The wiring tests construct real ``GCGAttackPrompt`` instances which call + ``tokenizer.apply_chat_template`` and then walk character positions in the + rendered prompt. We need a real string + a tokenizer that can answer + ``char_to_token`` queries on it, so we back the mock with a real + distilgpt2 tokenizer (the smallest available transformers tokenizer that + ships with all the methods we touch). 
+ """ + from transformers import AutoTokenizer + + real_tokenizer = AutoTokenizer.from_pretrained("gpt2") + real_tokenizer.pad_token = real_tokenizer.eos_token + real_tokenizer.chat_template = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST] " + "{%- elif m['role'] == 'assistant' -%}" + "{{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" + ) + worker = MagicMock() worker.model.name_or_path = "test-model" - worker.tokenizer.name_or_path = "test-tokenizer" - worker.conv_template.name = "test-template" + worker.tokenizer = real_tokenizer return worker @@ -131,7 +153,7 @@ def test_progressive_attack_creates_mpa_without_error(self) -> None: filter_cand=True, ) - def test_create_attack_individual_wires_correctly(self) -> None: + def test_create_attack_individual_wires_correctly(self, tmp_path: Path) -> None: """_create_attack with transfer=False should produce an IndividualPromptAttack that can create internal MPA instances without error.""" worker = _make_mock_worker() @@ -139,7 +161,7 @@ def test_create_attack_individual_wires_correctly(self) -> None: params = Generator._build_params( transfer=False, control_init="! ! !", - result_prefix="test", + result_prefix=str(tmp_path / "test"), learning_rate=0.01, batch_size=64, n_steps=5, diff --git a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py index 2777bb1ee..ef5cc709c 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py +++ b/tests/unit/auxiliary_attacks/gcg/test_data_and_config.py @@ -94,7 +94,6 @@ def test_all_configs_have_required_keys(self, config_files: list[str]) -> None: required_keys = { "tokenizer_paths", "model_paths", - "conversation_templates", "devices", } for path in config_files: diff --git a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py index e1568e1a0..c71dacf0f 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py +++ b/tests/unit/auxiliary_attacks/gcg/test_gcg_core.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +from pathlib import Path from unittest.mock import MagicMock import numpy as np @@ -226,8 +227,14 @@ def test_output_tokens_within_vocab(self) -> None: assert (result >= 0).all() assert (result < vocab_size).all() - def test_each_candidate_differs_in_one_position(self) -> None: - """Each candidate should differ from the original in exactly one position.""" + def test_each_candidate_differs_in_at_most_one_position(self) -> None: + """Each candidate replaces exactly one position with a token sampled from top-k. + + The replacement token is drawn uniformly from top-k, so it may equal the + original token at that position (giving diffs == 0). The function only + guarantees that *at most* one position differs from the original; asserting + exactly one would make the test flaky against the underlying randomness. + """ n_control = 10 vocab_size = 50 batch_size = 8 @@ -239,8 +246,7 @@ def test_each_candidate_differs_in_one_position(self) -> None: for i in range(batch_size): diffs = (result[i] != original_toks.to(result.device)).sum().item() - # Each candidate changes exactly 1 position - assert diffs == 1, f"Candidate {i} differs in {diffs} positions, expected 1" + assert diffs <= 1, f"Candidate {i} differs in {diffs} positions, expected at most 1" def test_non_ascii_filtering(self) -> None: """When allow_non_ascii=False, the newly sampled token should not be non-ASCII. 
@@ -368,7 +374,7 @@ def test_augmentation_is_seeded_reproducible(self) -> None: class TestCreateAttack: """Tests for GreedyCoordinateGradientAdversarialSuffixgenerator_cls._create_attack.""" - def test_transfer_true_creates_progressive(self) -> None: + def test_transfer_true_creates_progressive(self, tmp_path: Path) -> None: train_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.train", reason="GCG train module not available", @@ -380,7 +386,7 @@ def test_transfer_true_creates_progressive(self) -> None: progressive_models=True, progressive_goals=True, control_init="! ! !", - result_prefix="test", + result_prefix=str(tmp_path / "test"), gbda_deterministic=True, learning_rate=0.01, batch_size=512, @@ -390,7 +396,7 @@ def test_transfer_true_creates_progressive(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -410,7 +416,7 @@ def test_transfer_true_creates_progressive(self) -> None: ) assert isinstance(attack, ProgressiveMultiPromptAttack) - def test_transfer_false_creates_individual(self) -> None: + def test_transfer_false_creates_individual(self, tmp_path: Path) -> None: train_mod = pytest.importorskip( "pyrit.auxiliary_attacks.gcg.experiments.train", reason="GCG train module not available", @@ -420,7 +426,7 @@ def test_transfer_false_creates_individual(self) -> None: params = generator_cls._build_params( transfer=False, control_init="! ! !", - result_prefix="test", + result_prefix=str(tmp_path / "test"), gbda_deterministic=True, learning_rate=0.01, batch_size=512, @@ -430,7 +436,7 @@ def test_transfer_false_creates_individual(self) -> None: mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" managers = { "AP": MagicMock(), @@ -484,7 +490,6 @@ def test_raises_on_mismatched_goals_targets(self) -> None: goals=["goal1", "goal2"], targets=["target1"], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -494,7 +499,6 @@ def test_raises_on_empty_goals(self) -> None: goals=[], targets=[], tokenizer=MagicMock(), - conv_template=MagicMock(), managers={"AP": MagicMock()}, ) @@ -506,11 +510,11 @@ def test_raises_with_multiple_workers(self) -> None: mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "m1" mock_worker1.tokenizer.name_or_path = "t1" - mock_worker1.conv_template.name = "c1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "m2" mock_worker2.tokenizer.name_or_path = "t2" - mock_worker2.conv_template.name = "c2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" with pytest.raises(ValueError, match="exactly 1 worker"): EvaluateAttack( @@ -519,3 +523,149 @@ def test_raises_with_multiple_workers(self) -> None: workers=[mock_worker1, mock_worker2], managers={"AP": MagicMock(), "PM": MagicMock(), "MPA": MagicMock()}, ) + + +class TestUpdateIdsErrorPaths: + """Tests covering the error / fallback paths in AttackPrompt._update_ids.""" + + def test_raises_when_substring_not_in_rendered_prompt(self) -> None: + """If the chat template strips/transforms goal/control/target so they don't appear + 
verbatim in the rendered prompt, _update_ids must raise a clear ValueError.""" + tokenizer = MagicMock() + # Chat template that drops the user content entirely — goal/control won't appear in prompt + tokenizer.apply_chat_template.return_value = "[INST] [/INST] hello" + # tokenizer(...) returns an encoding-like object + encoding = MagicMock() + encoding.input_ids = [1, 2, 3, 4] + encoding.char_to_token.return_value = 1 + tokenizer.return_value = encoding + + with pytest.raises(ValueError, match="Could not locate goal/control/target"): + AttackPrompt( + goal="this-goal-is-missing", + target="this-target-is-missing", + tokenizer=tokenizer, + control_init="this-control-is-missing", + ) + + def test_start_tok_walks_forward_when_initial_position_has_no_token(self) -> None: + """char_to_token returns None for the start position (e.g., whitespace squashed + into the previous token); start_tok must walk forward to the next mappable + character. Slices should still be valid.""" + # Use a fully mocked tokenizer so we can deterministically force char_to_token + # to return None at specific positions, otherwise real tokenizers usually map + # every byte and never trigger the fallback. + prompt_text = "USER hello !! ASSISTANT world" + toks = list(range(15)) + + def char_to_token(pos: int) -> int | None: + # Positions of "h" and "w" both return None; the next char does map. This + # exercises the cur += 1 walk-forward branch in start_tok. + char = prompt_text[pos] if 0 <= pos < len(prompt_text) else "" + if char in ("h", "w"): + return None + # Map remaining positions in a way that preserves slice ordering + return min(pos // 2, len(toks) - 1) + + encoding = MagicMock() + encoding.input_ids = toks + encoding.char_to_token.side_effect = char_to_token + + tokenizer = MagicMock() + tokenizer.apply_chat_template.return_value = prompt_text + tokenizer.return_value = encoding + + # Construction must succeed even though char_to_token returns None at goal/target + # start positions ("h" / "w"). + prompt = AttackPrompt( + goal="hello", + target="world", + tokenizer=tokenizer, + control_init="!!", + ) + assert isinstance(prompt._goal_slice.start, int) + assert isinstance(prompt._target_slice.start, int) + + def test_start_tok_returns_len_toks_when_no_position_maps(self) -> None: + """If char_to_token returns None for every position from char_pos to end-of-prompt, + start_tok must return len(toks) as a safe fallback (line 211).""" + prompt_text = "USER hello !! ASSISTANT world tail" + toks = list(range(20)) + + def char_to_token(pos: int) -> int | None: + char = prompt_text[pos] if 0 <= pos < len(prompt_text) else "" + # "tail" sits at end and never maps to a token (forces start_tok to exhaust + # the loop and hit `return len(toks)`); other content maps normally. + tail_start = prompt_text.find("tail") + if pos >= tail_start: + return None + return min(pos // 2, len(toks) - 1) + + encoding = MagicMock() + encoding.input_ids = toks + encoding.char_to_token.side_effect = char_to_token + + tokenizer = MagicMock() + tokenizer.apply_chat_template.return_value = prompt_text + tokenizer.return_value = encoding + + # "tail" as the target — its start position and every position after it returns + # None, so start_tok exits the while loop and returns len(toks). 
+ prompt = AttackPrompt( + goal="hello", + target="tail", + tokenizer=tokenizer, + control_init="!!", + ) + assert prompt._target_slice.start == len(toks) + + def test_end_tok_returns_len_toks_when_target_is_at_prompt_end(self) -> None: + """If the target sits at the very end of the rendered prompt, + char_to_token(end_pos) returns None — end_tok must clamp to len(toks).""" + from transformers import AutoTokenizer + + tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer.pad_token = tokenizer.eos_token + tokenizer.chat_template = ( + "{%- for m in messages -%}" + "{%- if m['role'] == 'user' -%}" + "[INST] {{ m['content'] }} [/INST]" + "{%- elif m['role'] == 'assistant' -%}" + " {{ m['content'] }}" + "{%- endif -%}" + "{%- endfor -%}" + ) + + prompt = AttackPrompt( + goal="hello", + target="world", # this sits at end of rendered prompt with no trailing tokens + tokenizer=tokenizer, + control_init="! ! !", + ) + # _target_slice.stop should be len(toks), not None or NoneType arithmetic + assert isinstance(prompt._target_slice.stop, int) + assert prompt._target_slice.stop > prompt._target_slice.start + + +class TestGetWorkersChatTemplateValidation: + """Tests for the chat-template precondition in get_workers.""" + + def test_raises_when_tokenizer_has_no_chat_template(self) -> None: + """Models without a chat_template cannot be used with apply_chat_template-based + GCG; get_workers should raise a clear ValueError pointing to the cause.""" + from unittest.mock import patch + + get_workers = attack_manager_mod.get_workers + + params = MagicMock() + params.tokenizer_paths = ["fake/no-chat-template-model"] + params.token = "" + params.tokenizer_kwargs = [{}] + + bare_tokenizer = MagicMock() + bare_tokenizer.chat_template = None + bare_tokenizer.pad_token = "" + + with patch.object(attack_manager_mod.AutoTokenizer, "from_pretrained", return_value=bare_tokenizer): + with pytest.raises(ValueError, match="no chat_template configured"): + get_workers(params) diff --git a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py index 83c616f53..ab42b5d96 100644 --- a/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py +++ b/tests/unit/auxiliary_attacks/gcg/test_lifecycle.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -44,11 +44,11 @@ def test_workers_stopped_after_training( mock_worker1 = MagicMock() mock_worker1.model.name_or_path = "test-model-1" mock_worker1.tokenizer.name_or_path = "test-tokenizer-1" - mock_worker1.conv_template.name = "test-template-1" + mock_worker1.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_worker2 = MagicMock() mock_worker2.model.name_or_path = "test-model-2" mock_worker2.tokenizer.name_or_path = "test-tokenizer-2" - mock_worker2.conv_template.name = "test-template-2" + mock_worker2.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker1], [mock_worker2]) mock_attack_instance = MagicMock() @@ -61,7 +61,6 @@ def test_workers_stopped_after_training( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, ) @@ -93,7 +92,7 @@ def test_workers_not_stopped_on_training_failure( mock_worker = MagicMock() mock_worker.model.name_or_path = "test-model" mock_worker.tokenizer.name_or_path = "test-tokenizer" - mock_worker.conv_template.name = "test-template" + mock_worker.tokenizer.chat_template = "{{ messages[0]['content'] }}" mock_get_workers.return_value = ([mock_worker], []) mock_attack_instance = MagicMock() @@ -108,7 +107,6 @@ def test_workers_not_stopped_on_training_failure( generator.generate_suffix( tokenizer_paths=["test/path"], model_paths=["test/path"], - conversation_templates=["llama-2"], train_data="", n_steps=1, )