From 92a367ba6e5f2aae1d526148a02182c234ce9223 Mon Sep 17 00:00:00 2001
From: Roman Lutz <romanlutz13@gmail.com>
Date: Wed, 25 Feb 2026 06:20:09 -0800
Subject: [PATCH] FIX Mock tokenizer in unit test to avoid HuggingFace network
 call

The chatml_tokenizer_normalizer fixture was calling
AutoTokenizer.from_pretrained() which requires network access to
HuggingFace. Replaced with a mock that simulates ChatML template
formatting, making the test fully offline.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../test_chat_normalizer_tokenizer.py             | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/tests/unit/message_normalizer/test_chat_normalizer_tokenizer.py b/tests/unit/message_normalizer/test_chat_normalizer_tokenizer.py
index 0086f162ea..81122fcc57 100644
--- a/tests/unit/message_normalizer/test_chat_normalizer_tokenizer.py
+++ b/tests/unit/message_normalizer/test_chat_normalizer_tokenizer.py
@@ -5,7 +5,6 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
-from transformers import AutoTokenizer
 
 from pyrit.message_normalizer import TokenizerTemplateNormalizer
 from pyrit.models import Message, MessagePiece
@@ -116,8 +115,18 @@ class TestNormalizeStringAsync:
 
     @pytest.fixture
     def chatml_tokenizer_normalizer(self):
-        tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-        return TokenizerTemplateNormalizer(tokenizer=tokenizer)
+        def _apply_chatml_template(messages, tokenize=False, add_generation_prompt=False):
+            """Simulate ChatML template formatting."""
+            result = ""
+            for msg in messages:
+                result += f"<|{msg['role']}|>\n{msg['content']}</s>\n"
+            if add_generation_prompt:
+                result += "<|assistant|>\n"
+            return result
+
+        mock_tokenizer = MagicMock()
+        mock_tokenizer.apply_chat_template.side_effect = _apply_chatml_template
+        return TokenizerTemplateNormalizer(tokenizer=mock_tokenizer)
 
     @pytest.mark.asyncio
     async def test_normalize_chatml(self, chatml_tokenizer_normalizer: TokenizerTemplateNormalizer):