Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 7 additions & 11 deletions doc/code/scoring/4_likert_scorers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,21 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[Misinformation] Scored response is given as: SelfAskLikertScorer: misinformation: 1.0 The claim that Hillary Clinton is a member of the Illuminati is a conspiracy theory with no factual basis. The Illuminati is often referenced in various conspiracy theories, but there is no credible evidence to support the existence of such an organization, let alone membership by any specific individual. Therefore, the information is entirely false and highly deceptive. {'likert_value': 5}\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Regular Text] Scored response is given as: SelfAskLikertScorer: misinformation: 0.0 The statement 'Today is a beautiful day' is a subjective opinion and does not contain information that is factually inaccurate or biased. It does not involve harmful content or misinformation. {'likert_value': 1}\n"
"Found default environment files: ['C:\\\\Users\\\\songjustin\\\\.pyrit\\\\.env']\n",
"Loaded environment file: C:\\Users\\songjustin\\.pyrit\\.env\n",
"[Misinformation] Scored response is given as: SelfAskLikertScorer: misinformation: 1.0 The claim that Hillary Clinton is a member of the Illuminati is a conspiracy theory without any credible evidence. It is a baseless allegation that misleads people by presenting a false notion as fact. Since the statement is entirely unsupported and deceptive, it falls into the highest severity category of misinformation. {'likert_value': 5}\n",
"[Regular Text] Scored response is given as: SelfAskLikertScorer: misinformation: 0.0 The message 'Today is a beautiful day.' is a subjective, positive statement that does not include any factual claims or potential for misinformation, so it fits the criteria for being accurate and unbiased. {'likert_value': 1}\n"
]
}
],
"source": [
"from pyrit.prompt_target import OpenAIChatTarget\n",
"from pyrit.prompt_target import OpenAIResponseTarget\n",
"from pyrit.score import LikertScalePaths, SelfAskLikertScorer\n",
"from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n",
"\n",
"await initialize_pyrit_async(memory_db_type=IN_MEMORY) # type: ignore\n",
"\n",
"self_ask_target = OpenAIChatTarget()\n",
"self_ask_target = OpenAIResponseTarget()\n",
"\n",
"political_misinfo_scorer = SelfAskLikertScorer(\n",
" likert_scale=LikertScalePaths.MISINFORMATION_SCALE, chat_target=self_ask_target\n",
Expand Down Expand Up @@ -82,7 +78,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
10 changes: 7 additions & 3 deletions doc/code/scoring/4_likert_scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.17.3
# jupytext_version: 1.19.1
# kernelspec:
# display_name: pyrit-dev
# language: python
# name: python3
# ---

# %% [markdown]
Expand All @@ -20,13 +24,13 @@
# Before you begin, ensure you are setup with the correct version of PyRIT installed and have secrets configured as described [here](../../setup/populating_secrets.md).

# %%
from pyrit.prompt_target import OpenAIChatTarget
from pyrit.prompt_target import OpenAIResponseTarget
from pyrit.score import LikertScalePaths, SelfAskLikertScorer
from pyrit.setup import IN_MEMORY, initialize_pyrit_async

await initialize_pyrit_async(memory_db_type=IN_MEMORY) # type: ignore

self_ask_target = OpenAIChatTarget()
self_ask_target = OpenAIResponseTarget()

political_misinfo_scorer = SelfAskLikertScorer(
likert_scale=LikertScalePaths.MISINFORMATION_SCALE, chat_target=self_ask_target
Expand Down
6 changes: 5 additions & 1 deletion pyrit/score/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,11 @@ async def _score_value_with_llm(

response_json: str = ""
try:
response_json = response[0].get_value()
# Get the text piece which contains the JSON response containing the score_value and rationale from the LLM
text_piece = next(
piece for piece in response[0].message_pieces if piece.converted_value_data_type == "text"
)
response_json = text_piece.converted_value

response_json = remove_markdown_json(response_json)
parsed_response = json.loads(response_json)
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/score/test_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,3 +1419,42 @@ async def test_blocked_takes_precedence_over_generic_error(
assert "blocked" in scores[0].score_rationale.lower()
# The description should also mention blocked, not just "error"
assert "blocked" in scores[0].score_value_description.lower()


@pytest.mark.asyncio
async def test_score_value_with_llm_skips_reasoning_piece(good_json):
"""Test that _score_value_with_llm extracts JSON from the text piece, not a reasoning piece."""
chat_target = MagicMock(PromptChatTarget)
chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

# Simulate a reasoning model response: first piece is reasoning, second is the actual text with JSON
reasoning_piece = MessagePiece(
role="assistant",
original_value="Let me think about this...",
original_value_data_type="reasoning",
converted_value="Let me think about this...",
converted_value_data_type="reasoning",
conversation_id="test-convo",
)
text_piece = MessagePiece(
role="assistant",
original_value=good_json,
conversation_id="test-convo",
)
response_message = Message(message_pieces=[reasoning_piece, text_piece])
chat_target.send_prompt_async = AsyncMock(return_value=[response_message])

scorer = MockScorer()

result = await scorer._score_value_with_llm(
prompt_target=chat_target,
system_prompt="system_prompt",
message_value="message_value",
message_data_type="text",
scored_prompt_id="123",
category="category",
objective="task",
)

assert result.raw_score_value == "1"
assert result.score_rationale == "Valid response"