Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
define bot refuse to respond
"I am sorry, I cannot respond to that."
67 changes: 63 additions & 4 deletions src/nvidia_rag/rag_server/response_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import json
import logging
import os
import re
import time
from collections.abc import AsyncGenerator, Generator
from typing import Any, Literal, Optional, Union
Expand Down Expand Up @@ -479,6 +480,62 @@ def _extract_stream_delta(chunk: Any) -> tuple[str, str]:
return str(content) if content else "", str(reasoning) if reasoning else ""


# Compiled once at import time: keep word chars, whitespace, and the ASCII
# apostrophe; replace every other character (period, comma, em-dash, etc.)
# with a single space prior to whitespace collapsing.
_GUARDRAILS_REFUSAL_PUNCT_STRIP_RE = re.compile(r"[^\w\s']", flags=re.UNICODE)

# Allow-list of normalized NeMo Guardrails refusal sentences. Each entry is
# the result of lowercasing the source phrasing, replacing every non-word
# non-apostrophe character with a space, and collapsing runs of whitespace
# (i.e. the same transformation `_is_guardrails_refusal` applies to its
# input). The 4 entries cover the cartesian product of "I'm"/"I am" with
# "can't"/"cannot", which captures the benign drift observed from the
# guardrails container without admitting unrelated apologetic phrasings.
_GUARDRAILS_REFUSAL_NORMALIZED: frozenset[str] = frozenset(
{
"i'm sorry i can't respond to that",
"i'm sorry i cannot respond to that",
"i am sorry i can't respond to that",
"i am sorry i cannot respond to that",
}
)
Comment on lines +483 to +502

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for Unicode apostrophes in guardrails config and related files
rg -n $'[\u2018\u2019]' deploy/compose/nemoguardrails/

Repository: NVIDIA-AI-Blueprints/rag

Length of output: 50


Harden refusal apostrophe normalization against Unicode curly apostrophes

_GUARDRAILS_REFUSAL_PUNCT_STRIP_RE preserves only the ASCII apostrophe (', U+0027), so Unicode apostrophes (e.g., U+2018/U+2019) would be stripped to spaces and could prevent matches against _GUARDRAILS_REFUSAL_NORMALIZED. A search in deploy/compose/nemoguardrails/ found no curly apostrophes in current NeMo guardrails configs, so this may be a low risk for today’s fixtures, but it’s still worth making the normalization accept common Unicode apostrophe variants.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/nvidia_rag/rag_server/response_generator.py` around lines 483 - 502, The
regex _GUARDRAILS_REFUSAL_PUNCT_STRIP_RE currently allows only the ASCII
apostrophe and will strip Unicode curly apostrophes, breaking normalization;
update the pattern used where _GUARDRAILS_REFUSAL_PUNCT_STRIP_RE is defined so
the character class keeps common Unicode apostrophe variants (e.g. U+2019 and
U+2018) in addition to ASCII (for example add \u2019 and \u2018 to the allowed
chars), leaving the rest of the logic unchanged so _is_guardrails_refusal still
matches normalized refusal strings in _GUARDRAILS_REFUSAL_NORMALIZED.



def _is_guardrails_refusal(content_delta: str) -> bool:
"""Detect the NeMo Guardrails refusal sentence in a streamed content chunk.

The previous implementation compared ``content_delta`` for strict equality
against the canonical refusal text. Benign drift emitted by the guardrails
container (e.g. ``can't`` vs ``cannot``, ``I'm`` vs ``I am``, casing or
trailing-whitespace differences, period vs comma) caused the equality to
miss, leaving retrieved contexts attached to a refused response and
producing stale citations on the wire.

This helper normalizes ``content_delta`` by lowercasing, replacing every
punctuation character except the ASCII apostrophe with a single space,
and collapsing whitespace. The normalized form is then checked for
membership in a tightly bounded allow-list (see
``_GUARDRAILS_REFUSAL_NORMALIZED``) so that drifted refusals are caught
while unrelated apologetic responses containing the word "sorry" are not
false-positively flagged as refusals.

Args:
content_delta: The latest streamed content chunk from the LLM /
guardrails pipeline.

Returns:
``True`` iff the normalized form of ``content_delta`` is one of the
recognized refusal phrasings; ``False`` otherwise (including for
empty strings and non-string inputs).
"""
if not isinstance(content_delta, str) or not content_delta:
return False
stripped = _GUARDRAILS_REFUSAL_PUNCT_STRIP_RE.sub(" ", content_delta).lower()
normalized = " ".join(stripped.split())
return normalized in _GUARDRAILS_REFUSAL_NORMALIZED


def generate_answer(
generator: "Generator[str]",
contexts: list[Any],
Expand Down Expand Up @@ -737,10 +794,12 @@ async def generate_answer_async(
# Accumulate answer chunks for final logging
accumulated_response += content_delta

# TODO: This is a hack to clear contexts if we get an error
# response from nemoguardrails
if content_delta == "I'm sorry, I can't respond to that.":
# Clear contexts if we get an error response
# Clear retrieved contexts if NeMo Guardrails refused this
# response, so downstream citations do not leak stale results
# alongside a refusal message. The match tolerates benign
# drift in the guardrails refusal phrasing — see
# `_is_guardrails_refusal` for the normalization contract.
if _is_guardrails_refusal(content_delta):
contexts = []
Comment on lines +797 to 803

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

Sync path generate_answer retains strict equality check.

The async path now uses fuzzy matching, but the synchronous generate_answer function (line 594) still has the hardcoded strict check if content_delta == "I'm sorry, I can't respond to that.". This creates a behavior disparity where the same drifted refusal (e.g., "I am sorry, I cannot respond to that.") will leak citations in sync mode but not in async mode.

♻️ Consider unifying both paths

Apply the same _is_guardrails_refusal check to the sync path:

-                if content_delta == "I'm sorry, I can't respond to that.":
-                    # Clear contexts if we get an error response
+                if _is_guardrails_refusal(content_delta):
                     contexts = []

This ensures consistent behavior across sync and async code paths.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/nvidia_rag/rag_server/response_generator.py` around lines 797 - 803, The
synchronous generate_answer path currently checks for an exact refusal string
(e.g., content_delta == "I'm sorry, I can't respond to that.") which differs
from the async path; update generate_answer to use the same fuzzy matcher by
calling _is_guardrails_refusal(content_delta) instead of the strict equality,
and when true clear contexts (contexts = []) just like in the async path so
citations are not leaked; ensure you reference generate_answer and
_is_guardrails_refusal when making the change to keep behavior consistent across
sync/async flows.

chain_response = ChainResponse()
response_choice = ChainResponseChoices(
Expand Down
195 changes: 189 additions & 6 deletions tests/unit/test_rag_server/test_response_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
TextContent,
Usage,
_is_empty_content,
_is_guardrails_refusal,
configure_object_store_operator,
error_response_generator,
error_response_generator_async,
Expand Down Expand Up @@ -802,6 +803,87 @@ async def test_retrieve_summary_exception(self):
assert "error" in result


class TestIsGuardrailsRefusal:
"""Tests for the `_is_guardrails_refusal` helper.

The helper gates the context-clearing branch in
``generate_answer_async`` (and, eventually, ``generate_answer``).
The branch must fire for benign output drift in the NeMo Guardrails
refusal sentence — punctuation, casing, ``I'm``/``I am``,
``can't``/``cannot``, trailing whitespace — and must NOT fire for
unrelated apologetic phrasings or empty inputs.
"""

@pytest.mark.parametrize(
"content_delta",
[
# Canonical guardrails refusal (unchanged from the original
# hardcoded string).
"I'm sorry, I can't respond to that.",
# Cartesian product of "I'm"/"I am" with "can't"/"cannot".
"I am sorry, I cannot respond to that.",
"I'm sorry, I cannot respond to that.",
"I am sorry, I can't respond to that.",
# Casing variants.
"i'm sorry, i can't respond to that.",
"I'M SORRY, I CAN'T RESPOND TO THAT.",
"I Am Sorry, I Cannot Respond To That.",
# Punctuation drift: missing trailing period.
"I'm sorry, I can't respond to that",
# Punctuation drift: extra/different separators.
"I'm sorry. I can't respond to that.",
"I'm sorry — I can't respond to that.",
"I'm sorry; I can't respond to that.",
# Trailing / leading / interior whitespace drift.
" I'm sorry, I can't respond to that. ",
"I'm sorry, I can't respond to that.",
"I'm sorry,\tI can't respond to that.",
"I'm sorry,\nI can't respond to that.",
],
)
def test_recognises_canonical_and_drifted_refusals(
self, content_delta: str
) -> None:
"""All canonical + benign-drift refusals normalize to an allowed form."""
assert _is_guardrails_refusal(content_delta) is True

@pytest.mark.parametrize(
"content_delta",
[
# Empty / falsy inputs.
"",
# Unrelated apologetic phrasing.
"I'm sorry, can't help.",
"Sorry, I can't do that.",
"I'm sorry to hear that.",
"I am sorry to hear that you cannot respond to that.",
# Just the word "sorry" — must not be treated as a refusal.
"sorry",
"Sorry.",
# Genuine RAG-style answer that happens to contain "sorry".
"I'm sorry, but the document does not mention that topic.",
"I am sorry; based on the provided context, the answer is 42.",
# Refusal-adjacent phrasings that are NOT the guardrails sentence.
"I cannot respond to that.",
"I can't respond to that question.",
"I refuse to respond to that.",
# Token-level streaming fragments (early/partial chunks).
"I'm",
"I'm sorry",
"I'm sorry,",
"respond to that.",
],
)
def test_does_not_false_positive(self, content_delta: str) -> None:
"""Unrelated content (including partial chunks) must not trigger."""
assert _is_guardrails_refusal(content_delta) is False

@pytest.mark.parametrize("content_delta", [None, 123, b"bytes", object()])
def test_non_string_input_returns_false(self, content_delta: Any) -> None:
"""Non-string / unexpected inputs must return False, not raise."""
assert _is_guardrails_refusal(content_delta) is False # type: ignore[arg-type]


class TestGenerateAnswerAsync:
"""Test generate_answer_async function"""

Expand Down Expand Up @@ -856,10 +938,7 @@ async def mock_generator():
item for item in result if item["choices"][0].get("finish_reason") is None
]
assert token_chunks[0]["choices"][0]["delta"]["content"] == ""
assert (
token_chunks[0]["choices"][0]["delta"]["reasoning_content"]
== "thinking"
)
assert token_chunks[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | 💤 Low value

Unrelated test assertion change bundled in guardrails refusal PR.

This line changes the expected reasoning_content value from None to "thinking", which appears unrelated to the PR's stated objective of fixing guardrails refusal detection. While the change looks correct for the test's intent (validating reasoning content streaming), it's unclear why this modification is included in this PR.

Consider either:

  1. Explaining in the commit message why this test correction is bundled with the guardrails fix, or
  2. Splitting this into a separate commit/PR for test correctness
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/unit/test_rag_server/test_response_generator.py` at line 941, The test
change alters the expected value of
token_chunks[0]["choices"][0]["delta"]["reasoning_content"] from None to
"thinking" which is unrelated to the guardrails refusal fix; either revert this
assertion back to the original expected value (None) in
tests/unit/test_rag_server/test_response_generator.py to keep the PR focused, or
extract this test correction into a separate commit/PR and include a clear
commit message documenting why the expected reasoning_content should be
"thinking" (reference the assertion token_chunks[...] delta reasoning_content
and ensure the test commit/PR explains the behavior change).

assert token_chunks[1]["choices"][0]["delta"]["content"] == "answer"
assert token_chunks[1]["choices"][0]["delta"]["reasoning_content"] is None

Expand All @@ -882,9 +961,27 @@ async def test_generate_answer_async_no_generator(self):
assert len(result) == 1
assert result[0].startswith("data: ")

@staticmethod
def _first_chunk_citations(chunks: list[str]) -> dict[str, Any]:
"""Parse the first SSE chunk from ``generate_answer_async`` and return
its ``citations`` block (the only chunk in which citations are populated
by the citations builder — first_chunk gate at the source)."""
assert chunks, "expected at least one streamed chunk"
first = chunks[0]
assert first.startswith("data: "), first
payload = json.loads(first.removeprefix("data: "))
return payload["citations"]

@pytest.mark.asyncio
async def test_generate_answer_async_error_response_chunk(self):
"""Test generate_answer_async with error response chunk"""
"""Canonical refusal: contexts must be cleared before citations are built.

The first streamed chunk MUST carry an empty ``citations`` block — i.e.
``total_results == 0`` and ``results == []`` — even though we passed in
a non-empty ``contexts`` list. This is the deterministic signal that
``contexts = []`` ran prior to the ``_citations_fn(...)`` call gated by
``first_chunk``.
"""

async def mock_generator():
yield "I'm sorry, I can't respond to that."
Expand All @@ -899,10 +996,96 @@ async def mock_generator():
generator=mock_generator(),
contexts=contexts,
model="test-model",
enable_citations=True,
):
result.append(chunk)

assert len(result) > 0
citations = self._first_chunk_citations(result)
assert citations["total_results"] == 0
assert citations["results"] == []

@pytest.mark.asyncio
@pytest.mark.parametrize(
"refusal_text",
[
# Bug 6301657 reproducer: lowercased "I am" + "cannot" form.
"I am sorry, I cannot respond to that.",
# Mixed drift forms.
"I'm sorry, I cannot respond to that.",
"I am sorry, I can't respond to that.",
# Casing + trailing whitespace.
"I'M SORRY, I CAN'T RESPOND TO THAT. ",
# Missing trailing period.
"I'm sorry, I can't respond to that",
],
)
async def test_generate_answer_async_clears_contexts_on_drifted_refusal(
self, refusal_text: str
):
"""Drifted refusal phrasings must also clear contexts (regression: NVBug 6301657).

Prior to the fix the strict equality check missed any benign drift in
the guardrails refusal sentence (e.g. ``I am`` vs ``I'm``, ``cannot``
vs ``can't``, trailing whitespace, casing). The contexts then stayed
populated and citations leaked stale documents alongside a refusal
response. This test asserts the first chunk's citations are empty for
every drift form covered by ``_GUARDRAILS_REFUSAL_NORMALIZED``.
"""

async def mock_generator():
yield refusal_text

mock_doc = Mock()
mock_doc.page_content = "Test content"
mock_doc.metadata = {"source": "test.pdf", "content_metadata": {"type": "text"}}
contexts = [mock_doc]

result = []
async for chunk in generate_answer_async(
generator=mock_generator(),
contexts=contexts,
model="test-model",
enable_citations=True,
):
result.append(chunk)

citations = self._first_chunk_citations(result)
assert citations["total_results"] == 0, (
f"contexts were not cleared for drifted refusal: {refusal_text!r}"
)
assert citations["results"] == []

@pytest.mark.asyncio
async def test_generate_answer_async_keeps_contexts_for_non_refusal(self):
"""Non-refusal responses must NOT have their contexts cleared.

Guards against an over-broad ``_is_guardrails_refusal`` matcher: an
ordinary RAG answer that happens to begin with a phrase like
``"I'm sorry,"`` must still carry citations through to the client.
"""

async def mock_generator():
yield "I'm sorry, but the document does not mention that."
yield " The provided context covers other topics."

mock_doc = Mock()
mock_doc.page_content = "Test content"
mock_doc.metadata = {"source": "test.pdf", "content_metadata": {"type": "text"}}
contexts = [mock_doc]

result = []
async for chunk in generate_answer_async(
generator=mock_generator(),
contexts=contexts,
model="test-model",
enable_citations=True,
):
result.append(chunk)

citations = self._first_chunk_citations(result)
assert citations["total_results"] >= 1, (
"contexts should NOT have been cleared for a non-refusal apology"
)

@pytest.mark.asyncio
async def test_generate_answer_async_with_rag_start_time(self):
Expand Down
Loading