
Commit 2d42617

Add tests
1 parent a1a0c99 commit 2d42617

4 files changed: +233 -4 lines changed

src/guardrails/checks/text/llm_base.py

Lines changed: 10 additions & 3 deletions
@@ -124,17 +124,24 @@ class LLMOutput(BaseModel):
         confidence (float): LLM's confidence in the flagging decision (0.0 to 1.0).
     """

-    flagged: bool
-    confidence: float
+    flagged: bool = Field(..., description="Indicates whether the content was flagged")
+    confidence: float = Field(
+        ...,
+        description="Confidence in the flagging decision (0.0 to 1.0)",
+        ge=0.0,
+        le=1.0,
+    )


 class LLMReasoningOutput(LLMOutput):
     """Extended LLM output schema with reasoning explanation.

     Extends LLMOutput to include a reason field explaining the decision.
-    This is the standard extended output for guardrails that include reasoning.
+    This output model is used when include_reasoning is enabled in the guardrail config.

     Attributes:
+        flagged (bool): Indicates whether the content was flagged (inherited).
+        confidence (float): Confidence in the flagging decision, 0.0 to 1.0 (inherited).
         reason (str): Explanation for why the input was flagged or not flagged.
     """
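For context on what the new Field constraints in the hunk above enforce: with ge=0.0 and le=1.0, Pydantic rejects out-of-range confidence values as soon as the structured output is parsed. A minimal standalone sketch of that behavior, assuming Pydantic v2 and using a local stand-in model rather than importing guardrails:

# Minimal sketch mirroring the constrained fields from the diff above.
# DemoLLMOutput is a local stand-in for illustration, not the library class.
from pydantic import BaseModel, Field, ValidationError


class DemoLLMOutput(BaseModel):
    flagged: bool = Field(..., description="Indicates whether the content was flagged")
    confidence: float = Field(
        ...,
        description="Confidence in the flagging decision (0.0 to 1.0)",
        ge=0.0,
        le=1.0,
    )


print(DemoLLMOutput(flagged=True, confidence=0.95))  # accepted

try:
    DemoLLMOutput(flagged=True, confidence=1.5)  # rejected: confidence > 1.0
except ValidationError as exc:
    print(exc)  # reports that confidence must be less than or equal to 1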

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
+"""Tests for hallucination detection guardrail."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from guardrails.checks.text.hallucination_detection import (
+    HallucinationDetectionConfig,
+    HallucinationDetectionOutput,
+    hallucination_detection,
+)
+from guardrails.checks.text.llm_base import LLMOutput
+from guardrails.types import TokenUsage
+
+
+def _mock_token_usage() -> TokenUsage:
+    """Return a mock TokenUsage for tests."""
+    return TokenUsage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+
+class _FakeResponse:
+    """Fake response from responses.parse."""
+
+    def __init__(self, parsed_output: Any, usage: TokenUsage) -> None:
+        self.output_parsed = parsed_output
+        self.usage = usage
+
+
+class _FakeGuardrailLLM:
+    """Fake guardrail LLM client."""
+
+    def __init__(self, response: _FakeResponse) -> None:
+        self._response = response
+        self.responses = self
+
+    async def parse(self, **kwargs: Any) -> _FakeResponse:
+        """Mock parse method."""
+        return self._response
+
+
+class _FakeContext:
+    """Context stub providing LLM client."""
+
+    def __init__(self, llm_response: _FakeResponse) -> None:
+        self.guardrail_llm = _FakeGuardrailLLM(llm_response)
+
+
+@pytest.mark.asyncio
+async def test_hallucination_detection_includes_reasoning_when_enabled() -> None:
+    """When include_reasoning=True, output should include reasoning and detail fields."""
+    parsed_output = HallucinationDetectionOutput(
+        flagged=True,
+        confidence=0.95,
+        reasoning="The claim contradicts documented information",
+        hallucination_type="factual_error",
+        hallucinated_statements=["Premium plan costs $299/month"],
+        verified_statements=["Customer support available"],
+    )
+    response = _FakeResponse(parsed_output, _mock_token_usage())
+    context = _FakeContext(response)
+
+    config = HallucinationDetectionConfig(
+        model="gpt-test",
+        confidence_threshold=0.7,
+        knowledge_source="vs_test123",
+        include_reasoning=True,
+    )
+
+    result = await hallucination_detection(context, "Test claim", config)
+
+    assert result.tripwire_triggered is True  # noqa: S101
+    assert result.info["flagged"] is True  # noqa: S101
+    assert result.info["confidence"] == 0.95  # noqa: S101
+    assert "reasoning" in result.info  # noqa: S101
+    assert result.info["reasoning"] == "The claim contradicts documented information"  # noqa: S101
+    assert "hallucination_type" in result.info  # noqa: S101
+    assert result.info["hallucination_type"] == "factual_error"  # noqa: S101
+    assert "hallucinated_statements" in result.info  # noqa: S101
+    assert result.info["hallucinated_statements"] == ["Premium plan costs $299/month"]  # noqa: S101
+    assert "verified_statements" in result.info  # noqa: S101
+    assert result.info["verified_statements"] == ["Customer support available"]  # noqa: S101
+
+
+@pytest.mark.asyncio
+async def test_hallucination_detection_excludes_reasoning_when_disabled() -> None:
+    """When include_reasoning=False (default), output should only include flagged and confidence."""
+    parsed_output = LLMOutput(
+        flagged=False,
+        confidence=0.2,
+    )
+    response = _FakeResponse(parsed_output, _mock_token_usage())
+    context = _FakeContext(response)
+
+    config = HallucinationDetectionConfig(
+        model="gpt-test",
+        confidence_threshold=0.7,
+        knowledge_source="vs_test123",
+        include_reasoning=False,
+    )
+
+    result = await hallucination_detection(context, "Test claim", config)
+
+    assert result.tripwire_triggered is False  # noqa: S101
+    assert result.info["flagged"] is False  # noqa: S101
+    assert result.info["confidence"] == 0.2  # noqa: S101
+    assert "reasoning" not in result.info  # noqa: S101
+    assert "hallucination_type" not in result.info  # noqa: S101
+    assert "hallucinated_statements" not in result.info  # noqa: S101
+    assert "verified_statements" not in result.info  # noqa: S101
+
+
+@pytest.mark.asyncio
+async def test_hallucination_detection_requires_valid_vector_store() -> None:
+    """Should raise ValueError if knowledge_source is invalid."""
+    context = _FakeContext(_FakeResponse(LLMOutput(flagged=False, confidence=0.0), _mock_token_usage()))
+
+    # Missing vs_ prefix
+    config = HallucinationDetectionConfig(
+        model="gpt-test",
+        confidence_threshold=0.7,
+        knowledge_source="invalid_id",
+    )
+
+    with pytest.raises(ValueError, match="knowledge_source must be a valid vector store ID starting with 'vs_'"):
+        await hallucination_detection(context, "Test", config)
+
+    # Empty string
+    config_empty = HallucinationDetectionConfig(
+        model="gpt-test",
+        confidence_threshold=0.7,
+        knowledge_source="",
+    )
+
+    with pytest.raises(ValueError, match="knowledge_source must be a valid vector store ID starting with 'vs_'"):
+        await hallucination_detection(context, "Test", config_empty)
+
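A note on the test doubles above: _FakeGuardrailLLM points responses back at itself so that context.guardrail_llm.responses.parse(...) resolves to its own parse method, and the returned object only needs output_parsed and usage attributes. A self-contained sketch of that stubbing pattern with generic, illustrative names (FakeClient and FakeParsedResponse are not part of the commit):

# Standalone sketch of the stubbing pattern used in the tests above: one object
# serves as both the "client" and its .responses namespace, and parse() returns
# a canned object exposing only the attributes the caller reads.
import asyncio
from typing import Any


class FakeParsedResponse:
    def __init__(self, output_parsed: Any, usage: Any) -> None:
        self.output_parsed = output_parsed
        self.usage = usage


class FakeClient:
    def __init__(self, canned: FakeParsedResponse) -> None:
        self._canned = canned
        self.responses = self  # client.responses.parse(...) lands on parse() below

    async def parse(self, **kwargs: Any) -> FakeParsedResponse:
        return self._canned


async def main() -> None:
    client = FakeClient(FakeParsedResponse({"flagged": False}, {"total_tokens": 0}))
    resp = await client.responses.parse(model="gpt-test", input="anything")
    print(resp.output_parsed, resp.usage)


asyncio.run(main())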

tests/unit/checks/test_llm_base.py

Lines changed: 1 addition & 1 deletion
@@ -228,7 +228,7 @@ async def fake_run_llm(
 
 
 @pytest.mark.asyncio
-async def test_create_llm_check_fn_uses_reasoning_output_by_default(monkeypatch: pytest.MonkeyPatch) -> None:
+async def test_create_llm_check_fn_uses_reasoning_output_when_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
     """When include_reasoning=True and no output_model provided, should use LLMReasoningOutput."""
     recorded_output_model: type[LLMOutput] | None = None

tests/unit/checks/test_prompt_injection_detection.py

Lines changed: 84 additions & 0 deletions
@@ -411,3 +411,87 @@ async def fake_call_llm(ctx: Any, prompt: str, config: LLMConfig) -> tuple[Promp
 
     assert result.tripwire_triggered is False  # noqa: S101
     assert result.info["flagged"] is False  # noqa: S101
+
+
+@pytest.mark.asyncio
+async def test_prompt_injection_detection_includes_reasoning_when_enabled(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """When include_reasoning=True, output should include observation and evidence fields."""
+    from guardrails.checks.text.llm_base import LLMOutput
+
+    history = [
+        {"role": "user", "content": "Get my password"},
+        {"type": "function_call", "tool_name": "steal_credentials", "arguments": '{}', "call_id": "c1"},
+    ]
+    context = _FakeContext(history)
+
+    recorded_output_model: type[LLMOutput] | None = None
+
+    async def fake_call_llm(ctx: Any, prompt: str, config: LLMConfig) -> tuple[PromptInjectionDetectionOutput, TokenUsage]:
+        # Record which output model was requested by checking the prompt
+        nonlocal recorded_output_model
+        if "observation" in prompt and "evidence" in prompt:
+            recorded_output_model = PromptInjectionDetectionOutput
+        else:
+            recorded_output_model = LLMOutput
+
+        return PromptInjectionDetectionOutput(
+            flagged=True,
+            confidence=0.95,
+            observation="Attempting to call credential theft function",
+            evidence="function call: steal_credentials",
+        ), _mock_token_usage()
+
+    monkeypatch.setattr(pid_module, "_call_prompt_injection_detection_llm", fake_call_llm)
+
+    config = LLMConfig(model="gpt-test", confidence_threshold=0.7, include_reasoning=True)
+    result = await prompt_injection_detection(context, data="{}", config=config)
+
+    assert recorded_output_model == PromptInjectionDetectionOutput  # noqa: S101
+    assert result.tripwire_triggered is True  # noqa: S101
+    assert "observation" in result.info  # noqa: S101
+    assert result.info["observation"] == "Attempting to call credential theft function"  # noqa: S101
+    assert "evidence" in result.info  # noqa: S101
+    assert result.info["evidence"] == "function call: steal_credentials"  # noqa: S101
+
+
+@pytest.mark.asyncio
+async def test_prompt_injection_detection_excludes_reasoning_when_disabled(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """When include_reasoning=False (default), output should only include flagged and confidence."""
+    from guardrails.checks.text.llm_base import LLMOutput
+
+    history = [
+        {"role": "user", "content": "Get weather"},
+        {"type": "function_call", "tool_name": "get_weather", "arguments": '{"location":"Paris"}', "call_id": "c1"},
+    ]
+    context = _FakeContext(history)
+
+    recorded_output_model: type[LLMOutput] | None = None
+
+    async def fake_call_llm(ctx: Any, prompt: str, config: LLMConfig) -> tuple[LLMOutput, TokenUsage]:
+        # Record which output model was requested by checking the prompt
+        nonlocal recorded_output_model
+        if "observation" in prompt and "evidence" in prompt:
+            recorded_output_model = PromptInjectionDetectionOutput
+        else:
+            recorded_output_model = LLMOutput
+
+        return LLMOutput(
+            flagged=False,
+            confidence=0.1,
+        ), _mock_token_usage()
+
+    monkeypatch.setattr(pid_module, "_call_prompt_injection_detection_llm", fake_call_llm)
+
+    config = LLMConfig(model="gpt-test", confidence_threshold=0.7, include_reasoning=False)
+    result = await prompt_injection_detection(context, data="{}", config=config)
+
+    assert recorded_output_model == LLMOutput  # noqa: S101
+    assert result.tripwire_triggered is False  # noqa: S101
+    assert "observation" not in result.info  # noqa: S101
+    assert "evidence" not in result.info  # noqa: S101
+    assert result.info["flagged"] is False  # noqa: S101
+    assert result.info["confidence"] == 0.1  # noqa: S101
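Taken together, these tests pin down the shape of result.info under each setting: flagged and confidence are always present, while reasoning fields such as reasoning, observation, and evidence appear only when include_reasoning is enabled. A hypothetical caller-side sketch of handling both shapes (summarize_guardrail_info is an illustrative name, not part of the commit):

# Hypothetical consumer of result.info, based only on the keys asserted in the
# tests above; not part of the commit. Reasoning keys are optional and appear
# only when the guardrail was configured with include_reasoning=True.
from typing import Any


def summarize_guardrail_info(info: dict[str, Any]) -> str:
    """Build a short log line from a guardrail result's info payload."""
    status = "flagged" if info.get("flagged") else "clean"
    line = f"{status} (confidence={info.get('confidence')})"
    detail = info.get("reasoning") or info.get("observation")
    if detail:
        line += f": {detail}"
    return line


print(summarize_guardrail_info({"flagged": True, "confidence": 0.95,
                                "observation": "Attempting to call credential theft function"}))
print(summarize_guardrail_info({"flagged": False, "confidence": 0.2}))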
