diff --git a/packages/uipath-openai-agents/pyproject.toml b/packages/uipath-openai-agents/pyproject.toml
index 329eb4c8..69d64b1c 100644
--- a/packages/uipath-openai-agents/pyproject.toml
+++ b/packages/uipath-openai-agents/pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
     "openai-agents>=0.6.5",
     "openinference-instrumentation-openai-agents>=1.4.0",
     "uipath>=2.10.0, <2.11.0",
+    "uipath-core>=0.5.18, <0.7.0",
     "uipath-runtime>=0.11.0, <0.12.0",
 ]
 classifiers = [
@@ -30,6 +31,9 @@ register = "uipath_openai_agents.middlewares:register_middleware"
 [project.entry-points."uipath.runtime.factories"]
 openai-agents = "uipath_openai_agents.runtime:register_runtime_factory"
 
+[project.entry-points."uipath.governance.adapters"]
+openai-agents = "uipath_openai_agents.governance:register_governance_adapter"
+
 [project.urls]
 Homepage = "https://uipath.com"
 Repository = "https://github.com/UiPath/uipath-integrations-python"
diff --git a/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py
new file mode 100644
index 00000000..344f275e
--- /dev/null
+++ b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/__init__.py
@@ -0,0 +1,51 @@
+"""Governance integration for ``uipath-openai-agents``.
+
+Registers :class:`OpenAIAgentsAdapter` with the adapter registry in
+``uipath.core.adapters`` so the governance host can attach the
+OpenAI-Agents-specific inner hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL,
+AFTER_TOOL) when it sees an OpenAI Agents agent.
+
+Registration is **idempotent**: calling :func:`register_governance_adapter`
+twice is a no-op on the second call.
+
+Wiring: the package exposes :func:`register_governance_adapter` as an entry
+point under ``uipath.governance.adapters``. The governance adapter discovery
+path calls it to register the adapter. Importing this module does not, by
+itself, mutate the global registry.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from uipath.core.adapters import get_adapter_registry
+
+from .adapter import GovernanceAgentHooks, OpenAIAgentsAdapter
+
+logger = logging.getLogger(__name__)
+
+_registered: bool = False
+
+
+def register_governance_adapter() -> None:
+    """Register :class:`OpenAIAgentsAdapter` with the global registry.
+
+    Idempotent — safe to call multiple times.
+    """
+    global _registered
+    if _registered:
+        return
+    registry = get_adapter_registry()
+    if any(a.name == "OpenAIAgents" for a in registry.get_all()):
+        _registered = True
+        return
+    registry.register(OpenAIAgentsAdapter())
+    _registered = True
+    logger.debug("Registered uipath-openai-agents governance adapter")
+
+
+__all__ = [
+    "GovernanceAgentHooks",
+    "OpenAIAgentsAdapter",
+    "register_governance_adapter",
+]
\ No newline at end of file
diff --git a/packages/uipath-openai-agents/src/uipath_openai_agents/governance/adapter.py b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/adapter.py
new file mode 100644
index 00000000..f0850b20
--- /dev/null
+++ b/packages/uipath-openai-agents/src/uipath_openai_agents/governance/adapter.py
@@ -0,0 +1,444 @@
+"""OpenAI Agents adapter for UiPath governance.
+
+Provides governance for OpenAI Agents SDK agents (``agents.Agent`` and any
+graph of agents reachable via ``handoffs``). Like the Google ADK adapter —
+and unlike the LangChain adapter, which wraps a ``Runnable`` and intercepts
+``invoke`` / ``ainvoke`` — OpenAI Agents are executed by ``Runner.run`` /
+``Runner.run_streamed``, which hold their **own** reference to the agent
+object. Replacing ``runtime.agent`` with a proxy would never reach the
+``Runner``. So this adapter installs governance directly onto each agent's
+native ``hooks`` attribute (an :class:`agents.AgentHooks`), mutating it in
+place:
+
+- ``on_llm_start`` → BEFORE_MODEL
+- ``on_llm_end`` → AFTER_MODEL
+- ``on_tool_start`` → TOOL_CALL
+- ``on_tool_end`` → AFTER_TOOL
+
+Because the mutation is in place, :meth:`OpenAIAgentsAdapter.attach` returns
+the **original agent** (hooks installed) rather than a wrapping proxy.
+``agents.Agent`` validates that ``hooks`` is an ``AgentHooks`` instance, so
+:class:`GovernanceAgentHooks` subclasses it (the ADK adapter could duck-type
+its callbacks; here the SDK type-checks the slot).
+
+``agent.hooks`` holds a **single** ``AgentHooks`` (not a list, as in ADK), so
+when an agent already carries user hooks we *chain*: governance runs first,
+then the previously-installed hooks. ``detach`` restores the original.
+
+Chain-level boundaries (BEFORE_AGENT / AFTER_AGENT) are owned by the
+governance host, so they are not fired here — that would duplicate every
+boundary evaluation. (The SDK's per-agent ``on_start`` / ``on_end`` are
+pass-through-only here for that reason.)
+
+Contracts and the evaluator protocol come from ``uipath-core``; this package
+contributes only the OpenAI-Agents-specific implementation and registers it
+with the adapter registry via the ``uipath.governance.adapters`` entry point.
+
+Audit emission and enforcement (raising :class:`GovernanceBlockException` on
+DENY) are owned by the evaluator itself. Each hook only extracts the relevant
+payload and calls the matching ``evaluate_*`` method;
+:class:`GovernanceBlockException` is allowed to propagate (it aborts the
+``Runner`` run), anything else is logged and swallowed so a governance bug
+never breaks an agent run.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Dict, List
+from uuid import uuid4
+
+from agents import Agent, AgentHooks
+from uipath.core.adapters import BaseAdapter, EvaluatorProtocol
+from uipath.core.governance.exceptions import GovernanceBlockException
+
+logger = logging.getLogger(__name__)
+
+# Cap on the text blob passed to BEFORE_MODEL / AFTER_MODEL governance
+# evaluation. Sized to match the governance host and the other adapters so
+# scan-time budgets are consistent across hooks. A long conversation history is
+# governed at the LLM layer by scanning only the latest request content, not the
+# full prompt — see :func:`_latest_input_text`.
+_BEFORE_MODEL_TEXT_CAP = 64000
+
+# Marks an agent we have already governed so a double ``attach`` is a no-op and
+# ``detach`` can restore the hooks slot to whatever was there before.
+_PREV_HOOKS_ATTR = "_uipath_governance_prev_hooks"
+
+
+class OpenAIAgentsAdapter(BaseAdapter):
+    """Adapter for the OpenAI Agents SDK.
+
+    Detects ``agents.Agent`` instances and installs governance hooks on every
+    agent reachable through the ``handoffs`` graph.
+    """
+
+    @property
+    def name(self) -> str:
+        return "OpenAIAgents"
+
+    def can_handle(self, agent: Any) -> bool:
+        """Return True only for an OpenAI Agents ``Agent``."""
+        return isinstance(agent, Agent)
+
+    def attach(
+        self,
+        agent: Any,
+        agent_id: str,
+        session_id: str,
+        evaluator: EvaluatorProtocol,
+    ) -> Any:
+        """Install governance hooks on the agent graph (mutated in place).
+
+        Returns the original ``agent`` — the ``Runner`` already holds this
+        reference, so in-place mutation is what actually wires governance into
+        execution. A wrapping proxy would not reach the ``Runner`` and would
+        break the SDK's ``isinstance(agent, Agent)`` checks.
+        """
+        agents = _iter_agents(agent)
+        installed = 0
+        for node in agents:
+            if isinstance(getattr(node, "hooks", None), GovernanceAgentHooks):
+                continue  # idempotent — already governed
+            prev = getattr(node, "hooks", None)
+            hooks = GovernanceAgentHooks(
+                evaluator=evaluator,
+                agent_name=agent_id,
+                session_id=session_id,
+                inner=prev,
+            )
+            # Remember what was there so detach can restore it.
+            setattr(node, _PREV_HOOKS_ATTR, prev)
+            node.hooks = hooks
+            installed += 1
+        if not agents:
+            logger.warning(
+                "OpenAIAgentsAdapter found no Agent in %s — deep hooks will not fire",
+                type(agent).__name__,
+            )
+        else:
+            logger.debug("Installed governance hooks on %d OpenAI agent(s)", installed)
+        return agent
+
+    def detach(self, governed: Any) -> Any:
+        """Restore each agent's original ``hooks`` slot and return the graph."""
+        for node in _iter_agents(governed):
+            if isinstance(getattr(node, "hooks", None), GovernanceAgentHooks):
+                node.hooks = getattr(node, _PREV_HOOKS_ATTR, None)
+                if hasattr(node, _PREV_HOOKS_ATTR):
+                    delattr(node, _PREV_HOOKS_ATTR)
+        return governed
+
+
+def _iter_agents(root: Any) -> List[Any]:
+    """Return every agent node reachable through the ``handoffs`` graph.
+
+    A node qualifies if it exposes the ``hooks`` slot. Handoff targets may be
+    ``Agent`` instances or ``Handoff`` objects that carry the target on
+    ``.agent``; both are followed so a multi-agent app is governed end to end.
+    Cycles and pathological depth are bounded by an id-visited set and a hard
+    cap.
+    """
+    found: List[Any] = []
+    seen: set[int] = set()
+    stack: List[Any] = [root]
+    while stack and len(seen) < 1000:
+        node = stack.pop()
+        if node is None or id(node) in seen:
+            continue
+        seen.add(id(node))
+        if hasattr(node, "hooks"):
+            found.append(node)
+        handoffs = getattr(node, "handoffs", None)
+        if isinstance(handoffs, (list, tuple)):
+            for h in handoffs:
+                # A Handoff wraps its target agent on ``.agent``; a bare Agent
+                # is itself the target.
+                stack.append(getattr(h, "agent", h))
+    return found
+
+
+class GovernanceAgentHooks(AgentHooks):  # type: ignore[type-arg]
+    """Per-agent ``AgentHooks`` bound to one governance evaluator.
+
+    The evaluator owns audit emission and DENY-raising. Each hook extracts the
+    relevant payload, calls the matching ``evaluate_*`` method, and returns
+    ``None``. :class:`GovernanceBlockException` is allowed to propagate — it
+    aborts the ``Runner`` run — anything else is logged and swallowed.
+
+    When the agent already carried an ``AgentHooks`` (``inner``), governance
+    runs first and then delegates to it, so user hooks keep working.
+    """
+
+    def __init__(
+        self,
+        evaluator: EvaluatorProtocol,
+        agent_name: str,
+        session_id: str,
+        inner: Any = None,
+    ) -> None:
+        self._evaluator = evaluator
+        self._agent_name = agent_name
+        self._session_id = session_id
+        self._inner = inner
+        self._trace_id = str(uuid4())
+        self._session_state: Dict[str, Any] = {"tool_calls": 0, "llm_calls": 0}
+
+    # ----- Model hooks -----------------------------------------------------
+
+    async def on_llm_start(
+        self,
+        context: Any,
+        agent: Any,
+        system_prompt: Any,
+        input_items: Any,
+    ) -> None:
+        """Evaluate BEFORE_MODEL rules immediately before the LLM call.
+
+        Scans only the **latest input item** — not the full history. The model
+        still receives the entire history (this hook does not mutate the
+        request); the evaluator focuses on the new content the agent is about
+        to respond to. Without this scoping, a violation in an earlier turn
+        would re-fire on every subsequent model call because that text stays in
+        the prompt for context.
+        """
+        try:
+            self._session_state["llm_calls"] = (
+                self._session_state.get("llm_calls", 0) + 1
+            )
+            model_input = _latest_input_text(input_items)
+            self._evaluator.evaluate_before_model(
+                model_input=model_input,
+                agent_name=self._agent_name,
+                runtime_id=self._session_id,
+                trace_id=self._trace_id,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as e:  # noqa: BLE001 - governance must not break the run
+            logger.warning("on_llm_start governance check failed (continuing): %s", e)
+        await _delegate(self._inner, "on_llm_start", context, agent, system_prompt, input_items)
+
+    async def on_llm_end(self, context: Any, agent: Any, response: Any) -> None:
+        """Evaluate AFTER_MODEL rules immediately after the LLM response."""
+        try:
+            model_output = _model_response_text(response)
+            self._evaluator.evaluate_after_model(
+                model_output=model_output,
+                agent_name=self._agent_name,
+                runtime_id=self._session_id,
+                trace_id=self._trace_id,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as e:  # noqa: BLE001
+            logger.warning("on_llm_end governance check failed (continuing): %s", e)
+        await _delegate(self._inner, "on_llm_end", context, agent, response)
+
+    # ----- Tool hooks ------------------------------------------------------
+
+    async def on_tool_start(self, context: Any, agent: Any, tool: Any) -> None:
+        """Evaluate TOOL_CALL rules immediately before a tool is invoked.
+
+        The OpenAI Agents SDK does not surface tool *arguments* on
+        ``on_tool_start`` (only the tool itself), so ``tool_args`` is empty
+        here — argument-shaped rules evaluate at AFTER_TOOL via the result, or
+        at the model layer where the call's arguments are visible in the output.
+        """
+        try:
+            self._session_state["tool_calls"] = (
+                self._session_state.get("tool_calls", 0) + 1
+            )
+            tool_name = getattr(tool, "name", None) or "unknown"
+            self._evaluator.evaluate_tool_call(
+                tool_name=tool_name,
+                tool_args={},
+                agent_name=self._agent_name,
+                runtime_id=self._session_id,
+                trace_id=self._trace_id,
+                session_state=self._session_state,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as e:  # noqa: BLE001
+            logger.warning("on_tool_start governance check failed (continuing): %s", e)
+        await _delegate(self._inner, "on_tool_start", context, agent, tool)
+
+    async def on_tool_end(
+        self, context: Any, agent: Any, tool: Any, result: Any
+    ) -> None:
+        """Evaluate AFTER_TOOL rules immediately after a tool is invoked."""
+        try:
+            tool_name = getattr(tool, "name", None) or "unknown"
+            tool_result = "" if result is None else _stringify(result)
+            self._evaluator.evaluate_after_tool(
+                tool_name=tool_name,
+                tool_result=tool_result,
+                agent_name=self._agent_name,
+                runtime_id=self._session_id,
+                trace_id=self._trace_id,
+            )
+        except GovernanceBlockException:
+            raise
+        except Exception as e:  # noqa: BLE001
+            logger.warning("on_tool_end governance check failed (continuing): %s", e)
+        await _delegate(self._inner, "on_tool_end", context, agent, tool, result)
+
+    # ----- Pass-through boundaries ----------------------------------------
+    # BEFORE_AGENT / AFTER_AGENT are owned by the governance host; here we only
+    # forward to any wrapped user hooks so their behaviour is preserved.
+
+    async def on_start(self, context: Any, agent: Any) -> None:
+        await _delegate(self._inner, "on_start", context, agent)
+
+    async def on_end(self, context: Any, agent: Any, output: Any) -> None:
+        await _delegate(self._inner, "on_end", context, agent, output)
+
+    async def on_handoff(self, context: Any, agent: Any, source: Any) -> None:
+        await _delegate(self._inner, "on_handoff", context, agent, source)
+
+
+# --------------------------------------------------------------------------
+# Delegation + text extraction (module-level, sync, duck-typed)
+# --------------------------------------------------------------------------
+
+
+async def _delegate(inner: Any, method: str, *args: Any) -> None:
+    """Call ``getattr(inner, method)(*args)`` if a wrapped hooks object provides it.
+
+    User hooks are best-effort: a failure in a chained hook is logged and
+    swallowed (it must not abort the run on governance's behalf), except a
+    :class:`GovernanceBlockException`, which always propagates.
+    """
+    if inner is None:
+        return
+    fn = getattr(inner, method, None)
+    if fn is None:
+        return
+    try:
+        await fn(*args)
+    except GovernanceBlockException:
+        raise
+    except Exception as e:  # noqa: BLE001
+        logger.warning("chained user hook %s failed (continuing): %s", method, e)
+
+
+def _latest_input_text(input_items: Any) -> str:
+    """Extract text from the most-recent item in an LLM-call input list.
+
+    ``input_items`` is the full ``list`` of response input items sent to the
+    model. We take the last entry — the new user message, or the tool
+    ``function_call_output`` being fed back — and pull its text via
+    :func:`_item_text`. Returns ``""`` when there is nothing extractable.
+    """
+    if not input_items:
+        return ""
+    if isinstance(input_items, (list, tuple)):
+        return _item_text(input_items[-1])
+    return _item_text(input_items)
+
+
+def _item_text(item: Any) -> str:
+    """Return governance-relevant text from one response input/output item.
+
+    Tolerant of both dict-shaped items (``{"role": ..., "content": ...}``,
+    ``{"type": "function_call", "name": ..., "arguments": ...}``) and
+    object-shaped items (``.content`` / ``.text`` / ``.name`` / ``.arguments``).
+    Content may itself be a string or a list of parts (each a dict with
+    ``text`` / ``input_text`` / ``output_text`` or an object with ``.text``).
+    Capped at :data:`_BEFORE_MODEL_TEXT_CAP`.
+    """
+    if item is None:
+        return ""
+    if isinstance(item, str):
+        return item[:_BEFORE_MODEL_TEXT_CAP]
+
+    pieces: List[str] = []
+
+    # A function/tool call carries its intent in name + arguments.
+    name = _get(item, "name")
+    arguments = _get(item, "arguments")
+    if name and (_get(item, "type") in (None, "function_call") or arguments is not None):
+        if isinstance(name, str):
+            pieces.append(name)
+        if arguments is not None:
+            pieces.append(_stringify(arguments))
+
+    content = _get(item, "content")
+    if content is not None:
+        pieces.append(_content_text(content))
+
+    # Tool result fed back to the model.
+    output = _get(item, "output")
+    if output is not None and not pieces:
+        pieces.append(_stringify(output))
+
+    text = "\n".join(p for p in pieces if p)
+    return text[:_BEFORE_MODEL_TEXT_CAP]
+
+
+def _content_text(content: Any) -> str:
+    """Return text from a message ``content`` (string or list of parts)."""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, (list, tuple)):
+        out: List[str] = []
+        for part in content:
+            if isinstance(part, str):
+                out.append(part)
+                continue
+            t = (
+                _get(part, "text")
+                or _get(part, "input_text")
+                or _get(part, "output_text")
+            )
+            if isinstance(t, str) and t:
+                out.append(t)
+        return "\n".join(out)
+    t = _get(content, "text")
+    return t if isinstance(t, str) else ""
+
+
+def _model_response_text(response: Any) -> str:
+    """Extract assistant text + tool-call intent from a ``ModelResponse``.
+
+    ``response.output`` is the ``list`` of output items the model produced
+    (assistant messages and function/tool calls). Each is run through
+    :func:`_item_text` so both visible replies and tool-call arguments are
+    governed. Capped at :data:`_BEFORE_MODEL_TEXT_CAP`.
+    """
+    if response is None:
+        return ""
+    output = _get(response, "output")
+    if output is None:
+        # Some shapes hand back text directly.
+        return _item_text(response)
+    items = output if isinstance(output, (list, tuple)) else [output]
+    collected: List[str] = []
+    remaining = _BEFORE_MODEL_TEXT_CAP
+    for item in items:
+        if remaining <= 0:
+            break
+        piece = _item_text(item)
+        if piece:
+            collected.append(piece)
+            remaining -= len(piece) + 1
+    return "\n".join(collected)[:_BEFORE_MODEL_TEXT_CAP]
+
+
+def _get(obj: Any, attr: str) -> Any:
+    """Read ``attr`` from a dict key or object attribute, else ``None``."""
+    if isinstance(obj, dict):
+        return obj.get(attr)
+    return getattr(obj, attr, None)
+
+
+def _stringify(value: Any) -> str:
+    """Render a dict / object payload as compact, scannable text."""
+    if isinstance(value, str):
+        return value
+    try:
+        return json.dumps(value, default=str, ensure_ascii=False)
+    except (TypeError, ValueError):
+        return str(value)
\ No newline at end of file
diff --git a/packages/uipath-openai-agents/tests/governance/__init__.py b/packages/uipath-openai-agents/tests/governance/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/uipath-openai-agents/tests/governance/test_adapter.py b/packages/uipath-openai-agents/tests/governance/test_adapter.py
new file mode 100644
index 00000000..e0225cc0
--- /dev/null
+++ b/packages/uipath-openai-agents/tests/governance/test_adapter.py
@@ -0,0 +1,379 @@
+"""Unit tests for the OpenAI Agents governance adapter.
+
+``can_handle`` is tested against a real ``agents.Agent``; everything else
+duck-types the OpenAI Agents payloads (response input/output items, tools)
+with lightweight fakes so the real code paths are exercised without a live
+LLM. ``GovernanceAgentHooks`` subclasses ``agents.AgentHooks`` (the SDK
+type-checks ``agent.hooks``), so importing the adapter requires
+``openai-agents`` either way.
+
+The package is configured with ``asyncio_mode = "auto"``, so ``async def``
+tests run without an explicit marker.
+"""
+
+from __future__ import annotations
+
+import logging
+from types import SimpleNamespace
+from typing import Any, List
+
+import pytest
+from uipath.core.governance.exceptions import GovernanceBlockException
+
+from uipath_openai_agents.governance.adapter import (
+    _BEFORE_MODEL_TEXT_CAP,
+    GovernanceAgentHooks,
+    OpenAIAgentsAdapter,
+)
+
+# --------------------------------------------------------------------------
+# Fakes
+# --------------------------------------------------------------------------
+
+
+class FakeEvaluator:
+    """Records evaluate_* calls; optionally BLOCKs on a named hook."""
+
+    def __init__(self, block_on: str | None = None) -> None:
+        self.block_on = block_on
+        self.calls: List[tuple[str, dict]] = []
+
+    def _record(self, hook: str, **kwargs: Any) -> None:
+        self.calls.append((hook, kwargs))
+        if self.block_on == hook:
+            raise GovernanceBlockException("blocked")  # type: ignore[call-arg]
+
+    def evaluate_before_agent(self, **kwargs: Any) -> None:
+        self._record("before_agent", **kwargs)
+
+    def evaluate_after_agent(self, **kwargs: Any) -> None:
+        self._record("after_agent", **kwargs)
+
+    def evaluate_before_model(self, **kwargs: Any) -> None:
+        self._record("before_model", **kwargs)
+
+    def evaluate_after_model(self, **kwargs: Any) -> None:
+        self._record("after_model", **kwargs)
+
+    def evaluate_tool_call(self, **kwargs: Any) -> None:
+        self._record("tool_call", **kwargs)
+
+    def evaluate_after_tool(self, **kwargs: Any) -> None:
+        self._record("after_tool", **kwargs)
+
+
+class FakeAgent:
+    """Minimal stand-in for ``agents.Agent`` (duck-typed by the adapter)."""
+
+    def __init__(self, name: str = "agent", handoffs: List[Any] | None = None):
+        self.name = name
+        self.hooks: Any = None
+        self.tools: List[Any] = []
+        self.handoffs = handoffs or []
+
+
+class FakeTool:
+    def __init__(self, name: str):
+        self.name = name
+
+
+class RecordingHooks:
+    """A user-supplied AgentHooks-like object that records delegated calls."""
+
+    def __init__(self) -> None:
+        self.seen: List[str] = []
+
+    async def on_llm_start(self, *_a: Any) -> None:
+        self.seen.append("on_llm_start")
+
+    async def on_llm_end(self, *_a: Any) -> None:
+        self.seen.append("on_llm_end")
+
+    async def on_tool_start(self, *_a: Any) -> None:
+        self.seen.append("on_tool_start")
+
+    async def on_tool_end(self, *_a: Any) -> None:
+        self.seen.append("on_tool_end")
+
+
+def _msg(text: str, role: str = "user") -> dict:
+    """A response input item carrying plain string content."""
+    return {"role": role, "content": text}
+
+
+def _msg_parts(*texts: str, role: str = "user") -> dict:
+    """A response input item carrying a list of text parts."""
+    return {"role": role, "content": [{"type": "input_text", "text": t} for t in texts]}
+
+
+def _function_call(name: str, arguments: str) -> dict:
+    return {"type": "function_call", "name": name, "arguments": arguments}
+
+
+def _output_message(*texts: str) -> SimpleNamespace:
+    """A ModelResponse output message item with text parts."""
+    parts = [SimpleNamespace(text=t) for t in texts]
+    return SimpleNamespace(role="assistant", content=parts)
+
+
+def _make_hooks(evaluator: FakeEvaluator, inner: Any = None) -> GovernanceAgentHooks:
+    return GovernanceAgentHooks(
+        evaluator=evaluator, agent_name="agent-1", session_id="sess-1", inner=inner
+    )
+
+
+# --------------------------------------------------------------------------
+# can_handle
+# --------------------------------------------------------------------------
+
+
+def test_can_handle_real_agent():
+    from agents import Agent
+
+    assert OpenAIAgentsAdapter().can_handle(Agent(name="t")) is True
+
+
+def test_can_handle_rejects_non_agent():
+    # A duck-typed look-alike must NOT be claimed — only a real Agent is.
+    assert OpenAIAgentsAdapter().can_handle(FakeAgent()) is False
+    assert OpenAIAgentsAdapter().can_handle(object()) is False
+
+
+# --------------------------------------------------------------------------
+# attach / detach
+# --------------------------------------------------------------------------
+
+
+def test_attach_installs_on_all_agents_in_handoff_graph():
+    leaf_a = FakeAgent("a")
+    leaf_b = FakeAgent("b")
+    root = FakeAgent("root", handoffs=[leaf_a, leaf_b])
+
+    returned = OpenAIAgentsAdapter().attach(
+        root, agent_id="x", session_id="s", evaluator=FakeEvaluator()
+    )
+
+    assert returned is root  # original returned, not a proxy
+    for node in (root, leaf_a, leaf_b):
+        assert isinstance(node.hooks, GovernanceAgentHooks)
+
+
+def test_attach_follows_handoff_wrapper_objects():
+    target = FakeAgent("target")
+    handoff = SimpleNamespace(agent=target)  # Handoff-shaped wrapper
+    root = FakeAgent("root", handoffs=[handoff])
+    OpenAIAgentsAdapter().attach(root, agent_id="x", session_id="s", evaluator=FakeEvaluator())
+    assert isinstance(target.hooks, GovernanceAgentHooks)
+
+
+def test_attach_is_idempotent():
+    agent = FakeAgent()
+    adapter = OpenAIAgentsAdapter()
+    ev = FakeEvaluator()
+    adapter.attach(agent, agent_id="x", session_id="s", evaluator=ev)
+    first = agent.hooks
+    adapter.attach(agent, agent_id="x", session_id="s", evaluator=ev)
+    assert agent.hooks is first  # not re-wrapped
+
+
+def test_attach_chains_existing_hooks():
+    agent = FakeAgent()
+    user_hooks = RecordingHooks()
+    agent.hooks = user_hooks
+    OpenAIAgentsAdapter().attach(agent, agent_id="x", session_id="s", evaluator=FakeEvaluator())
+    assert isinstance(agent.hooks, GovernanceAgentHooks)
+    assert agent.hooks._inner is user_hooks
+
+
+def test_detach_restores_previous_hooks():
+    agent = FakeAgent()
+    user_hooks = RecordingHooks()
+    agent.hooks = user_hooks
+    adapter = OpenAIAgentsAdapter()
+    adapter.attach(agent, agent_id="x", session_id="s", evaluator=FakeEvaluator())
+    adapter.detach(agent)
+    assert agent.hooks is user_hooks
+
+
+def test_detach_restores_none_when_no_prior_hooks():
+    agent = FakeAgent()
+    adapter = OpenAIAgentsAdapter()
+    adapter.attach(agent, agent_id="x", session_id="s", evaluator=FakeEvaluator())
+    adapter.detach(agent)
+    assert agent.hooks is None
+
+
+def test_attach_warns_when_no_agent(caplog):
+    with caplog.at_level(logging.WARNING):
+        OpenAIAgentsAdapter().attach(
+            object(), agent_id="x", session_id="s", evaluator=FakeEvaluator()
+        )
+    assert any("no Agent" in r.message for r in caplog.records)
+
+
+# --------------------------------------------------------------------------
+# on_llm_start (BEFORE_MODEL)
+# --------------------------------------------------------------------------
+
+
+async def test_on_llm_start_scopes_to_latest_item():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    items = [_msg("OLD turn — secret leak here"), _msg("the new question")]
+    await cb.on_llm_start(None, FakeAgent(), "system", items)
+    hook, kwargs = ev.calls[-1]
+    assert hook == "before_model"
+    assert kwargs["model_input"] == "the new question"
+    assert "OLD turn" not in kwargs["model_input"]
+
+
+async def test_on_llm_start_extracts_list_parts():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg_parts("part one", "part two")])
+    out = ev.calls[-1][1]["model_input"]
+    assert "part one" in out and "part two" in out
+
+
+async def test_on_llm_start_extracts_function_call_when_latest():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    items = [_function_call("lookup", '{"balance": "1000"}')]
+    await cb.on_llm_start(None, FakeAgent(), None, items)
+    out = ev.calls[-1][1]["model_input"]
+    assert "lookup" in out and "1000" in out
+
+
+async def test_on_llm_start_caps_text():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    huge = "x" * (_BEFORE_MODEL_TEXT_CAP + 5000)
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg(huge)])
+    assert len(ev.calls[-1][1]["model_input"]) <= _BEFORE_MODEL_TEXT_CAP
+
+
+async def test_on_llm_start_empty_input():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_start(None, FakeAgent(), None, [])
+    assert ev.calls[-1][1]["model_input"] == ""
+
+
+# --------------------------------------------------------------------------
+# on_llm_end (AFTER_MODEL)
+# --------------------------------------------------------------------------
+
+
+async def test_on_llm_end_extracts_text_and_function_call():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    response = SimpleNamespace(
+        output=[
+            _output_message("thinking"),
+            SimpleNamespace(
+                type="function_call",
+                name="submit_answer",
+                arguments='{"content": "final reply"}',
+            ),
+        ]
+    )
+    await cb.on_llm_end(None, FakeAgent(), response)
+    out = ev.calls[-1][1]["model_output"]
+    assert "thinking" in out and "submit_answer" in out and "final reply" in out
+
+
+async def test_on_llm_end_empty_response():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[]))
+    assert ev.calls[-1][1]["model_output"] == ""
+
+
+# --------------------------------------------------------------------------
+# tools
+# --------------------------------------------------------------------------
+
+
+async def test_on_tool_start_passes_name_and_session_state():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_start(None, FakeAgent(), FakeTool("transfer"))
+    hook, kwargs = ev.calls[-1]
+    assert hook == "tool_call"
+    assert kwargs["tool_name"] == "transfer"
+    assert kwargs["tool_args"] == {}  # OpenAI SDK does not surface args here
+    assert kwargs["session_state"]["tool_calls"] == 1
+
+
+async def test_on_tool_end_stringifies_dict_result():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("lookup"), {"x": 1})
+    out = ev.calls[-1][1]["tool_result"]
+    assert "x" in out and "1" in out
+
+
+async def test_on_tool_end_none_result():
+    ev = FakeEvaluator()
+    cb = _make_hooks(ev)
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("noop"), None)
+    assert ev.calls[-1][1]["tool_result"] == ""
+
+
+# --------------------------------------------------------------------------
+# chaining to user hooks
+# --------------------------------------------------------------------------
+
+
+async def test_governance_delegates_to_inner_hooks():
+    inner = RecordingHooks()
+    cb = _make_hooks(FakeEvaluator(), inner=inner)
+    await cb.on_llm_start(None, FakeAgent(), None, [_msg("hi")])
+    await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[]))
+    await cb.on_tool_start(None, FakeAgent(), FakeTool("t"))
+    await cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {})
+    assert inner.seen == ["on_llm_start", "on_llm_end", "on_tool_start", "on_tool_end"]
+
+
+# --------------------------------------------------------------------------
+# enforcement semantics
+# --------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "hook,invoke",
+    [
+        ("before_model", lambda cb: cb.on_llm_start(None, FakeAgent(), None, [_msg("hi")])),
+        ("after_model", lambda cb: cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[]))),
+        ("tool_call", lambda cb: cb.on_tool_start(None, FakeAgent(), FakeTool("t"))),
+        ("after_tool", lambda cb: cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {"r": 1})),
+    ],
+)
+async def test_block_exception_propagates(hook, invoke):
+    cb = _make_hooks(FakeEvaluator(block_on=hook))
+    with pytest.raises(GovernanceBlockException):
+        await invoke(cb)
+
+
+async def test_non_block_exception_is_swallowed(caplog):
+    class Boom:
+        def evaluate_before_model(self, **_: Any) -> None:
+            raise RuntimeError("evaluator bug")
+
+    cb = GovernanceAgentHooks(
+        evaluator=Boom(),  # type: ignore[arg-type]
+        agent_name="a",
+        session_id="s",
+    )
+    with caplog.at_level(logging.WARNING):
+        # must NOT raise — a governance bug can't break the agent run
+        await cb.on_llm_start(None, FakeAgent(), None, [_msg("x")])
+    assert any("governance check failed" in r.message for r in caplog.records)
+
+
+async def test_hooks_return_none():
+    cb = _make_hooks(FakeEvaluator())
+    assert await cb.on_llm_start(None, FakeAgent(), None, []) is None
+    assert await cb.on_llm_end(None, FakeAgent(), SimpleNamespace(output=[])) is None
+    assert await cb.on_tool_start(None, FakeAgent(), FakeTool("t")) is None
+    assert await cb.on_tool_end(None, FakeAgent(), FakeTool("t"), {}) is None
\ No newline at end of file
diff --git a/packages/uipath-openai-agents/uv.lock b/packages/uipath-openai-agents/uv.lock
index ff2970af..d34369c2 100644
--- a/packages/uipath-openai-agents/uv.lock
+++ b/packages/uipath-openai-agents/uv.lock
@@ -2356,6 +2356,7 @@ dependencies = [
     { name = "openai-agents" },
     { name = "openinference-instrumentation-openai-agents" },
     { name = "uipath" },
+    { name = "uipath-core" },
     { name = "uipath-runtime" },
 ]
 
@@ -2377,6 +2378,7 @@ requires-dist = [
     { name = "openai-agents", specifier = ">=0.6.5" },
     { name = "openinference-instrumentation-openai-agents", specifier = ">=1.4.0" },
     { name = "uipath", specifier = ">=2.10.0,<2.11.0" },
+    { name = "uipath-core", specifier = ">=0.5.18,<0.7.0" },
     { name = "uipath-runtime", specifier = ">=0.11.0,<0.12.0" },
 ]
 