Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ ddtrace/contrib/internal/pydantic_ai @DataDog/ml-observ
ddtrace/contrib/internal/ray @DataDog/ml-observability
ddtrace/contrib/internal/mcp @DataDog/ml-observability
ddtrace/contrib/internal/vllm @DataDog/ml-observability
ddtrace/contrib/internal/claude_agent_sdk @DataDog/ml-observability
tests/llmobs @DataDog/ml-observability
tests/contrib/openai @DataDog/ml-observability
tests/contrib/langchain @DataDog/ml-observability
Expand All @@ -188,6 +189,7 @@ tests/contrib/pydantic_ai @DataDog/ml-observ
tests/contrib/ray @DataDog/ml-observability
tests/contrib/mcp @DataDog/ml-observability
tests/contrib/vllm @DataDog/ml-observability
tests/contrib/claude_agent_sdk @DataDog/ml-observability
.gitlab/tests/llmobs.yml @DataDog/ml-observability
# MLObs snapshot tests
tests/snapshots/tests.contrib.anthropic.* @DataDog/ml-observability
Expand Down
9 changes: 5 additions & 4 deletions ddtrace/llmobs/_integrations/claude_agent_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,15 +191,16 @@ def _parse_context_categories(self, context_messages: list[Any]) -> dict[str, An

return result

def _format_context(self, metadata: dict[str, Any], kwargs: dict[str, Any]) -> None:
    """Attach parsed before/after context summaries to the span metadata.

    Reads the ``_dd_context`` / ``_dd_before_context`` entries that the
    integration stashed in ``kwargs`` and, when present, stores their parsed
    category summaries under the reserved ``metadata["_dd"]`` namespace.
    Mutates ``metadata`` in place; returns nothing.
    """
    after_context = kwargs.get("_dd_context")
    before_context = kwargs.get("_dd_before_context")

    # Ensure the reserved "_dd" namespace exists and is a dict — user-supplied
    # metadata may already contain a non-dict "_dd" value, which we overwrite
    # rather than crash on.
    if "_dd" not in metadata or not isinstance(metadata["_dd"], dict):
        metadata["_dd"] = {}
    if after_context:
        metadata["_dd"]["after_context"] = self._parse_context_categories(after_context)
    if before_context:
        metadata["_dd"]["before_context"] = self._parse_context_categories(before_context)

def _extract_input_messages(self, prompt: Any, span: Span) -> list[Message]:
prompt_wrapper = span._get_ctx_item("_dd_prompt_wrapper") if span else None
Expand Down
4 changes: 3 additions & 1 deletion ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,9 @@ def _llmobs_span_event(self, span: Span) -> Optional[LLMObsSpanEvent]:
meta["model_provider"] = (span._get_ctx_item(MODEL_PROVIDER) or "custom").lower()
metadata = span._get_ctx_item(METADATA) or {}
if span_kind == "agent" and span._get_ctx_item(AGENT_MANIFEST) is not None:
metadata["agent_manifest"] = span._get_ctx_item(AGENT_MANIFEST)
metadata_dd = metadata.get("_dd") or {}
metadata_dd["agent_manifest"] = span._get_ctx_item(AGENT_MANIFEST)
Comment on lines +409 to +410

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Handle non-dict _dd metadata before writing manifest

If user-supplied metadata already contains _dd as a truthy non-dict (for example {"_dd": "custom"}), metadata.get("_dd") or {} returns that value and metadata_dd["agent_manifest"] = ... raises a TypeError. _submit_llmobs_span catches TypeError and drops the event, so agent spans are silently lost for this input shape; _dd should be validated/coerced to a dict before mutation.

Useful? React with 👍 / 👎.

metadata["_dd"] = metadata_dd
meta["metadata"] = metadata

input_type: Literal["value", "messages", ""] = ""
Expand Down
30 changes: 17 additions & 13 deletions tests/contrib/claude_agent_sdk/test_claude_agent_sdk_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ async def test_llmobs_query_extracts_content_and_usage(
{"content": "4", "role": "system"},
]
),
metadata={"agent_manifest": expected_agent_manifest()},
metadata={"_dd": {"agent_manifest": expected_agent_manifest()}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand Down Expand Up @@ -70,7 +70,7 @@ async def test_llmobs_query_with_options(self, claude_agent_sdk, llmobs_events,
{"content": "4", "role": "system"},
]
),
metadata={"max_turns": 3, "agent_manifest": expected_agent_manifest(max_iterations=3)},
metadata={"max_turns": 3, "_dd": {"agent_manifest": expected_agent_manifest(max_iterations=3)}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand All @@ -95,7 +95,7 @@ async def test_llmobs_query_error_no_output(
span_kind="agent",
input_value=safe_json([{"content": prompt, "role": "user"}]),
output_value=safe_json([{"content": ""}]),
metadata={"agent_manifest": {"framework": "Claude Agent SDK"}},
metadata={"_dd": {"agent_manifest": {"framework": "Claude Agent SDK"}}},
token_metrics={},
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
error="builtins.ValueError",
Expand Down Expand Up @@ -129,9 +129,11 @@ async def test_llmobs_client_query_captures_prompt(self, mock_client, llmobs_eve
]
),
metadata={
"after_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"before_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"agent_manifest": expected_agent_manifest(),
"_dd": {
"agent_manifest": expected_agent_manifest(),
"after_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"before_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
},
},
token_metrics={
"input_tokens": 14599,
Expand Down Expand Up @@ -193,7 +195,7 @@ async def test_llmobs_query_with_read_tool_use(
{"content": "4", "role": "system"},
]
),
metadata={"agent_manifest": expected_agent_manifest()},
metadata={"_dd": {"agent_manifest": expected_agent_manifest()}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand Down Expand Up @@ -247,7 +249,7 @@ async def test_llmobs_query_with_bash_tool_use(
{"content": "4", "role": "system"},
]
),
metadata={"agent_manifest": expected_agent_manifest()},
metadata={"_dd": {"agent_manifest": expected_agent_manifest()}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand Down Expand Up @@ -301,7 +303,7 @@ async def test_llmobs_query_with_grep_tool_use(
{"content": "4", "role": "system"},
]
),
metadata={"agent_manifest": expected_agent_manifest()},
metadata={"_dd": {"agent_manifest": expected_agent_manifest()}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand Down Expand Up @@ -334,7 +336,7 @@ async def prompt_generator():
{"content": "4", "role": "system"},
]
),
metadata={"agent_manifest": expected_agent_manifest()},
metadata={"_dd": {"agent_manifest": expected_agent_manifest()}},
token_metrics=EXPECTED_QUERY_USAGE,
tags={"ml_app": "unnamed-ml-app", "service": "tests.llmobs"},
)
Expand Down Expand Up @@ -372,9 +374,11 @@ async def prompt_generator():
]
),
metadata={
"after_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"before_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"agent_manifest": expected_agent_manifest(),
"_dd": {
"agent_manifest": expected_agent_manifest(),
"after_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
"before_context": {"categories": {}, "used_tokens": None, "total_tokens": None},
},
},
token_metrics={
"input_tokens": 14599,
Expand Down
2 changes: 1 addition & 1 deletion tests/contrib/crewai/test_crewai_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def expected_agent_span_args(role):
return {
"input_value": mock.ANY,
"output_value": mock.ANY,
"metadata": {"agent_manifest": AGENT_TO_EXPECTED_AGENT_MANIFEST[role]},
"metadata": {"_dd": {"agent_manifest": AGENT_TO_EXPECTED_AGENT_MANIFEST[role]}},
"tags": {"service": "tests.contrib.crewai", "ml_app": "<ml-app-name>"},
"span_links": True,
}
Expand Down
80 changes: 42 additions & 38 deletions tests/contrib/google_adk/test_google_adk_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,25 +118,27 @@ def expected_llmobs_tool_span_events_agent_run(
input_value="Say hello",
tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.google_adk"},
metadata={
"agent_manifest": {
"description": "Test agent for ADK integration testing",
"framework": "Google ADK",
"instructions": "You are a helpful test agent. You can: "
"(1) call tools using the provided "
"functions, (2) execute Python code "
"blocks when they are provided to you. "
"When you see ```python code blocks, "
"execute them using your code execution "
"capability. Always be helpful and use "
"your available capabilities.",
"model": "gemini-2.5-pro",
"model_configuration": '{"arbitrary_types_allowed": true, "extra": "forbid"}',
"name": "test_agent",
"session_management": {"session_id": "test-session", "user_id": "test-user"},
"tools": [
{"description": "A tiny search tool stub.", "name": "search_docs"},
{"description": "Simple arithmetic tool.", "name": "multiply"},
],
"_dd": {
"agent_manifest": {
"description": "Test agent for ADK integration testing",
"framework": "Google ADK",
"instructions": "You are a helpful test agent. You can: "
"(1) call tools using the provided "
"functions, (2) execute Python code "
"blocks when they are provided to you. "
"When you see ```python code blocks, "
"execute them using your code execution "
"capability. Always be helpful and use "
"your available capabilities.",
"model": "gemini-2.5-pro",
"model_configuration": '{"arbitrary_types_allowed": true, "extra": "forbid"}',
"name": "test_agent",
"session_management": {"session_id": "test-session", "user_id": "test-user"},
"tools": [
{"description": "A tiny search tool stub.", "name": "search_docs"},
{"description": "Simple arithmetic tool.", "name": "multiply"},
],
}
}
},
output_value=mock.ANY,
Expand All @@ -163,25 +165,27 @@ def expected_llmobs_agent_span_event_with_tools(llmobs_event, agent_span, tool_s
input_value="Can you search for information about recurring revenue?",
tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.google_adk"},
metadata={
"agent_manifest": {
"description": "Test agent for ADK integration testing",
"framework": "Google ADK",
"instructions": "You are a helpful test agent. You can: "
"(1) call tools using the provided "
"functions, (2) execute Python code "
"blocks when they are provided to you. "
"When you see ```python code blocks, "
"execute them using your code execution "
"capability. Always be helpful and use "
"your available capabilities.",
"model": "gemini-2.5-pro",
"model_configuration": '{"arbitrary_types_allowed": true, "extra": "forbid"}',
"name": "test_agent",
"session_management": {"session_id": "test-session", "user_id": "test-user"},
"tools": [
{"description": "A tiny search tool stub.", "name": "search_docs"},
{"description": "Simple arithmetic tool.", "name": "multiply"},
],
"_dd": {
"agent_manifest": {
"description": "Test agent for ADK integration testing",
"framework": "Google ADK",
"instructions": "You are a helpful test agent. You can: "
"(1) call tools using the provided "
"functions, (2) execute Python code "
"blocks when they are provided to you. "
"When you see ```python code blocks, "
"execute them using your code execution "
"capability. Always be helpful and use "
"your available capabilities.",
"model": "gemini-2.5-pro",
"model_configuration": '{"arbitrary_types_allowed": true, "extra": "forbid"}',
"name": "test_agent",
"session_management": {"session_id": "test-session", "user_id": "test-user"},
"tools": [
{"description": "A tiny search tool stub.", "name": "search_docs"},
{"description": "Simple arithmetic tool.", "name": "multiply"},
],
}
}
},
output_value=mock.ANY,
Expand Down
14 changes: 8 additions & 6 deletions tests/contrib/langgraph/test_langgraph_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,11 @@ def test_agent_manifest_simple_graph(self, llmobs_events, agentic_graph_with_con
"tools": [],
}

assert agent_a_span["meta"]["metadata"]["agent_manifest"] == expected_agent_a_manifest
assert conditional_agent_span["meta"]["metadata"]["agent_manifest"] == expected_conditional_agent_manifest
assert agent_d_span["meta"]["metadata"]["agent_manifest"] == expected_agent_d_manifest
assert agent_a_span["meta"]["metadata"]["_dd"]["agent_manifest"] == expected_agent_a_manifest
assert (
conditional_agent_span["meta"]["metadata"]["_dd"]["agent_manifest"] == expected_conditional_agent_manifest
)
assert agent_d_span["meta"]["metadata"]["_dd"]["agent_manifest"] == expected_agent_d_manifest

@pytest.mark.skipif(
LANGGRAPH_VERSION < (0, 3, 21), reason="create_react_agent has full support after LangGraph 0.3.21"
Expand Down Expand Up @@ -372,7 +374,7 @@ def test_agent_manifest_from_create_react_agent(self, llmobs_events, agent_from_
"instructions": "You are a helpful assistant who talks with a Boston accent but is also very nice. You speak in full sentences with at least 15 words.", # noqa: E501
}

assert react_agent_span["meta"]["metadata"]["agent_manifest"] == expected_agent_manifest
assert react_agent_span["meta"]["metadata"]["_dd"]["agent_manifest"] == expected_agent_manifest

@pytest.mark.skipif(LANGGRAPH_VERSION < (0, 3, 22), reason="Agent names are only supported in LangGraph 0.3.22+")
def test_agent_manifest_populates_tools_from_tool_node(self, llmobs_events, custom_agent_with_tool_node):
Expand Down Expand Up @@ -403,7 +405,7 @@ def test_agent_manifest_populates_tools_from_tool_node(self, llmobs_events, cust
],
}

assert agent_span["meta"]["metadata"]["agent_manifest"] == expected_agent_manifest
assert agent_span["meta"]["metadata"]["_dd"]["agent_manifest"] == expected_agent_manifest

@pytest.mark.skipif(LANGGRAPH_VERSION < (0, 3, 22), reason="Agent names are only supported in LangGraph 0.3.22+")
def test_agent_manifest_different_recursion_limit(
Expand All @@ -415,7 +417,7 @@ def test_agent_manifest_different_recursion_limit(

agent_span = _find_span_by_name(llmobs_events, "agent")

assert agent_span["meta"]["metadata"]["agent_manifest"]["max_iterations"] == 100
assert agent_span["meta"]["metadata"]["_dd"]["agent_manifest"]["max_iterations"] == 100

@pytest.mark.skipif(LANGGRAPH_VERSION < (0, 3, 22), reason="Agent names are only supported in LangGraph 0.3.22+")
def test_agent_with_tool_calls_integrations_enabled(
Expand Down
2 changes: 1 addition & 1 deletion tests/contrib/openai_agents/test_openai_agents_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@


def _expected_agent_metadata(agent_name: str) -> dict:
return {"agent_manifest": AGENT_TO_EXPECTED_AGENT_MANIFEST[agent_name]}
return {"_dd": {"agent_manifest": AGENT_TO_EXPECTED_AGENT_MANIFEST[agent_name]}}


def _assert_expected_agent_run(
Expand Down
18 changes: 10 additions & 8 deletions tests/contrib/pydantic_ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ def expected_foo_tool():

def expected_agent_metadata(instructions=None, system_prompt=None, model_settings=None, tools=None) -> dict:
    """Build the expected LLMObs span metadata for a PydanticAI test agent.

    The agent manifest is nested under the reserved ``"_dd"`` key, matching
    how the tracer now namespaces internally-generated metadata.

    :param instructions: expected agent instructions, or None.
    :param system_prompt: single system prompt; wrapped in a 1-tuple when set,
        empty tuple otherwise.
    :param model_settings: expected model settings dict, or None.
    :param tools: expected tool manifest list; defaults to an empty list.
    :return: metadata dict of the shape emitted on agent spans.
    """
    metadata = {
        "_dd": {
            "agent_manifest": {
                "framework": "PydanticAI",
                "name": "test_agent",
                "model": "gpt-4o",
                "model_settings": model_settings,
                "instructions": instructions,
                # Tests always configure at most one system prompt.
                "system_prompts": (system_prompt,) if system_prompt else (),
                "tools": tools if tools is not None else [],
            }
        }
    }
    return metadata
Expand Down