Skip to content

Commit 2694008

Browse files
committed
fix(integrations): Anthropic: add content transformation for images and documents
1 parent 3d3ce5b commit 2694008

File tree

2 files changed

+840
-3
lines changed

2 files changed

+840
-3
lines changed

sentry_sdk/integrations/anthropic.py

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,104 @@ def _collect_ai_data(
120120
return model, input_tokens, output_tokens, content_blocks
121121

122122

123+
def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any]":
124+
"""
125+
Transform an Anthropic content block to a Sentry-compatible format.
126+
127+
Handles binary data (images, documents) by converting them to the standardized format:
128+
- base64 encoded data -> type: "blob"
129+
- URL references -> type: "uri"
130+
- file_id references -> type: "file"
131+
"""
132+
block_type = content_block.get("type")
133+
134+
# Handle image blocks
135+
if block_type == "image":
136+
source = content_block.get("source", {})
137+
source_type = source.get("type")
138+
media_type = source.get("media_type", "")
139+
140+
if source_type == "base64":
141+
return {
142+
"type": "blob",
143+
"modality": "image",
144+
"mime_type": media_type,
145+
"content": source.get("data", ""),
146+
}
147+
elif source_type == "url":
148+
return {
149+
"type": "uri",
150+
"modality": "image",
151+
"mime_type": media_type,
152+
"uri": source.get("url", ""),
153+
}
154+
elif source_type == "file":
155+
return {
156+
"type": "file",
157+
"modality": "image",
158+
"mime_type": media_type,
159+
"file_id": source.get("file_id", ""),
160+
}
161+
162+
# Handle document blocks (PDFs, etc.)
163+
elif block_type == "document":
164+
source = content_block.get("source", {})
165+
source_type = source.get("type")
166+
media_type = source.get("media_type", "")
167+
168+
if source_type == "base64":
169+
return {
170+
"type": "blob",
171+
"modality": "document",
172+
"mime_type": media_type,
173+
"content": source.get("data", ""),
174+
}
175+
elif source_type == "url":
176+
return {
177+
"type": "uri",
178+
"modality": "document",
179+
"mime_type": media_type,
180+
"uri": source.get("url", ""),
181+
}
182+
elif source_type == "file":
183+
return {
184+
"type": "file",
185+
"modality": "document",
186+
"mime_type": media_type,
187+
"file_id": source.get("file_id", ""),
188+
}
189+
elif source_type == "text":
190+
# Plain text documents - keep as is but mark the type
191+
return {
192+
"type": "text",
193+
"text": source.get("data", ""),
194+
}
195+
196+
# For text blocks and other types, return as-is
197+
return content_block
198+
199+
200+
def _transform_message_content(
201+
content: "Any",
202+
) -> "Any":
203+
"""
204+
Transform message content, handling both string content and list of content blocks.
205+
"""
206+
if isinstance(content, str):
207+
return content
208+
209+
if isinstance(content, (list, tuple)):
210+
transformed = []
211+
for block in content:
212+
if isinstance(block, dict):
213+
transformed.append(_transform_content_block(block))
214+
else:
215+
transformed.append(block)
216+
return transformed
217+
218+
return content
219+
220+
123221
def _set_input_data(
124222
span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration"
125223
) -> None:
@@ -164,19 +262,54 @@ def _set_input_data(
164262
and "content" in message
165263
and isinstance(message["content"], (list, tuple))
166264
):
265+
has_tool_result = False
266+
transformed_content = []
167267
for item in message["content"]:
168-
if item.get("type") == "tool_result":
268+
if isinstance(item, dict) and item.get("type") == "tool_result":
269+
has_tool_result = True
169270
normalized_messages.append(
170271
{
171272
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL,
172273
"content": { # type: ignore[dict-item]
173274
"tool_use_id": item.get("tool_use_id"),
174-
"output": item.get("content"),
275+
"output": _transform_message_content(
276+
item.get("content")
277+
),
175278
},
176279
}
177280
)
281+
else:
282+
# Transform content blocks (images, documents, etc.)
283+
transformed_content.append(
284+
_transform_content_block(item)
285+
if isinstance(item, dict)
286+
else item
287+
)
288+
289+
# If there are non-tool-result items, add them as a message
290+
if transformed_content and not has_tool_result:
291+
normalized_messages.append(
292+
{
293+
"role": message.get("role"),
294+
"content": transformed_content,
295+
}
296+
)
297+
elif transformed_content and has_tool_result:
298+
# Mixed content: tool results + other content
299+
normalized_messages.append(
300+
{
301+
"role": message.get("role"),
302+
"content": transformed_content,
303+
}
304+
)
178305
else:
179-
normalized_messages.append(message)
306+
# Transform content for non-list messages or assistant messages
307+
transformed_message = message.copy()
308+
if "content" in transformed_message:
309+
transformed_message["content"] = _transform_message_content(
310+
transformed_message["content"]
311+
)
312+
normalized_messages.append(transformed_message)
180313

181314
role_normalized_messages = normalize_message_roles(normalized_messages)
182315
scope = sentry_sdk.get_current_scope()

0 commit comments

Comments
 (0)