Skip to content

Commit e3c5d1e

Browse files
feat: Enhance telemetry integration with OpenTelemetry and Azure Monitor for improved event tracking
1 parent d4a4e2c commit e3c5d1e

File tree

7 files changed

+386
-123
lines changed

7 files changed

+386
-123
lines changed

src/backend/api/api_routes.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
from api.status_updates import app_connection_manager, close_connection
1515

1616
# Third-party
17-
# Azure Monitor OpenTelemetry integration is currently causing issues with OpenAI calls in process_batch_async, needs further investigation, commenting out for now
18-
# from azure.monitor.opentelemetry import configure_azure_monitor
19-
2017
from common.logger.app_logger import AppLogger
2118
from common.services.batch_service import BatchService
2219

@@ -40,21 +37,6 @@
4037
router = APIRouter()
4138
logger = AppLogger("APIRoutes")
4239

43-
# Check if the Application Insights Instrumentation Key is set in the environment variables
44-
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
45-
if instrumentation_key:
46-
# Configure Application Insights if the Instrumentation Key is found
47-
# commenting below line as configure_azure_monitor is causing issues with OpenAI calls in process_batch_async, needs further investigation
48-
# configure_azure_monitor(connection_string=instrumentation_key)
49-
logging.info(
50-
"Application Insights configured with the provided Instrumentation Key"
51-
)
52-
else:
53-
# Log a warning if the Instrumentation Key is not found
54-
logging.warning(
55-
"No Application Insights Instrumentation Key found. Skipping configuration"
56-
)
57-
5840

5941
def record_exception_to_trace(e):
6042
"""Record exception to the current OpenTelemetry trace span."""

src/backend/api/event_utils.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,41 @@
33
import os
44

55
# Third-party
6-
from azure.monitor.events.extension import track_event
6+
from opentelemetry import trace
7+
from opentelemetry.trace import Status, StatusCode
78

89
from dotenv import load_dotenv
910

1011
load_dotenv()
1112

1213

1314
def track_event_if_configured(event_name: str, event_data: dict):
15+
"""Track a custom event using OpenTelemetry.
16+
17+
This creates a span with the event name and adds the event data as attributes.
18+
The span will appear in Application Insights as a dependency with the event data.
19+
"""
1420
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
1521
if instrumentation_key:
16-
track_event(event_name, event_data)
22+
try:
23+
tracer = trace.get_tracer(__name__)
24+
with tracer.start_as_current_span(f"event:{event_name}") as span:
25+
# Tag the span with the event name and mark it as a custom event
26+
span.set_attribute("event.name", event_name)
27+
span.set_attribute("event.type", "custom")
28+
29+
# Add all event data as span attributes
30+
for key, value in event_data.items():
31+
# Convert value to string to ensure it's serializable
32+
span.set_attribute(f"event.{key}", str(value))
33+
34+
# Add event to the span (appears in Application Insights)
35+
span.add_event(event_name, attributes=event_data)
36+
span.set_status(Status(StatusCode.OK))
37+
38+
logging.debug(f"Tracked event: {event_name} with data: {event_data}")
39+
except Exception as e:
40+
logging.error(f"Failed to track event {event_name}: {e}")
1741
else:
1842
logging.warning(
1943
f"Skipping track_event for {event_name} as Application Insights is not configured"

src/backend/app.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
from api.api_routes import router as backend_router
77

8+
from azure.monitor.opentelemetry import configure_azure_monitor
9+
from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter, AzureMonitorTraceExporter
10+
811
from common.config.config import app_config
912
from common.logger.app_logger import AppLogger
1013

@@ -15,6 +18,14 @@
1518

1619
from helper.azure_credential_utils import get_azure_credential
1720

21+
from opentelemetry import trace
22+
from opentelemetry._logs import set_logger_provider
23+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
24+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
25+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
26+
from opentelemetry.sdk.trace import TracerProvider
27+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
28+
1829
from semantic_kernel.agents.azure_ai.azure_ai_agent import AzureAIAgent # pylint: disable=E0611
1930

2031
from sql_agents.agent_manager import clear_sql_agents, set_sql_agents
@@ -46,6 +57,11 @@
4657
for logger_name in AZURE_LOGGING_PACKAGES:
4758
logging.getLogger(logger_name).setLevel(getattr(logging, AZURE_PACKAGE_LOGGING_LEVEL, logging.WARNING))
4859

60+
# Suppress noisy OpenTelemetry and Azure Monitor logs
61+
# logging.getLogger("opentelemetry.sdk").setLevel(logging.ERROR)
62+
# logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING)
63+
# logging.getLogger("azure.monitor.opentelemetry.exporter.export._base").setLevel(logging.WARNING)
64+
4965
logger = AppLogger("app")
5066

5167
# Global variables for agents
@@ -119,6 +135,60 @@ def create_app() -> FastAPI:
119135
allow_headers=["*"],
120136
)
121137

138+
# Configure Azure Monitor and instrument FastAPI for OpenTelemetry
139+
# This must happen AFTER app creation but BEFORE route registration
140+
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
141+
if instrumentation_key:
142+
# SOLUTION: Use manual telemetry setup instead of configure_azure_monitor
143+
# This gives us precise control over what gets instrumented, avoiding interference
144+
# with Semantic Kernel's async generators while still tracking Azure SDK calls
145+
146+
# Set up Azure Monitor exporter for traces
147+
azure_trace_exporter = AzureMonitorTraceExporter(connection_string=instrumentation_key)
148+
149+
# Create a tracer provider and add the Azure Monitor exporter
150+
tracer_provider = TracerProvider()
151+
tracer_provider.add_span_processor(BatchSpanProcessor(azure_trace_exporter))
152+
153+
# Set the global tracer provider
154+
trace.set_tracer_provider(tracer_provider)
155+
156+
# Set up Azure Monitor exporter for logs (appears in traces table)
157+
azure_log_exporter = AzureMonitorLogExporter(connection_string=instrumentation_key)
158+
159+
# Create a logger provider and add the Azure Monitor exporter
160+
logger_provider = LoggerProvider()
161+
logger_provider.add_log_record_processor(BatchLogRecordProcessor(azure_log_exporter))
162+
set_logger_provider(logger_provider)
163+
164+
# Attach OpenTelemetry handler to Python's root logger
165+
handler = LoggingHandler(logger_provider=logger_provider)
166+
logging.getLogger().addHandler(handler)
167+
168+
# Instrument ONLY FastAPI for HTTP request/response tracing
169+
# This is safe because it only wraps HTTP handlers, not internal async operations
170+
FastAPIInstrumentor.instrument_app(
171+
app,
172+
excluded_urls="socket,ws", # Exclude WebSocket URLs to reduce noise
173+
tracer_provider=tracer_provider
174+
)
175+
176+
# Optional: Add manual spans in your code for Azure SDK operations using:
177+
# from opentelemetry import trace
178+
# tracer = trace.get_tracer(__name__)
179+
# with tracer.start_as_current_span("operation_name"):
180+
# # your Azure SDK call here
181+
182+
logger.logger.info("Application Insights configured with selective instrumentation")
183+
logger.logger.info("✓ FastAPI HTTP tracing enabled")
184+
logger.logger.info("✓ Python logging export to Application Insights enabled")
185+
logger.logger.info("✓ Manual span support enabled for Azure SDK operations")
186+
logger.logger.info("✓ Custom events via OpenTelemetry enabled")
187+
logger.logger.info("✓ Semantic Kernel async generators unaffected")
188+
else:
189+
logger.logger.warning("No Application Insights connection string found. Telemetry disabled.")
190+
191+
122192
# Include routers with /api prefix
123193
app.include_router(backend_router, prefix="/api", tags=["backend"])
124194
# app.include_router(agents_router, prefix="/api/agents", tags=["agents"])
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Telemetry utilities for Application Insights integration."""
2+
3+
from common.telemetry.telemetry_helper import (
4+
add_span_attributes,
5+
get_tracer,
6+
trace_context,
7+
trace_operation,
8+
trace_sync_context,
9+
)
10+
11+
__all__ = [
12+
"trace_operation",
13+
"trace_context",
14+
"trace_sync_context",
15+
"get_tracer",
16+
"add_span_attributes",
17+
]
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
"""Helper utilities for adding telemetry spans to Azure SDK operations.
2+
3+
This module provides decorators and context managers for adding OpenTelemetry
4+
spans to Azure SDK calls (CosmosDB, Blob Storage, etc.) without interfering
5+
with Semantic Kernel's async generators.
6+
7+
Example usage:
8+
from common.telemetry.telemetry_helper import trace_operation
9+
10+
@trace_operation("cosmosdb_query")
11+
async def query_items(self, query: str):
12+
# Your CosmosDB query here
13+
pass
14+
"""
15+
16+
import functools
17+
from contextlib import asynccontextmanager, contextmanager
18+
from typing import Any, Optional
19+
20+
from opentelemetry import trace
21+
from opentelemetry.trace import Status, StatusCode
22+
23+
24+
def get_tracer(name: str = __name__):
    """Return the OpenTelemetry tracer registered under ``name``.

    Args:
        name: Instrumentation name handed to ``trace.get_tracer``; defaults
            to this module's ``__name__``.

    Returns:
        The tracer object produced by ``trace.get_tracer(name)``.
    """
    tracer = trace.get_tracer(name)
    return tracer
27+
28+
29+
def trace_operation(operation_name: str, attributes: Optional[dict] = None):
    """Decorator that runs the wrapped callable inside an OpenTelemetry span.

    Coroutine functions get an ``async`` wrapper that awaits the call inside
    the span; every other callable gets a synchronous wrapper. Async
    generator functions are not coroutine functions, so they take the
    synchronous path: the span then covers only the creation of the
    generator object, not its iteration.

    Args:
        operation_name: Name given to the span.
        attributes: Optional mapping of extra span attributes. Every value
            is stored as ``str(value)``.

    Returns:
        A decorator. The wrapper it produces keeps the wrapped callable's
        metadata (``functools.wraps``) and returns its result unchanged.

    Raises:
        Any exception raised by the wrapped callable propagates unchanged.

    Example:
        @trace_operation("batch_processing", {"service": "sql_agents"})
        async def process_batch(batch_id: str):
            # Your code here
            pass
    """
    # Imported locally so the decorator does not rely on an import statement
    # located after this definition in the module. inspect.iscoroutinefunction
    # is also the non-deprecated replacement for asyncio.iscoroutinefunction.
    import inspect

    def decorator(func):
        def _tag(span):
            # Caller-supplied attributes first, then the wrapped function's
            # name (only the name is recorded, never the call arguments).
            if attributes:
                for key, value in attributes.items():
                    span.set_attribute(key, str(value))
            span.set_attribute("function", func.__name__)

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            tracer = get_tracer(func.__module__)
            # start_as_current_span records an escaping exception and sets the
            # span status to ERROR by default (record_exception=True,
            # set_status_on_exception=True). Doing it again by hand would
            # attach the same exception to the span twice.
            with tracer.start_as_current_span(operation_name) as span:
                _tag(span)
                result = await func(*args, **kwargs)
                span.set_status(Status(StatusCode.OK))
                return result

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            tracer = get_tracer(func.__module__)
            # Exception recording is left to the span context manager, as in
            # the async wrapper.
            with tracer.start_as_current_span(operation_name) as span:
                _tag(span)
                result = func(*args, **kwargs)
                span.set_status(Status(StatusCode.OK))
                return result

        # Return the wrapper matching the kind of callable being decorated.
        if inspect.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
90+
91+
92+
@asynccontextmanager
async def trace_context(operation_name: str, attributes: Optional[dict] = None):
    """Async context manager that wraps a code block in an OpenTelemetry span.

    Args:
        operation_name: Name given to the span.
        attributes: Optional mapping of span attributes. Every value is
            stored as ``str(value)``.

    Yields:
        The span that is current for the duration of the ``async with`` body.

    Raises:
        Any exception raised inside the ``async with`` body propagates
        unchanged.

    Example:
        async with trace_context("cosmosdb_batch_query", {"batch_id": batch_id}):
            results = await database.query_items(query)
            # Your code here
    """
    tracer = get_tracer()
    # start_as_current_span records an escaping exception and sets the span
    # status to ERROR by default (record_exception=True,
    # set_status_on_exception=True). Repeating that by hand would attach the
    # same exception to the span twice, so only the success status is set here.
    with tracer.start_as_current_span(operation_name) as span:
        if attributes:
            for key, value in attributes.items():
                span.set_attribute(key, str(value))

        yield span
        # Reached only when the body completed without raising.
        span.set_status(Status(StatusCode.OK))
118+
119+
120+
@contextmanager
def trace_sync_context(operation_name: str, attributes: Optional[dict] = None):
    """Sync context manager that wraps a code block in an OpenTelemetry span.

    Args:
        operation_name: Name given to the span.
        attributes: Optional mapping of span attributes. Every value is
            stored as ``str(value)``.

    Yields:
        The span that is current for the duration of the ``with`` body.

    Raises:
        Any exception raised inside the ``with`` body propagates unchanged.

    Example:
        with trace_sync_context("blob_upload", {"file_name": file_name}):
            blob_client.upload_blob(data)
    """
    tracer = get_tracer()
    # start_as_current_span records an escaping exception and sets the span
    # status to ERROR by default (record_exception=True,
    # set_status_on_exception=True). Repeating that by hand would attach the
    # same exception to the span twice, so only the success status is set here.
    with tracer.start_as_current_span(operation_name) as span:
        if attributes:
            for key, value in attributes.items():
                span.set_attribute(key, str(value))

        yield span
        # Reached only when the body completed without raising.
        span.set_status(Status(StatusCode.OK))
145+
146+
147+
def add_span_attributes(attributes: dict):
    """Attach the given attributes to whichever span is currently active.

    Nothing happens when there is no current span or when the current span
    is not recording.

    Args:
        attributes: Mapping of attribute names to values; each value is
            stored as ``str(value)``.

    Example:
        add_span_attributes({"user_id": user_id, "batch_id": batch_id})
    """
    current = trace.get_current_span()
    # Guard clause: bail out unless there is a live, recording span.
    if not current or not current.is_recording():
        return
    for attr_name, attr_value in attributes.items():
        current.set_attribute(attr_name, str(attr_value))
160+
161+
162+
import asyncio

src/backend/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,6 @@ opentelemetry-sdk==1.39.0
4949
opentelemetry-api==1.39.0
5050
opentelemetry-semantic-conventions==0.60b0
5151
opentelemetry-instrumentation==0.60b0
52+
opentelemetry-instrumentation-fastapi==0.60b0
5253
azure-monitor-opentelemetry==1.8.6
5354
azure-ai-projects==1.0.0

0 commit comments

Comments
 (0)