Skip to content

Commit 0593023

Browse files
Merge pull request #361 from microsoft/psl-abhar
feat: Implement OpenTelemetry telemetry for backend with Application Insights and Azure Monitor Private Link
2 parents f6915b1 + 6b768ae commit 0593023

File tree

9 files changed

+568
-161
lines changed

9 files changed

+568
-161
lines changed

infra/main.bicep

Lines changed: 97 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -291,8 +291,25 @@ module applicationInsights 'br/public:avm/res/insights/component:0.7.0' = if (en
291291
retentionInDays: 365
292292
kind: 'web'
293293
disableIpMasking: false
294-
disableLocalAuth: true
295294
flowType: 'Bluefield'
295+
// Well-Architected Framework (WAF) aligned configuration - block public ingestion/query when private networking is enabled
296+
publicNetworkAccessForIngestion: enablePrivateNetworking ? 'Disabled' : 'Enabled'
297+
publicNetworkAccessForQuery: enablePrivateNetworking ? 'Disabled' : 'Enabled'
298+
}
299+
}
300+
301+
// ========== Data Collection Endpoint (DCE) ========== //
302+
// Required for Azure Monitor Private Link - provides private ingestion and configuration endpoints
303+
// Per: https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/private-link-configure
// Deployed only when BOTH enablePrivateNetworking and enableMonitoring are true, so any
// `dataCollectionEndpoint!.outputs.*` reference elsewhere in this template must sit behind
// the same two flags.
304+
module dataCollectionEndpoint 'br/public:avm/res/insights/data-collection-endpoint:0.5.0' = if (enablePrivateNetworking && enableMonitoring) {
// Deployment name is truncated to at most 64 characters.
305+
name: take('avm.res.insights.data-collection-endpoint.${solutionSuffix}', 64)
306+
params: {
307+
name: 'dce-${solutionSuffix}'
308+
location: location
// Same kind as the Windows VM data collection rule that references this endpoint.
309+
kind: 'Windows'
// Public network access is switched off on the endpoint itself; this module only exists
// in the private-networking configuration (see the deployment condition above).
310+
publicNetworkAccess: 'Disabled'
311+
tags: allTags
312+
enableTelemetry: enableTelemetry
296313
}
297314
}
298315

@@ -320,6 +337,10 @@ var privateDnsZones = [
320337
'privatelink.vaultcore.azure.net'
321338
'privatelink.blob.${environment().suffixes.storage}'
322339
'privatelink.file.${environment().suffixes.storage}'
340+
'privatelink.monitor.azure.com' // Azure Monitor global endpoints (App Insights, DCE)
341+
'privatelink.oms.opinsights.azure.com' // Log Analytics OMS endpoints
342+
'privatelink.ods.opinsights.azure.com' // Log Analytics ODS ingestion endpoints
343+
'privatelink.agentsvc.azure-automation.net' // Agent service automation endpoints
323344
]
324345

325346
// DNS Zone Index Constants
@@ -331,6 +352,10 @@ var dnsZoneIndex = {
331352
keyVault: 4
332353
storageBlob: 5
333354
storageFile: 6
355+
monitor: 7
356+
oms: 8
357+
ods: 9
358+
agentSvc: 10
334359
}
335360

336361
// ===================================================
@@ -356,6 +381,76 @@ module avmPrivateDnsZones 'br/public:avm/res/network/private-dns-zone:0.8.0' = [
356381
}
357382
]
358383

384+
// ========== Azure Monitor Private Link Scope (AMPLS) ========== //
385+
// Step 1: Create AMPLS
386+
// Step 2: Connect Azure Monitor resources (LAW, Application Insights, DCE) to the AMPLS
387+
// Step 3: Connect AMPLS to a private endpoint with required DNS zones
388+
// Per: https://learn.microsoft.com/en-us/azure/azure-monitor/fundamentals/private-link-configure
// Deployed whenever enablePrivateNetworking is true. Application Insights and the DCE are only
// linked when enableMonitoring is also true; the Log Analytics workspace link is unconditional.
// NOTE(review): confirm logAnalyticsWorkspaceResourceId still resolves to a real workspace when
// enableMonitoring is false - its definition is not visible in this hunk.
389+
module azureMonitorPrivateLinkScope 'br/public:avm/res/insights/private-link-scope:0.6.0' = if (enablePrivateNetworking) {
// Deployment name is truncated to at most 64 characters.
390+
name: take('avm.res.insights.private-link-scope.${solutionSuffix}', 64)
// Explicit ordering on the monitoring resources and the virtual network; the linter rule that
// would flag these as unnecessary is suppressed on purpose.
391+
#disable-next-line no-unnecessary-dependson
392+
dependsOn: [logAnalyticsWorkspace, applicationInsights, dataCollectionEndpoint, virtualNetwork]
393+
params: {
394+
name: 'ampls-${solutionSuffix}'
395+
location: 'global'
396+
// Access mode: PrivateOnly ensures all ingestion and queries go through private link
397+
accessModeSettings: {
398+
ingestionAccessMode: 'PrivateOnly'
399+
queryAccessMode: 'PrivateOnly'
400+
}
401+
// Step 2: Connect Azure Monitor resources to the AMPLS as scoped resources
// The first list (Log Analytics) is always present; the second list is appended only when
// enableMonitoring is true, otherwise an empty array is concatenated.
402+
scopedResources: concat([
403+
{
404+
name: 'scoped-law'
405+
linkedResourceId: logAnalyticsWorkspaceResourceId
406+
}
407+
], enableMonitoring ? [
408+
{
409+
name: 'scoped-appi'
// `!` asserts the conditional module was deployed; this branch is only taken when
// enableMonitoring is true.
410+
linkedResourceId: applicationInsights!.outputs.resourceId
411+
}
412+
{
413+
name: 'scoped-dce'
// The DCE module requires enablePrivateNetworking && enableMonitoring, which both hold here.
414+
linkedResourceId: dataCollectionEndpoint!.outputs.resourceId
415+
}
416+
] : [])
417+
// Step 3: Connect AMPLS to a private endpoint
418+
// The private endpoint requires 5 DNS zones per documentation:
419+
// - privatelink.monitor.azure.com (App Insights + DCE global endpoints)
420+
// - privatelink.oms.opinsights.azure.com (Log Analytics OMS)
421+
// - privatelink.ods.opinsights.azure.com (Log Analytics ODS ingestion)
422+
// - privatelink.agentsvc.azure-automation.net (Agent service automation)
423+
// - privatelink.blob.${environment().suffixes.storage}, e.g. privatelink.blob.core.windows.net
//   (Agent solution packs storage; reuses the existing storageBlob zone entry)
424+
privateEndpoints: [
425+
{
426+
name: 'pep-ampls-${solutionSuffix}'
// NOTE(review): assumes virtualNetwork is deployed whenever enablePrivateNetworking is true -
// its condition is not visible in this hunk.
427+
subnetResourceId: virtualNetwork!.outputs.pepsSubnetResourceId
428+
privateDnsZoneGroup: {
// Each entry looks up a zone module by its position in privateDnsZones via dnsZoneIndex,
// so the index constants must stay in step with the order of that array.
429+
privateDnsZoneGroupConfigs: [
430+
{
431+
privateDnsZoneResourceId: avmPrivateDnsZones[dnsZoneIndex.monitor]!.outputs.resourceId
432+
}
433+
{
434+
privateDnsZoneResourceId: avmPrivateDnsZones[dnsZoneIndex.oms]!.outputs.resourceId
435+
}
436+
{
437+
privateDnsZoneResourceId: avmPrivateDnsZones[dnsZoneIndex.ods]!.outputs.resourceId
438+
}
439+
{
440+
privateDnsZoneResourceId: avmPrivateDnsZones[dnsZoneIndex.agentSvc]!.outputs.resourceId
441+
}
442+
{
443+
privateDnsZoneResourceId: avmPrivateDnsZones[dnsZoneIndex.storageBlob]!.outputs.resourceId
444+
}
445+
]
446+
}
447+
}
448+
]
449+
tags: allTags
450+
enableTelemetry: enableTelemetry
451+
}
452+
}
453+
359454
// Azure Bastion Host
360455
var bastionHostName = 'bas-${solutionSuffix}'
361456
module bastionHost 'br/public:avm/res/network/bastion-host:0.8.0' = if (enablePrivateNetworking) {
@@ -437,6 +532,7 @@ module windowsVmDataCollectionRules 'br/public:avm/res/insights/data-collection-
437532
location: dataCollectionRulesLocation
438533
dataCollectionRuleProperties: {
439534
kind: 'Windows'
535+
dataCollectionEndpointResourceId: dataCollectionEndpoint!.outputs.resourceId
440536
dataSources: {
441537
performanceCounters: [
442538
{
@@ -495,26 +591,6 @@ module windowsVmDataCollectionRules 'br/public:avm/res/insights/data-collection-
495591
name: 'perfCounterDataSource60'
496592
}
497593
]
498-
windowsEventLogs: [
499-
{
500-
name: 'SecurityAuditEvents'
501-
streams: [
502-
'Microsoft-WindowsEvent'
503-
]
504-
eventLogName: 'Security'
505-
eventTypes: [
506-
{
507-
eventType: 'Audit Success'
508-
}
509-
{
510-
eventType: 'Audit Failure'
511-
}
512-
]
513-
xPathQueries: [
514-
'Security!*[System[(EventID=4624 or EventID=4625)]]'
515-
]
516-
}
517-
]
518594
}
519595
destinations: {
520596
logAnalytics: [
@@ -532,8 +608,6 @@ module windowsVmDataCollectionRules 'br/public:avm/res/insights/data-collection-
532608
destinations: [
533609
'la-${dataCollectionRulesResourceName}'
534610
]
535-
transformKql: 'source'
536-
outputStream: 'Microsoft-Perf'
537611
}
538612
]
539613
}

src/backend/api/api_routes.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
# Standard library
44
import asyncio
55
import io
6-
import logging
7-
import os
86
import zipfile
97
from typing import Optional
108

@@ -14,9 +12,6 @@
1412
from api.status_updates import app_connection_manager, close_connection
1513

1614
# Third-party
17-
# Azure Monitor OpenTelemetry integration is currently causing issues with OpenAI calls in process_batch_async, needs further investigation, commenting out for now
18-
# from azure.monitor.opentelemetry import configure_azure_monitor
19-
2015
from common.logger.app_logger import AppLogger
2116
from common.services.batch_service import BatchService
2217

@@ -40,21 +35,6 @@
4035
router = APIRouter()
4136
logger = AppLogger("APIRoutes")
4237

43-
# Check if the Application Insights Instrumentation Key is set in the environment variables
44-
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
45-
if instrumentation_key:
46-
# Configure Application Insights if the Instrumentation Key is found
47-
# commenting below line as configure_azure_monitor is causing issues with OpenAI calls in process_batch_async, needs further investigation
48-
# configure_azure_monitor(connection_string=instrumentation_key)
49-
logging.info(
50-
"Application Insights configured with the provided Instrumentation Key"
51-
)
52-
else:
53-
# Log a warning if the Instrumentation Key is not found
54-
logging.warning(
55-
"No Application Insights Instrumentation Key found. Skipping configuration"
56-
)
57-
5838

5939
def record_exception_to_trace(e):
6040
"""Record exception to the current OpenTelemetry trace span."""

src/backend/api/event_utils.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,69 @@
33
import os
44

55
# Third-party
6-
from azure.monitor.events.extension import track_event
6+
from applicationinsights import TelemetryClient
7+
from applicationinsights.channel import SynchronousQueue, SynchronousSender, TelemetryChannel
78

89
from dotenv import load_dotenv
910

1011
load_dotenv()
1112

13+
# Global telemetry client (initialized once)
14+
_telemetry_client = None
15+
16+
17+
def _get_telemetry_client():
18+
"""Get or create the Application Insights telemetry client."""
19+
global _telemetry_client
20+
21+
if _telemetry_client is None:
22+
connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
23+
if connection_string:
24+
try:
25+
# Extract instrumentation key from connection string
26+
# Format: InstrumentationKey=xxx;IngestionEndpoint=https://...
27+
parts = dict(part.split('=', 1) for part in connection_string.split(';') if '=' in part)
28+
instrumentation_key = parts.get('InstrumentationKey')
29+
30+
if instrumentation_key:
31+
# Create a synchronous channel for immediate sending
32+
sender = SynchronousSender()
33+
queue = SynchronousQueue(sender)
34+
channel = TelemetryChannel(None, queue)
35+
36+
_telemetry_client = TelemetryClient(instrumentation_key, channel)
37+
logging.info("Application Insights TelemetryClient initialized successfully")
38+
else:
39+
logging.error("Could not extract InstrumentationKey from connection string")
40+
except Exception as e:
41+
logging.error(f"Failed to initialize TelemetryClient: {e}")
42+
43+
return _telemetry_client
44+
1245

1346
def track_event_if_configured(event_name: str, event_data: dict):
47+
"""Track a custom event to Application Insights customEvents table.
48+
49+
This uses the Application Insights SDK TelemetryClient which properly
50+
sends custom events to the customEvents table in Application Insights.
51+
"""
1452
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
1553
if instrumentation_key:
16-
track_event(event_name, event_data)
54+
try:
55+
client = _get_telemetry_client()
56+
if client:
57+
# Convert all values to strings to ensure compatibility
58+
properties = {k: str(v) for k, v in event_data.items()}
59+
60+
# Track the custom event
61+
client.track_event(event_name, properties=properties)
62+
client.flush() # Ensure immediate sending
63+
64+
logging.debug(f"Tracked custom event: {event_name} with data: {event_data}")
65+
else:
66+
logging.warning("TelemetryClient not available, custom event not tracked")
67+
except Exception as e:
68+
logging.error(f"Failed to track event {event_name}: {e}")
1769
else:
1870
logging.warning(
1971
f"Skipping track_event for {event_name} as Application Insights is not configured"

src/backend/app.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from api.api_routes import router as backend_router
77

8+
from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter, AzureMonitorTraceExporter
9+
810
from common.config.config import app_config
911
from common.logger.app_logger import AppLogger
1012

@@ -15,6 +17,14 @@
1517

1618
from helper.azure_credential_utils import get_azure_credential
1719

20+
from opentelemetry import trace
21+
from opentelemetry._logs import set_logger_provider
22+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
23+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
24+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
25+
from opentelemetry.sdk.trace import TracerProvider
26+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
27+
1828
from semantic_kernel.agents.azure_ai.azure_ai_agent import AzureAIAgent # pylint: disable=E0611
1929

2030
from sql_agents.agent_manager import clear_sql_agents, set_sql_agents
@@ -46,6 +56,11 @@
4656
for logger_name in AZURE_LOGGING_PACKAGES:
4757
logging.getLogger(logger_name).setLevel(getattr(logging, AZURE_PACKAGE_LOGGING_LEVEL, logging.WARNING))
4858

59+
# Optional: uncomment the lines below to suppress noisy OpenTelemetry and Azure Monitor logs
60+
# logging.getLogger("opentelemetry.sdk").setLevel(logging.ERROR)
61+
# logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING)
62+
# logging.getLogger("azure.monitor.opentelemetry.exporter.export._base").setLevel(logging.WARNING)
63+
4964
logger = AppLogger("app")
5065

5166
# Global variables for agents
@@ -119,6 +134,59 @@ def create_app() -> FastAPI:
119134
allow_headers=["*"],
120135
)
121136

137+
# Configure Azure Monitor and instrument FastAPI for OpenTelemetry
138+
# This must happen AFTER app creation but BEFORE route registration
139+
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
140+
if instrumentation_key:
141+
# SOLUTION: Use manual telemetry setup instead of configure_azure_monitor
142+
# This gives us precise control over what gets instrumented, avoiding interference
143+
# with Semantic Kernel's async generators while still tracking Azure SDK calls
144+
145+
# Set up Azure Monitor exporter for traces
146+
azure_trace_exporter = AzureMonitorTraceExporter(connection_string=instrumentation_key)
147+
148+
# Create a tracer provider and add the Azure Monitor exporter
149+
tracer_provider = TracerProvider()
150+
tracer_provider.add_span_processor(BatchSpanProcessor(azure_trace_exporter))
151+
152+
# Set the global tracer provider
153+
trace.set_tracer_provider(tracer_provider)
154+
155+
# Set up Azure Monitor exporter for logs (appears in traces table)
156+
azure_log_exporter = AzureMonitorLogExporter(connection_string=instrumentation_key)
157+
158+
# Create a logger provider and add the Azure Monitor exporter
159+
logger_provider = LoggerProvider()
160+
logger_provider.add_log_record_processor(BatchLogRecordProcessor(azure_log_exporter))
161+
set_logger_provider(logger_provider)
162+
163+
# Attach OpenTelemetry handler to Python's root logger
164+
handler = LoggingHandler(logger_provider=logger_provider)
165+
logging.getLogger().addHandler(handler)
166+
167+
# Instrument ONLY FastAPI for HTTP request/response tracing
168+
# This is safe because it only wraps HTTP handlers, not internal async operations
169+
FastAPIInstrumentor.instrument_app(
170+
app,
171+
excluded_urls="socket,ws", # Exclude WebSocket URLs to reduce noise
172+
tracer_provider=tracer_provider
173+
)
174+
175+
# Optional: Add manual spans in your code for Azure SDK operations using:
176+
# from opentelemetry import trace
177+
# tracer = trace.get_tracer(__name__)
178+
# with tracer.start_as_current_span("operation_name"):
179+
# # your Azure SDK call here
180+
181+
logger.logger.info("Application Insights configured with selective instrumentation")
182+
logger.logger.info("✓ FastAPI HTTP tracing enabled")
183+
logger.logger.info("✓ Python logging export to Application Insights enabled")
184+
logger.logger.info("✓ Manual span support enabled for Azure SDK operations")
185+
logger.logger.info("✓ Custom events via OpenTelemetry enabled")
186+
logger.logger.info("✓ Semantic Kernel async generators unaffected")
187+
else:
188+
logger.logger.warning("No Application Insights connection string found. Telemetry disabled.")
189+
122190
# Include routers with /api prefix
123191
app.include_router(backend_router, prefix="/api", tags=["backend"])
124192
# app.include_router(agents_router, prefix="/api/agents", tags=["agents"])
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Telemetry utilities for Application Insights integration."""
2+
3+
from common.telemetry.telemetry_helper import (
4+
add_span_attributes,
5+
get_tracer,
6+
trace_context,
7+
trace_operation,
8+
trace_sync_context,
9+
)
10+
11+
__all__ = [
12+
"trace_operation",
13+
"trace_context",
14+
"trace_sync_context",
15+
"get_tracer",
16+
"add_span_attributes",
17+
]

0 commit comments

Comments
 (0)