Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ ddtrace/appsec/_ddwaf.cpp
ddtrace/appsec/include
ddtrace/appsec/share
ddtrace/profiling/collector/_task.c
ddtrace/profiling/collector/_fast_poisson.c
ddtrace/profiling/collector/_exception.c
ddtrace/profiling/_threading.c
ddtrace/profiling/collector/_traceback.c
ddtrace/profiling/collector/stack.c
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class Sample;
struct _frame;
typedef struct _frame PyFrameObject;
// NOLINTEND(bugprone-reserved-identifier)
struct _traceback;
typedef struct _traceback PyTracebackObject;

#ifdef __cplusplus
extern "C"
Expand Down Expand Up @@ -80,6 +82,7 @@ extern "C"
void ddup_push_local_root_span_id(Datadog::Sample* sample, uint64_t local_root_span_id);
void ddup_push_trace_type(Datadog::Sample* sample, std::string_view trace_type);
void ddup_push_exceptioninfo(Datadog::Sample* sample, std::string_view exception_type, int64_t count);
void ddup_push_exception_message(Datadog::Sample* sample, std::string_view exception_message);
void ddup_push_class_name(Datadog::Sample* sample, std::string_view class_name);
void ddup_push_gpu_device_name(Datadog::Sample*, std::string_view device_name);
void ddup_push_frame(Datadog::Sample* sample,
Expand All @@ -88,6 +91,7 @@ extern "C"
uint64_t address,
int64_t line);
void ddup_push_pyframes(Datadog::Sample* sample, PyFrameObject* frame);
void ddup_push_pytraceback(Datadog::Sample* sample, PyTracebackObject* tb);
void ddup_push_absolute_ns(Datadog::Sample* sample, int64_t timestamp_ns);
void ddup_push_monotonic_ns(Datadog::Sample* sample, int64_t monotonic_ns);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ intern_string(std::string_view s);
// to have spaces in the names.
#define EXPORTER_LABELS(X) \
X(exception_type, "exception type") \
X(exception_message, "exception message") \
X(thread_id, "thread id") \
X(thread_native_id, "thread native id") \
X(thread_name, "thread name") \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
struct _frame;
typedef struct _frame PyFrameObject;
// NOLINTEND(bugprone-reserved-identifier)
struct _traceback;
typedef struct _traceback PyTracebackObject;

namespace Datadog {

Expand Down Expand Up @@ -132,6 +134,7 @@ class Sample
bool push_local_root_span_id(uint64_t local_root_span_id);
bool push_trace_type(std::string_view trace_type);
bool push_exceptioninfo(std::string_view exception_type, int64_t count);
bool push_exception_message(std::string_view exception_message);
bool push_class_name(std::string_view class_name);
bool push_monotonic_ns(int64_t monotonic_ns);
bool push_absolute_ns(int64_t timestamp_ns);
Expand Down Expand Up @@ -163,6 +166,15 @@ class Sample
// released by this function.
void push_pyframes(PyFrameObject* frame);

// Push frames from a Python traceback chain to the sample.
// Walks tb -> tb_next (root->leaf) and pushes frames in leaf-to-root order,
// using tb_lineno for accurate exception site line numbers.
// Ownership: does not take ownership of `tb`; all code object references
// obtained via PyFrame_GetCode() are released internally.
// The GIL must be held when calling this function, because some of its
// operations call Python C APIs such as PyFrame_GetCode().
void push_pytraceback(PyTracebackObject* tb);

// Flushes the current buffer, clearing it
bool flush_sample();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ ddup_push_exceptioninfo(Datadog::Sample* sample, // cppcheck-suppress unusedFunc
sample->push_exceptioninfo(exception_type, count);
}

// C-linkage entry point that attaches an exception message to `sample`.
// Forwards to Datadog::Sample::push_exception_message(); the boolean result of
// that call is discarded, so callers get no success/failure signal.
// `sample` is dereferenced without a null check, so it must be non-null.
void
ddup_push_exception_message(Datadog::Sample* sample,
                            std::string_view exception_message) // cppcheck-suppress unusedFunction
{
    sample->push_exception_message(exception_message);
}

void
ddup_push_class_name(Datadog::Sample* sample, std::string_view class_name) // cppcheck-suppress unusedFunction
{
Expand Down Expand Up @@ -282,6 +289,12 @@ ddup_push_pyframes(Datadog::Sample* sample, PyFrameObject* frame) // cppcheck-su
sample->push_pyframes(frame);
}

// C-linkage entry point that pushes the frames of a Python traceback chain
// onto `sample` by forwarding to Datadog::Sample::push_pytraceback().
// `sample` is dereferenced without a null check, so it must be non-null.
// `tb` is passed through unchanged.
void
ddup_push_pytraceback(Datadog::Sample* sample, PyTracebackObject* tb) // cppcheck-suppress unusedFunction
{
    sample->push_pytraceback(tb);
}

void
ddup_push_absolute_ns(Datadog::Sample* sample, int64_t timestamp_ns) // cppcheck-suppress unusedFunction
{
Expand Down
71 changes: 71 additions & 0 deletions ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,70 @@ Datadog::Sample::incr_dropped_frames(size_t count)
dropped_frames += count;
}

// Push the frames of a Python traceback chain onto this sample.
//
// `tb` is the head (root / outermost entry) of the chain; a null `tb` pushes
// nothing. Frames are pushed leaf-first so the order matches push_pyframes.
// At most `max_nframes` locations are kept in total; the remaining traceback
// entries are added to `dropped_frames`. Entries with no frame or no code
// object are skipped without being counted.
//
// The traceback is only borrowed. Every code object obtained through
// PyFrame_GetCode() is released before returning. The GIL must be held,
// because Python C APIs are called.
void
Datadog::Sample::push_pytraceback(PyTracebackObject* tb)
{
    /* Walk the Python traceback chain and push each frame to the sample.
     * The chain goes from outermost (root) to innermost (leaf) via tb_next.
     * We collect raw traceback pointers first, then extract frame info only
     * for the frames we actually keep (up to max_nframes from the leaf end).
     * Frames are pushed in leaf-to-root order to match the convention used
     * by push_pyframes and the rest of the profiler.
     * https://docs.python.org/3/reference/datamodel.html#traceback.tb_next */

    // NOTE(review): presumably saves the caller's pending Python error and
    // restores it on scope exit, which is what makes the PyErr_Clear() calls
    // below safe. Confirm against PythonErrorRestorer's definition.
    PythonErrorRestorer error_restorer;

    // First pass: collect raw traceback pointers root->leaf.
    // These are borrowed references owned by the traceback chain, so no
    // ref-counting is needed here.
    std::vector<PyTracebackObject*> tb_nodes;

    // Prefer one bigger upfront allocation over several reallocations.
    tb_nodes.reserve(max_nframes);
    for (; tb != nullptr; tb = reinterpret_cast<PyTracebackObject*>(tb->tb_next)) {
        tb_nodes.push_back(tb);
    }

    // Second pass: iterate leaf->root (reverse), only extracting frame info
    // for frames we will actually keep (up to max_nframes).
    for (auto it = tb_nodes.rbegin(); it != tb_nodes.rend(); ++it) {
        if (locations.size() >= max_nframes) {
            // Everything from the current entry up to the root is dropped.
            dropped_frames += static_cast<size_t>(std::distance(it, tb_nodes.rend()));
            break;
        }

        PyTracebackObject* node = *it;

        // Held as int64_t (the line type of the frame-push API) so the `long`
        // returned by PyLong_AsLong is never narrowed.
        int64_t lineno = node->tb_lineno;
        if (lineno < 0) {
            // In Python 3.12+, tb_lineno can be -1 (lazy). Resolve it through
            // the Python property which calls PyCode_Addr2Line internally.
            lineno = 0; // fallback when the line cannot be resolved
            PyObject* lineno_obj = PyObject_GetAttrString(reinterpret_cast<PyObject*>(node), "tb_lineno");
            if (lineno_obj != nullptr) {
                const long resolved = PyLong_AsLong(lineno_obj);
                Py_DECREF(lineno_obj);
                if (resolved > 0) {
                    lineno = resolved;
                }
            }
            // Both PyObject_GetAttrString and PyLong_AsLong leave an exception
            // set on failure. Clear it so the C-API calls below never run with
            // a stale error pending. PyErr_Clear() is a no-op if none is set.
            PyErr_Clear();
        }

        // PyFrame_GetCode returns a new reference, released below.
        PyCodeObject* code = (node->tb_frame != nullptr) ? PyFrame_GetCode(node->tb_frame) : nullptr;
        if (code != nullptr) {
#if defined(PY311_AND_LATER)
            // Prefer the qualified name when the code object has one.
            PyObject* name_obj = code->co_qualname ? code->co_qualname : code->co_name;
#else
            PyObject* name_obj = code->co_name;
#endif
            push_frame(unicode_to_string_view(name_obj), unicode_to_string_view(code->co_filename), 0, lineno);
            Py_DECREF(code);
        }
    }
}

void
Datadog::Sample::push_frame(function_id function_id, uint64_t address, int64_t line)
{
Expand Down Expand Up @@ -570,6 +634,13 @@ Datadog::Sample::push_gpu_flops(int64_t size, int64_t count)
return false;
}

// Attach `exception_message` to this sample as the
// ExportLabelKey::exception_message label.
// Always returns true: whatever push_label() reports is not inspected here.
bool
Datadog::Sample::push_exception_message(std::string_view exception_message)
{
    push_label(ExportLabelKey::exception_message, exception_message);
    return true;
}

bool
Datadog::Sample::push_lock_name(std::string_view lock_name)
{
Expand Down
3 changes: 3 additions & 0 deletions ddtrace/internal/datadog/profiling/ddup/_ddup.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from types import FrameType
from types import TracebackType
from typing import Mapping
from typing import Optional
from typing import Union
Expand Down Expand Up @@ -44,8 +45,10 @@ class SampleHandle:
def push_class_name(self, class_name: StringType) -> None: ...
def push_cputime(self, value: int, count: int) -> None: ...
def push_exceptioninfo(self, exc_type: Union[None, bytes, str, type], count: int) -> None: ...
def push_exception_message(self, exception_message: StringType) -> None: ...
def push_frame(self, name: StringType, filename: StringType, address: int, line: int) -> None: ...
def push_pyframes(self, frame: FrameType) -> None: ...
def push_pytraceback(self, tb: TracebackType) -> None: ...
def push_gpu_device_name(self, device_name: StringType) -> None: ...
def push_gpu_flops(self, value: int, count: int) -> None: ...
def push_gpu_gputime(self, value: int, count: int) -> None: ...
Expand Down
41 changes: 40 additions & 1 deletion ddtrace/internal/datadog/profiling/ddup/_ddup.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ cdef extern from "sample.hpp" namespace "Datadog":
cdef extern from "ddup_interface.hpp":
ctypedef struct PyFrameObject:
pass
ctypedef struct PyTracebackObject:
pass

void ddup_config_env(string_view env)
void ddup_config_service(string_view service)
Expand Down Expand Up @@ -85,10 +87,12 @@ cdef extern from "ddup_interface.hpp":
void ddup_push_local_root_span_id(Sample *sample, uint64_t local_root_span_id)
void ddup_push_trace_type(Sample *sample, string_view trace_type)
void ddup_push_exceptioninfo(Sample *sample, string_view exception_type, int64_t count)
void ddup_push_exception_message(Sample *sample, string_view exception_message)
void ddup_push_class_name(Sample *sample, string_view class_name)
void ddup_push_gpu_device_name(Sample *sample, string_view device_name)
void ddup_push_frame(Sample *sample, string_view _name, string_view _filename, uint64_t address, int64_t line)
void ddup_push_pyframes(Sample *sample, PyFrameObject* frame)
void ddup_push_pytraceback(Sample *sample, PyTracebackObject* tb)
void ddup_push_monotonic_ns(Sample *sample, int64_t monotonic_ns)
void ddup_push_absolute_ns(Sample *sample, int64_t monotonic_ns)
void ddup_flush_sample(Sample *sample)
Expand Down Expand Up @@ -272,6 +276,18 @@ cdef call_ddup_push_exceptioninfo(Sample* sample, exception_name: StringType, ui
if utf8_data != NULL:
ddup_push_exceptioninfo(sample, string_view(utf8_data, utf8_size), count)

cdef call_ddup_push_exception_message(Sample* sample, exception_message: StringType):
    # Push an exception message (bytes or str) onto `sample`.
    # Falsy messages (None, empty) are ignored. bytes are passed through as-is;
    # anything else goes through PyUnicode_AsUTF8AndSize, and nothing is pushed
    # if that returns NULL.
    cdef const char* msg_data
    cdef Py_ssize_t msg_len

    if not exception_message:
        return

    if isinstance(exception_message, bytes):
        ddup_push_exception_message(sample, string_view(<const char*>exception_message, len(exception_message)))
    else:
        msg_data = PyUnicode_AsUTF8AndSize(exception_message, &msg_len)
        if msg_data != NULL:
            ddup_push_exception_message(sample, string_view(msg_data, msg_len))

cdef call_ddup_push_class_name(Sample* sample, class_name: StringType):
if not class_name:
return
Expand Down Expand Up @@ -492,6 +508,21 @@ cdef class SampleHandle:
frame_ptr = <PyFrameObject*>frame_obj
ddup_push_pyframes(self.ptr, frame_ptr)

def push_pytraceback(self, object tb) -> None:
    """Push the frames of the traceback `tb` onto this sample.

    Does nothing when the handle has no underlying sample, when `tb` is None,
    or when `tb` is not a `types.TracebackType` instance.
    """
    cdef PyTracebackObject* tb_ptr

    if self.ptr is NULL or tb is None:
        return
    # Reject anything that is not a real traceback: the pointer cast below
    # would otherwise reinterpret unrelated object memory and could crash.
    if not isinstance(tb, types.TracebackType):
        return
    # `tb` already is a PyObject*; both casts only reinterpret that pointer.
    tb_ptr = <PyTracebackObject*><PyObject*>tb
    ddup_push_pytraceback(self.ptr, tb_ptr)

def push_threadinfo(self, thread_id: int, thread_native_id: int, thread_name: StringType) -> None:
if self.ptr is not NULL:
thread_id = thread_id if thread_id is not None else 0
Expand All @@ -517,11 +548,19 @@ cdef class SampleHandle:
if self.ptr is not NULL:
exc_name = None
if isinstance(exc_type, type):
exc_name = exc_type.__module__ + "." + exc_type.__name__
module = exc_type.__module__
exc_name = f"{module}.{exc_type.__name__}" if module else exc_type.__name__
else:
exc_name = exc_type
call_ddup_push_exceptioninfo(self.ptr, exc_name, clamp_to_uint64_unsigned(count))

def push_exception_message(self, exception_message: StringType) -> None:
    """Attach `exception_message` to this sample.

    Does nothing when the handle has no underlying sample or when the message
    is None.
    """
    if self.ptr is not NULL and exception_message is not None:
        call_ddup_push_exception_message(self.ptr, exception_message)

def push_class_name(self, class_name: StringType) -> None:
if self.ptr is not NULL:
call_ddup_push_class_name(self.ptr, class_name)
Expand Down
41 changes: 41 additions & 0 deletions ddtrace/internal/settings/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,11 @@ def _validate_non_negative_int(value: int) -> None:
raise ValueError("value must be non negative")


def _validate_positive_int(value: int) -> None:
if value < 1:
raise ValueError("value must be >= 1")


class ProfilingConfigPytorch(DDConfig):
__item__ = __prefix__ = "pytorch"

Expand All @@ -401,12 +406,46 @@ class ProfilingConfigPytorch(DDConfig):
)


class ProfilingConfigException(DDConfig):
    # Settings for the exception profiler, registered under the "exception"
    # item/prefix.
    __item__ = __prefix__ = "exception"

    # Master switch; the exception profiler is off by default.
    enabled = DDConfig.v(
        bool,
        "enabled",
        default=False,
        help_type="Boolean",
        help="Whether to enable the exception profiler",
    )

    # Average number of exceptions between two samples; validated to be >= 1.
    sampling_interval = DDConfig.v(
        int,
        "sampling_interval",
        default=100,
        help_type="Integer",
        validator=_validate_positive_int,
        help=(
            "Average number of exceptions between samples (uses Poisson distribution). "
            "Lower values sample more frequently but add more overhead. "
            "This value must be >= 1."
        ),
    )

    # Opt-in because exception messages can contain sensitive data.
    collect_message = DDConfig.v(
        bool,
        "collect_message",
        default=False,
        help_type="Boolean",
        help="Whether to collect exception messages, which can contain sensitive data.",
    )


# Include all the sub-configs
ProfilingConfig.include(ProfilingConfigStack, namespace="stack")
ProfilingConfig.include(ProfilingConfigLock, namespace="lock")
ProfilingConfig.include(ProfilingConfigMemory, namespace="memory")
ProfilingConfig.include(ProfilingConfigHeap, namespace="heap")
ProfilingConfig.include(ProfilingConfigPytorch, namespace="pytorch")
ProfilingConfig.include(ProfilingConfigException, namespace="exception")

config = ProfilingConfig()
report_configuration(config)
Expand Down Expand Up @@ -454,6 +493,8 @@ def config_str(config) -> str:
configured_features.append("heap")
if config.pytorch.enabled:
configured_features.append("pytorch")
if config.exception.enabled:
configured_features.append("exception")
configured_features.append("exp_dd")
configured_features.append("CAP" + str(config.capture_pct))
configured_features.append("MAXF" + str(config.max_frames))
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/internal/settings/profiling.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class ProfilingConfig(DDConfig):
memory: ProfilingConfigMemory
heap: ProfilingConfigHeap
pytorch: ProfilingConfigPytorch
exception: ProfilingConfigException

class ProfilingConfigStack(DDConfig):
enabled: bool
Expand All @@ -46,6 +47,11 @@ class ProfilingConfigPytorch(DDConfig):
enabled: bool
events_limit: int

# Type stub for the exception profiler settings.
class ProfilingConfigException(DDConfig):
    # Whether the exception profiler is enabled.
    enabled: bool
    # Average number of exceptions between samples.
    sampling_interval: int
    # Whether exception messages are collected.
    collect_message: bool

config: ProfilingConfig
ddup_failure_msg: Optional[str]
ddup_is_available: bool
Expand Down
21 changes: 21 additions & 0 deletions ddtrace/profiling/collector/_exception.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from types import CodeType
from typing import Optional

from ddtrace.profiling import collector

HAS_MONITORING: bool
MAX_EXCEPTION_MESSAGE_LEN: int

# Type stub for the exception collector; the implementation is not in this stub.
class ExceptionCollector(collector.Collector):
    _sampling_interval: int
    _collect_message: bool
    # NOTE(review): presumably tracks whether monitoring callbacks are
    # currently registered — confirm against the implementation.
    _monitoring_registered: bool
    # Both arguments are optional.
    # NOTE(review): None presumably falls back to the profiling configuration
    # values — confirm against the implementation.
    def __init__(
        self,
        sampling_interval: Optional[int] = None,
        collect_message: Optional[bool] = None,
    ) -> None: ...
    def _start_service(self) -> None: ...
    def _stop_service(self) -> None: ...

# NOTE(review): the (code, instruction_offset, exception) signature looks like a
# sys.monitoring EXCEPTION_HANDLED callback — confirm against the implementation.
def _on_exception_handled(code: CodeType, instruction_offset: int, exception: BaseException) -> None: ...
Loading
Loading