From 3a9e785029baa2a8ec52b8a69148761707d75057 Mon Sep 17 00:00:00 2001 From: Ghraven Date: Tue, 26 May 2026 09:27:48 +0800 Subject: [PATCH 1/5] fix: use utf-8 for local tracking text files --- burr/tracking/server/backend.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/burr/tracking/server/backend.py b/burr/tracking/server/backend.py index 1e1c27d3a..0dafd88f5 100644 --- a/burr/tracking/server/backend.py +++ b/burr/tracking/server/backend.py @@ -309,7 +309,7 @@ async def _load_project_annotations(self, project_id: str): annotations_path = self._get_annotation_path(project_id) annotations = [] if os.path.exists(annotations_path): - async with aiofiles.open(annotations_path) as f: + async with aiofiles.open(annotations_path, encoding="utf-8") as f: for line in await f.readlines(): annotations.append(AnnotationOut.parse_raw(line)) return annotations @@ -348,7 +348,7 @@ async def create_annotation( **annotation.dict(), ) annotations_path = self._get_annotation_path(project_id) - async with aiofiles.open(annotations_path, "a") as f: + async with aiofiles.open(annotations_path, "a", encoding="utf-8") as f: await f.write(annotation_out.json() + "\n") return annotation_out @@ -381,7 +381,7 @@ async def update_annotation( detail=f"Annotation: {annotation_id} from project: {project_id} not found", ) annotations_path = self._get_annotation_path(project_id) - async with aiofiles.open(annotations_path, "w") as f: + async with aiofiles.open(annotations_path, "w", encoding="utf-8") as f: for a in all_annotations: await f.write(a.json() + "\n") return annotation_out @@ -407,7 +407,7 @@ async def get_annotations( if not os.path.exists(annotation_path): return [] annotations = [] - async with aiofiles.open(annotation_path) as f: + async with aiofiles.open(annotation_path, encoding="utf-8") as f: for line in await f.readlines(): parsed = AnnotationOut.parse_raw(line) if ( @@ -530,7 +530,7 @@ async def get_application_logs( steps = Step.from_logs(lines) children = [] if os.path.exists(children_file): - async with aiofiles.open(children_file) as f: + async with aiofiles.open(children_file, encoding="utf-8") as f: str_children = await f.readlines() children = [ ChildApplicationModel.parse_obj(json.loads(item)) for item in str_children From 65837114de740f3bcce745542dcd109a7e59622b Mon Sep 17 00:00:00 2001 From: Ghraven Date: Wed, 27 May 2026 16:58:44 +0800 Subject: [PATCH 2/5] fix: read local tracking graph as utf-8 --- burr/tracking/server/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/burr/tracking/server/backend.py b/burr/tracking/server/backend.py index 0dafd88f5..5e9e1c771 100644 --- a/burr/tracking/server/backend.py +++ b/burr/tracking/server/backend.py @@ -521,7 +521,7 @@ async def get_application_logs( status_code=404, detail=f"Graph file for app: {app_id} from project: {project_id} not found", ) - async with aiofiles.open(graph_file) as f: + async with aiofiles.open(graph_file, encoding="utf-8") as f: str_graph = await f.read() collections.defaultdict(list) if os.path.exists(log_file): From 53c4365ba47418fcbc614abc8242de377dcf5d71 Mon Sep 17 00:00:00 2001 From: Ghraven Date: Fri, 29 May 2026 16:24:52 +0800 Subject: [PATCH 3/5] test: cover local backend utf-8 reads --- tests/tracking/test_local_tracking_client.py | 80 ++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/tracking/test_local_tracking_client.py b/tests/tracking/test_local_tracking_client.py index 6fe455434..66eaeb86f 100644 --- a/tests/tracking/test_local_tracking_client.py +++ b/tests/tracking/test_local_tracking_client.py @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. +import asyncio import json import os import uuid +from datetime import datetime from typing import Literal, Optional, Tuple import pytest @@ -37,6 +39,13 @@ ChildApplicationModel, EndEntryModel, EndSpanModel, + PointerModel, +) +from burr.tracking.server.backend import LocalBackend +from burr.tracking.server.schema import ( + AnnotationCreate, + AnnotationDataPointer, + AnnotationObservation, ) from burr.visibility import TracerFactory @@ -309,6 +318,77 @@ def test_fork_children_have_correct_partition_key(tmpdir): assert child.event_type == "fork" +def test_local_backend_reads_utf8_annotations_graph_and_children(tmpdir): + project_name = "test_local_backend_utf8" + app_id = "app-unicode" + partition_key = "partici\u00f3n-ni\u00f1a" + step_name = "an\u00e1lisis caf\u00e9" + tag = "ni\u00f1o" + note = "acci\u00f3n termin\u00f3 con \u00e9xito" + entrypoint = "inicio-caf\u00e9" + child_app_id = "hijo-ni\u00f1o" + child_partition_key = "clave-ni\u00f1a" + log_dir = os.path.join(tmpdir, "tracking") + project_dir = os.path.join(log_dir, project_name) + app_dir = os.path.join(project_dir, app_id) + os.makedirs(app_dir) + + backend = LocalBackend(path=log_dir) + + annotation = AnnotationCreate( + span_id="span-1", + step_name=step_name, + tags=["revision", tag], + observations=[ + AnnotationObservation( + data_fields={"note": note}, + thumbs_up_thumbs_down=True, + data_pointers=[ + AnnotationDataPointer( + type="state_field", + field_name="resultado_final", + span_id=None, + ) + ], + ) + ], + ) + + created_annotation = asyncio.run( + backend.create_annotation(annotation, project_name, partition_key, app_id, 1) + ) + annotations = asyncio.run( + backend.get_annotations(project_name, partition_key, app_id, step_sequence_id=1) + ) + + assert created_annotation.step_name == step_name + assert annotations[0].tags == ["revision", tag] + assert annotations[0].observations[0].data_fields["note"] == note + + application = ApplicationModel(entrypoint=entrypoint, actions=[], transitions=[]) + child = ChildApplicationModel( + child=PointerModel(app_id=child_app_id, sequence_id=2, partition_key=child_partition_key), + event_time=datetime.now(), + event_type="fork", + sequence_id=1, + ) + + with open(os.path.join(app_dir, LocalTrackingClient.GRAPH_FILENAME), "w", encoding="utf-8") as f: + f.write(application.model_dump_json()) + with open(os.path.join(app_dir, LocalTrackingClient.LOG_FILENAME), "w", encoding="utf-8"): + pass + with open( + os.path.join(app_dir, LocalTrackingClient.CHILDREN_FILENAME), "w", encoding="utf-8" + ) as f: + f.write(child.model_dump_json() + "\n") + + logs = asyncio.run(backend.get_application_logs(None, project_name, app_id, partition_key)) + + assert logs.application.entrypoint == entrypoint + assert logs.children[0].child.app_id == child_app_id + assert logs.children[0].child.partition_key == child_partition_key + + def test_multi_fork_tracking_client(tmpdir): """This is more of an end-to-end test. We shoudl probably break it out into smaller tests but the local tracking client being used as a persister is From b142647d0225843221e0ca5a3b18b0e0b1483f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 29 May 2026 08:49:10 -0300 Subject: [PATCH 4/5] test: assert utf-8 on local backend text opens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Harden the local-backend UTF-8 regression test so it catches missed call sites independently of the host's default encoding. A round-trip assertion alone passes on a UTF-8 host even if a text open drops encoding="utf-8". Wrap the backend's aiofiles.open and assert every text-mode (non-binary) open is explicitly UTF-8; binary opens (mode "rb") stay exempt. Also exercise non-ASCII (incl. CJK) content through annotations, graph.json and children.jsonl end to end. Signed-off-by: André Ahlert --- tests/tracking/test_local_tracking_client.py | 32 ++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/tracking/test_local_tracking_client.py b/tests/tracking/test_local_tracking_client.py index 66eaeb86f..a7f80a381 100644 --- a/tests/tracking/test_local_tracking_client.py +++ b/tests/tracking/test_local_tracking_client.py @@ -318,15 +318,33 @@ def test_fork_children_have_correct_partition_key(tmpdir): assert child.event_type == "fork" -def test_local_backend_reads_utf8_annotations_graph_and_children(tmpdir): +def test_local_backend_reads_utf8_annotations_graph_and_children(tmpdir, monkeypatch): + # Guard against missed call sites in a host-independent way: a round-trip + # assertion alone would pass on a UTF-8 default host even if a text open + # dropped encoding="utf-8". So wrap the backend's aiofiles.open and assert + # every text-mode (non-binary) open is explicitly UTF-8. Binary opens + # (log/metadata, mode "rb") are intentionally exempt. + import burr.tracking.server.backend as backend_module + + real_aiofiles_open = backend_module.aiofiles.open + + def _utf8_guarded_open(file, mode="r", *args, **kwargs): + if "b" not in mode: + assert ( + kwargs.get("encoding") == "utf-8" + ), f"text-mode open of {file} (mode={mode!r}) must pass encoding='utf-8'" + return real_aiofiles_open(file, mode, *args, **kwargs) + + monkeypatch.setattr(backend_module.aiofiles, "open", _utf8_guarded_open) + project_name = "test_local_backend_utf8" app_id = "app-unicode" partition_key = "partici\u00f3n-ni\u00f1a" - step_name = "an\u00e1lisis caf\u00e9" + step_name = "an\u00e1lisis caf\u00e9 \u65e5\u672c\u8a9e" tag = "ni\u00f1o" - note = "acci\u00f3n termin\u00f3 con \u00e9xito" - entrypoint = "inicio-caf\u00e9" - child_app_id = "hijo-ni\u00f1o" + note = "acci\u00f3n termin\u00f3 con \u00e9xito \u4f60\u597d" + entrypoint = "inicio-caf\u00e9-\u4e16\u754c" + child_app_id = "hijo-ni\u00f1o-\u6f22\u5b57" child_partition_key = "clave-ni\u00f1a" log_dir = os.path.join(tmpdir, "tracking") project_dir = os.path.join(log_dir, project_name) @@ -373,7 +391,9 @@ def test_local_backend_reads_utf8_annotations_graph_and_children(tmpdir): sequence_id=1, ) - with open(os.path.join(app_dir, LocalTrackingClient.GRAPH_FILENAME), "w", encoding="utf-8") as f: + with open( + os.path.join(app_dir, LocalTrackingClient.GRAPH_FILENAME), "w", encoding="utf-8" + ) as f: f.write(application.model_dump_json()) with open(os.path.join(app_dir, LocalTrackingClient.LOG_FILENAME), "w", encoding="utf-8"): pass From 97019739a2c7a4c275ea2a660271341d62335fa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 29 May 2026 10:41:35 -0300 Subject: [PATCH 5/5] fix(ci): run local backend utf-8 test with tracking-server deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new regression test imports burr.tracking.server.backend, which pulls aiofiles (tracking-server extra). The base test job only installed tracking-client, so the top-level server import broke collection and failed all test (3.9-3.12) jobs. - Make the server backend/schema imports lazy and gate the test with pytest.importorskip("aiofiles"), matching the tracking-server-s3 test idiom, so the module still collects with only tracking-client present. - Add the tracking-server extra to the base test job so aiofiles is available and the regression test actually runs across the matrix instead of being skipped everywhere. Signed-off-by: André Ahlert --- .github/workflows/python-package.yml | 2 +- tests/tracking/test_local_tracking_client.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 407db459c..8fd3a91e6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -69,7 +69,7 @@ jobs: - name: Install dependencies run: | - python -m pip install -e ".[tests,tracking-client,graphviz]" + python -m pip install -e ".[tests,tracking-client,tracking-server,graphviz]" - name: Run tests run: | diff --git a/tests/tracking/test_local_tracking_client.py b/tests/tracking/test_local_tracking_client.py index a7f80a381..96671d860 100644 --- a/tests/tracking/test_local_tracking_client.py +++ b/tests/tracking/test_local_tracking_client.py @@ -41,12 +41,6 @@ EndSpanModel, PointerModel, ) -from burr.tracking.server.backend import LocalBackend -from burr.tracking.server.schema import ( - AnnotationCreate, - AnnotationDataPointer, - AnnotationObservation, -) from burr.visibility import TracerFactory @@ -319,12 +313,22 @@ def test_fork_children_have_correct_partition_key(tmpdir): def test_local_backend_reads_utf8_annotations_graph_and_children(tmpdir, monkeypatch): + # Server backend imports require the tracking-server extra (aiofiles); skip + # cleanly where it is not installed so this module still collects with only + # tracking-client present. + pytest.importorskip("aiofiles") # Guard against missed call sites in a host-independent way: a round-trip # assertion alone would pass on a UTF-8 default host even if a text open # dropped encoding="utf-8". So wrap the backend's aiofiles.open and assert # every text-mode (non-binary) open is explicitly UTF-8. Binary opens # (log/metadata, mode "rb") are intentionally exempt. import burr.tracking.server.backend as backend_module + from burr.tracking.server.backend import LocalBackend + from burr.tracking.server.schema import ( + AnnotationCreate, + AnnotationDataPointer, + AnnotationObservation, + ) real_aiofiles_open = backend_module.aiofiles.open