Skip to content

Commit 6126970

Browse files
authored
Fix condition diff output ordering and deduplication (daisy#488)
1 parent f2f769c commit 6126970

File tree

3 files changed

+82
-5
lines changed

3 files changed

+82
-5
lines changed

PythonScripts/audit_translations/parsers.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,19 @@ def normalize_match(value: Any) -> str:
296296
def normalize_xpath(value: str) -> str:
297297
return " ".join(value.split())
298298

299+
def dedup_list(values: List[str]) -> List[str]:
300+
"""
301+
Return a list without duplicates while preserving first-seen order.
302+
Originally, rule differences were stored as sets, losing their original order,
303+
which is not helpful and why it changed with the help of this function.
304+
305+
Example:
306+
>>> dedup_list(["if:a", "if:b", "if:a"])
307+
['if:a', 'if:b']
308+
"""
309+
310+
return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+)
311+
299312

300313
def extract_match_pattern(rule_data: Any) -> str:
301314
if isinstance(rule_data, dict):
@@ -403,8 +416,8 @@ def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> List[RuleDi
403416
translated_rule=translated_rule,
404417
diff_type='condition',
405418
description='Conditions differ',
406-
english_snippet=', '.join(sorted(en_set)) or '(none)',
407-
translated_snippet=', '.join(sorted(tr_set)) or '(none)'
419+
english_snippet=', '.join(dedup_list(en_conditions)) or '(none)',
420+
translated_snippet=', '.join(dedup_list(tr_conditions)) or '(none)'
408421
))
409422

410423
# Check variable differences

PythonScripts/audit_translations/tests/golden/jsonl/de.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@
158158
"issue_type": "rule_difference",
159159
"diff_type": "condition",
160160
"description": "Conditions differ",
161-
"english_snippet": "$Setting = 'Value', $Verbosity!='Terse', *[2][.='2'], parent::m:minus",
162-
"translated_snippet": "$Setting = 'Value', *[2][.='2'], parent::m:minus",
161+
"english_snippet": "$Verbosity!='Terse', $Setting = 'Value', parent::m:minus, *[2][.='2']",
162+
"translated_snippet": "$Setting = 'Value', parent::m:minus, *[2][.='2']",
163163
"untranslated_texts": [],
164164
"_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference."
165165
},

PythonScripts/audit_translations/tests/test_parsers.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
"""
22
Tests for parsers.py.
33
"""
4+
from typing import List
45

56
import pytest
67
from ruamel.yaml import YAML
78
from ruamel.yaml.scanner import ScannerError
89

9-
from ..dataclasses import RuleInfo
10+
from ..dataclasses import RuleInfo, RuleDifference
1011
from ..parsers import (
1112
diff_rules,
1213
extract_conditions,
@@ -383,6 +384,69 @@ def test_detects_condition_difference(self):
383384
diffs = diff_rules(en, tr)
384385
assert any(d.diff_type == "condition" for d in diffs)
385386

387+
def test_condition_snippet_preserves_rule_order(self):
388+
"""
389+
Condition snippets should preserve the order seen in each rule.
390+
Originally, alphabetical order was used, which is not very helpful.
391+
"""
392+
en = make_rule(
393+
"test",
394+
"mo",
395+
{
396+
"test": {
397+
"if": "condition_b",
398+
"then": [
399+
{
400+
"test": {
401+
"if": "condition_a",
402+
"then": [{"T": "x"}],
403+
}
404+
}
405+
],
406+
}
407+
},
408+
)
409+
tr = make_rule("test", "mo", {"if": "condition_c"})
410+
diffs: List[RuleDifference] = diff_rules(en, tr)
411+
cond_diff: RuleDifference = [d for d in diffs if d.diff_type == "condition"][0]
412+
assert cond_diff.english_snippet == "condition_b, condition_a"
413+
assert cond_diff.translated_snippet == "condition_c"
414+
415+
def test_condition_snippet_deduplicates_repeated_conditions(self):
416+
"""
417+
Repeated conditions should be shown once, in first-seen order.
418+
"""
419+
en = make_rule(
420+
"test",
421+
"mo",
422+
{
423+
"test": {
424+
"if": "condition_a",
425+
"then": [
426+
{
427+
"test": {
428+
"if": "condition_a",
429+
"then": [{"T": "x"}],
430+
}
431+
},
432+
{
433+
"test": {
434+
"if": "condition_b",
435+
"then": [{"T": "y"}],
436+
}
437+
},
438+
],
439+
}
440+
},
441+
)
442+
tr = make_rule("test", "mo", {"if": "condition_c"})
443+
diffs: List[RuleDifference] = diff_rules(en, tr)
444+
cond_diff: RuleDifference = [d for d in diffs if d.diff_type == "condition"][0]
445+
446+
# without deduplication, we'd have "condition_a" repeated.
447+
assert cond_diff.english_snippet == "condition_a, condition_b"
448+
assert cond_diff.translated_snippet == "condition_c"
449+
386450
def test_detects_missing_condition(self):
387451
"""Ensure detects missing condition."""
388452
en = make_rule("test", "mo", {"if": "condition1"})

0 commit comments

Comments
 (0)