1717from rich .table import Table
1818
1919from .dataclasses import RuleInfo , RuleDifference , ComparisonResult
20- from .parsers import parse_yaml_file , diff_rules
20+ from .parsers import parse_yaml_file , diff_rules , extract_structure_elements
2121console = Console ()
2222
2323
@@ -127,7 +127,7 @@ def merge_rules(base_rules: List[RuleInfo], region_rules: List[RuleInfo]) -> Lis
127127 if include_untranslated :
128128 for rule in translated_rules :
129129 if rule .has_untranslated_text and not rule .audit_ignore :
130- untranslated_text .append ((rule , rule .untranslated_keys ))
130+ untranslated_text .append ((rule , rule .untranslated_entries ))
131131
132132 # Find fine-grained differences in rules that exist in both files (skip if audit-ignore)
133133 rule_differences = []
@@ -157,16 +157,16 @@ def rule_label(rule: RuleInfo) -> str:
157157 return f"[cyan]{ escape (rule .name )} [/] [dim][{ escape (tag )} ][/]"
158158
159159
def print_rule_item(rule: RuleInfo, issue_line: int, context: str = ""):
    """Print one bullet line for ``rule`` with the line the issue was found on.

    Args:
        rule: Rule whose label (built by ``rule_label``) is displayed.
        issue_line: Line number shown for this issue.
        context: Optional text placed after the line number.
    """
    label = rule_label(rule)
    location = f"[dim](line {issue_line} {context} )[/]"
    console.print(f" [dim]•[/] {label} {location}")
162162
163163
164- def print_diff_item (diff : RuleDifference , verbose : bool = False ):
164+ def print_diff_item (diff : RuleDifference , line_en : int , line_tr : int , verbose : bool = False ):
165165 """Print a single rule difference"""
166166 rule = diff .english_rule
167167 console .print (
168168 f" [dim]•[/] { rule_label (rule )} "
169- f"[dim](line { rule . line_number } en, { diff . translated_rule . line_number } tr)[/]"
169+ f"[dim](line { line_en } en, { line_tr } tr)[/]"
170170 )
171171 console .print (f" [dim]{ diff .description } [/]" )
172172 if verbose :
@@ -181,11 +181,43 @@ def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict:
181181 "rule_name" : rule .name or "" ,
182182 "rule_tag" : rule .tag or "" ,
183183 "rule_key" : rule .key ,
184- "line_en" : None ,
185- "line_tr" : None ,
184+ "issue_line_en" : None ,
185+ "issue_line_tr" : None ,
186+ "rule_line_en" : None ,
187+ "rule_line_tr" : None ,
186188 }
187189
188190
def first_structure_mismatch(
    english_tokens: List[str],
    translated_tokens: List[str],
) -> Tuple[Optional[str], Optional[str]]:
    """Return the first pair of tokens at which the two sequences diverge.

    Args:
        english_tokens: Structure tokens from the English rule.
        translated_tokens: Structure tokens from the translated rule.

    Returns:
        ``(english_token, translated_token)`` for the first differing
        position. When one sequence is a strict prefix of the other, the
        first surplus token of the longer one is returned and the other
        slot is ``None``. ``(None, None)`` when the sequences are equal.
    """
    # Walk the shared prefix pairwise; zip stops at the shorter sequence.
    for en_token, tr_token in zip(english_tokens, translated_tokens):
        if en_token != tr_token:
            return en_token, tr_token

    # No mismatch in the shared prefix: report the first surplus token, if any.
    shared = min(len(english_tokens), len(translated_tokens))
    if len(english_tokens) > shared:
        return english_tokens[shared], None
    if len(translated_tokens) > shared:
        return None, translated_tokens[shared]
    return None, None
204+
205+
def resolve_issue_line(rule: "RuleInfo", kind: str, token: Optional[str] = None) -> int:
    """Return the most specific line number recorded for an issue in ``rule``.

    Args:
        rule: Rule providing ``line_map`` (key -> list of line numbers) and
            ``line_number`` (used as the fallback).
        kind: Issue kind. ``"match"``, ``"condition"`` and ``"variables"``
            are looked up in ``rule.line_map`` under that same key;
            ``"structure"`` is looked up under ``"structure:<token>"``.
        token: Structure token for ``kind == "structure"``; trailing colons
            are stripped before building the lookup key.

    Returns:
        The first line listed for the resolved key, or ``rule.line_number``
        when the kind is not recognised, no token is given for a structure
        lookup, or no lines are recorded for the key.
    """
    if kind in ("match", "condition", "variables"):
        key = kind
    elif kind == "structure" and token:
        key = f"structure:{token.rstrip(':')}"
    else:
        # Nothing more specific to look up: fall back to the rule's own line.
        return rule.line_number

    lines = rule.line_map.get(key, [])
    return lines[0] if lines else rule.line_number
219+
220+
189221def collect_issues (
190222 result : ComparisonResult ,
191223 file_name : str ,
@@ -198,7 +230,8 @@ def collect_issues(
198230 issue .update (
199231 issue_type = "missing_rule" ,
200232 diff_type = "" ,
201- line_en = rule .line_number ,
233+ issue_line_en = rule .line_number ,
234+ rule_line_en = rule .line_number ,
202235 description = "Rule present in English but missing in translation" ,
203236 english_snippet = "" ,
204237 translated_snippet = "" ,
@@ -211,35 +244,49 @@ def collect_issues(
211244 issue .update (
212245 issue_type = "extra_rule" ,
213246 diff_type = "" ,
214- line_tr = rule .line_number ,
247+ issue_line_tr = rule .line_number ,
248+ rule_line_tr = rule .line_number ,
215249 description = "Rule present in translation but missing in English" ,
216250 english_snippet = "" ,
217251 translated_snippet = "" ,
218252 untranslated_texts = [],
219253 )
220254 issues .append (issue )
221255
222- for rule , texts in result .untranslated_text :
223- issue = issue_base (rule , file_name , language )
224- issue .update (
225- issue_type = "untranslated_text" ,
226- diff_type = "" ,
227- line_tr = rule .line_number ,
228- description = "Lowercase t/ot/ct keys indicate untranslated text" ,
229- english_snippet = "" ,
230- translated_snippet = "" ,
231- untranslated_texts = texts ,
232- )
233- issues .append (issue )
256+ for rule , entries in result .untranslated_text :
257+ for key , text , line in entries :
258+ issue = issue_base (rule , file_name , language )
259+ issue .update (
260+ issue_type = "untranslated_text" ,
261+ diff_type = "" ,
262+ issue_line_tr = line or rule .line_number ,
263+ rule_line_tr = rule .line_number ,
264+ description = "Lowercase t/ot/ct keys indicate untranslated text" ,
265+ english_snippet = "" ,
266+ translated_snippet = "" ,
267+ untranslated_texts = [text ],
268+ )
269+ issues .append (issue )
234270
235271 for diff in result .rule_differences :
236272 rule = diff .english_rule
237273 issue = issue_base (rule , file_name , language )
274+ if diff .diff_type == "structure" :
275+ en_tokens = extract_structure_elements (diff .english_rule .data )
276+ tr_tokens = extract_structure_elements (diff .translated_rule .data )
277+ en_token , tr_token = first_structure_mismatch (en_tokens , tr_tokens )
278+ issue_line_en = resolve_issue_line (diff .english_rule , "structure" , en_token )
279+ issue_line_tr = resolve_issue_line (diff .translated_rule , "structure" , tr_token )
280+ else :
281+ issue_line_en = resolve_issue_line (diff .english_rule , diff .diff_type )
282+ issue_line_tr = resolve_issue_line (diff .translated_rule , diff .diff_type )
238283 issue .update (
239284 issue_type = "rule_difference" ,
240285 diff_type = diff .diff_type ,
241- line_en = diff .english_rule .line_number ,
242- line_tr = diff .translated_rule .line_number ,
286+ issue_line_en = issue_line_en ,
287+ issue_line_tr = issue_line_tr ,
288+ rule_line_en = diff .english_rule .line_number ,
289+ rule_line_tr = diff .translated_rule .line_number ,
243290 description = diff .description ,
244291 english_snippet = diff .english_snippet ,
245292 translated_snippet = diff .translated_snippet ,
@@ -279,16 +326,18 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
279326 if result .missing_rules :
280327 console .print (f"\n [red]✗[/] [bold]Missing Rules[/] [[red]{ len (result .missing_rules )} [/]] [dim](in English but not in translation)[/]" )
281328 for rule in result .missing_rules :
282- print_rule_item (rule , context = " in English" )
329+ print_rule_item (rule , issue_line = rule . line_number , context = " in English" )
283330 issues += 1
284331
285332 if result .untranslated_text :
286- console .print (f"\n [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{ len (result .untranslated_text )} [/]] [dim](lowercase t/ot/ct keys)[/]" )
287- for rule , texts in result .untranslated_text :
288- print_rule_item (rule )
289- for text in texts :
333+ untranslated_count = sum (len (entries ) for _ , entries in result .untranslated_text )
334+ console .print (f"\n [yellow]⚠[/] [bold]Untranslated Text[/] [[yellow]{ untranslated_count } [/]] [dim](lowercase t/ot/ct keys)[/]" )
335+ for rule , entries in result .untranslated_text :
336+ for _ , text , line in entries :
337+ issue_line = line or rule .line_number
338+ print_rule_item (rule , issue_line = issue_line )
290339 console .print (f" [dim]→[/] [yellow]\" { escape (text )} \" [/]" )
291- issues += 1
340+ issues += 1
292341
293342 if result .rule_differences :
294343 total_diffs = len (result .rule_differences )
@@ -297,13 +346,22 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal
297346 f"[[magenta]{ total_diffs } [/]] [dim](structural differences between en and translation)[/]"
298347 )
299348 for diff in result .rule_differences :
300- print_diff_item (diff , verbose )
349+ if diff .diff_type == "structure" :
350+ en_tokens = extract_structure_elements (diff .english_rule .data )
351+ tr_tokens = extract_structure_elements (diff .translated_rule .data )
352+ en_token , tr_token = first_structure_mismatch (en_tokens , tr_tokens )
353+ line_en = resolve_issue_line (diff .english_rule , "structure" , en_token )
354+ line_tr = resolve_issue_line (diff .translated_rule , "structure" , tr_token )
355+ else :
356+ line_en = resolve_issue_line (diff .english_rule , diff .diff_type )
357+ line_tr = resolve_issue_line (diff .translated_rule , diff .diff_type )
358+ print_diff_item (diff , line_en = line_en , line_tr = line_tr , verbose = verbose )
301359 issues += 1
302360
303361 if result .extra_rules :
304362 console .print (f"\n [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{ len (result .extra_rules )} [/]] [dim](may be intentional)[/]" )
305363 for rule in result .extra_rules :
306- print_rule_item (rule )
364+ print_rule_item (rule , issue_line = rule . line_number )
307365 issues += 1
308366
309367 return issues
@@ -401,7 +459,7 @@ def audit_language(
401459 files_ok += 1
402460
403461 total_missing += len (result .missing_rules )
404- total_untranslated += len (result .untranslated_text )
462+ total_untranslated += sum ( len (entries ) for _ , entries in result .untranslated_text )
405463 total_extra += len (result .extra_rules )
406464 total_differences += len (result .rule_differences )
407465
0 commit comments