Skip to content

Commit 2287cbb

Browse files
fix(core): use byte offsets for position reporting in raw-scoped script rules
Script rules with `scope: raw` return begin/end byte offsets in their match arrays, but AddAlert ignores these and performs a text search via FindLoc/initialPosition to determine the alert position. When the matched text appears multiple times in the document, this always reports the position of the first occurrence rather than the intended one. Add locFromByteOffset() to compute line:column directly from the byte offsets the script provides, bypassing the text-search path. The new path activates when the alert carries valid byte offsets within a raw-scope block, falling back to the existing FindLoc path otherwise. Relates to #869, #272. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 034c5f1 commit 2287cbb

File tree

1 file changed

+49
-11
lines changed

1 file changed

+49
-11
lines changed

internal/core/file.go

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,32 @@ func (f *File) assignLoc(ctx string, blk nlp.Block, pad int, a Alert) (int, []in
252252
return blk.Line + 1, a.Span
253253
}
254254

255+
// locFromByteOffset computes a 1-based line number and a [col, col+len] span
256+
// from absolute byte offsets into the raw document text. This avoids the
257+
// text-search approach used by FindLoc/initialPosition, which can report the
258+
// wrong location when the matched text appears more than once.
259+
func locFromByteOffset(ctx string, begin, end, pad int) (int, []int) {
260+
line := 1
261+
lineStart := 0
262+
263+
for i := 0; i < begin && i < len(ctx); i++ {
264+
if ctx[i] == '\n' {
265+
line++
266+
lineStart = i + 1
267+
}
268+
}
269+
270+
col := nlp.StrLen(ctx[lineStart:begin]) + 1 + pad
271+
matchLen := nlp.StrLen(ctx[begin:end])
272+
273+
span := []int{col, col + matchLen - 1}
274+
if span[1] <= 0 {
275+
span[1] = 1
276+
}
277+
278+
return line, span
279+
}
280+
255281
// SetText updates the file's content, lines, and history.
256282
func (f *File) SetText(s string) {
257283
f.Content = s
@@ -271,19 +297,31 @@ func (f *File) AddAlert(a Alert, blk nlp.Block, lines, pad int, lookup bool) {
271297
ctx = old
272298
}
273299

274-
// NOTE: If the `ctx` document is large (as could be the case with
275-
// `scope: raw`) this is *slow*. Thus, the cap at 1k.
300+
// When we have a raw-scope block (Context == Text) and the alert carries
301+
// byte offsets that fall within the document, compute line:column directly
302+
// from those offsets instead of performing a text search. This fixes
303+
// incorrect position reporting for script rules with `scope: raw` when
304+
// the matched text appears more than once.
276305
//
277-
// TODO: Actually fix this.
278-
if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 {
279-
a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...)
280-
}
306+
// We use blk.Context (the original document) rather than ctx, which may
307+
// have been modified by ChkToCtx substitutions from earlier alerts.
308+
if lookup && blk.Context == blk.Text && a.Span[0] >= 0 && a.Span[1] <= len(blk.Context) {
309+
a.Line, a.Span = locFromByteOffset(blk.Context, a.Span[0], a.Span[1], pad)
310+
} else {
311+
// NOTE: If the `ctx` document is large (as could be the case with
312+
// `scope: raw`) this is *slow*. Thus, the cap at 1k.
313+
//
314+
// TODO: Actually fix this.
315+
if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 {
316+
a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...)
317+
}
281318

282-
if !lookup {
283-
a.Line, a.Span = f.assignLoc(ctx, blk, pad, a)
284-
}
285-
if (!lookup && a.Span[0] < 0) || lookup {
286-
a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a)
319+
if !lookup {
320+
a.Line, a.Span = f.assignLoc(ctx, blk, pad, a)
321+
}
322+
if (!lookup && a.Span[0] < 0) || lookup {
323+
a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a)
324+
}
287325
}
288326

289327
if a.Span[0] > 0 {

0 commit comments

Comments
 (0)