fix compact-boundary cancel race and add tests

charlesvien · charlesvien · commit fec00b85c7e6 · 2026-06-07T17:33:13.000-07:00
diff --git a/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts b/packages/agent/src/adapters/claude/claude-agent.slash-command.test.ts
@@ -172,3 +172,61 @@ describe("ClaudeAcpAgent.prompt — early idle handling", () => {
     }
   });
 });
+
+describe("ClaudeAcpAgent.prompt — force-cancel backstop", () => {
+  beforeEach(() => {
+    vi.clearAllMocks(); // drop call history recorded by earlier tests
+  });
+
+  it("returns 'cancelled' when the SDK never yields after interrupt (issue #680)", async () => {
+    const { agent } = makeAgent();
+    const sessionId = "s-wedged";
+    const query = installFakeSession(agent, sessionId);
+    // Simulate a wedged SDK: this interrupt() resolves but never makes next() yield.
+    query.interrupt.mockImplementation(async () => {}); // overrides the fake session's default interrupt
+    // Shrink the grace period (ms, per the field name) so the backstop fires promptly under real timers.
+    (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs = 5;
+
+    const promptPromise = agent.prompt({
+      sessionId,
+      prompt: [{ type: "text", text: "do something slow" }],
+    });
+
+    // Yield once so the loop reaches `await query.next()`, which stays pending forever.
+    await new Promise((resolve) => setImmediate(resolve));
+
+    // cancel() arms the backstop and calls the (no-op) interrupt; the timer must
+    // drive the loop to return rather than hanging on the wedged next().
+    await agent.cancel({ sessionId });
+
+    const result = await promptPromise; // never settles if the backstop fails to fire
+    expect(result.stopReason).toBe("cancelled");
+  });
+
+  it("clears the backstop timer on a healthy cancel (interrupt yields)", async () => {
+    const { agent } = makeAgent();
+    const sessionId = "s-healthy";
+    installFakeSession(agent, sessionId);
+    // Large grace so the test can only pass via the normal idle/done path, not
+    // the timer; the loop is expected to clear the armed timer in its finally.
+    (agent as unknown as { forceCancelGraceMs: number }).forceCancelGraceMs =
+      50_000;
+
+    const promptPromise = agent.prompt({
+      sessionId,
+      prompt: [{ type: "text", text: "do something" }],
+    });
+    await new Promise((resolve) => setImmediate(resolve));
+
+    // The fake's default interrupt() resolves next() with done, so the loop
+    // returns through its normal path well before the 50s backstop.
+    await agent.cancel({ sessionId });
+
+    const result = await promptPromise;
+    expect(result.stopReason).toBe("cancelled");
+    expect(
+      (agent as unknown as { session: { forceCancelTimer?: unknown } }).session
+        .forceCancelTimer,
+    ).toBeUndefined(); // the backstop timer handle must not outlive the prompt
+  });
+});
diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -486,7 +486,10 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     // force-cancel backstop armed in interrupt() aborts this controller.
     const cancelController = new AbortController();
     this.session.cancelController = cancelController;
-    const cancelled = new Promise<void>((resolve) => {
+    // Resolves when the backstop aborts the controller. Named distinctly from
+    // the `cancelled` boolean above (the queue-handoff result) to avoid two
+    // variables named `cancelled` in this method.
+    const cancelWake = new Promise<void>((resolve) => {
       cancelController.signal.addEventListener("abort", () => resolve(), {
         once: true,
       });
@@ -545,13 +548,19 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     try {
       while (true) {
         const nextMessage = this.session.query.next();
-        const next = await Promise.race([nextMessage, cancelled]);
+        const next = await Promise.race([nextMessage, cancelWake]);
         if (cancelController.signal.aborted) {
           // The SDK never yielded after interrupt() (e.g. a wedged TaskOutput
-          // block). Abandon the in-flight next() — swallowing any later
-          // rejection so it can't surface as an unhandled rejection — and honor
-          // the cancel per the ACP contract.
-          void nextMessage.catch(() => {});
+          // block). Abandon the in-flight next(); log any later rejection (an
+          // auth/process error the SDK threw at cancel time would otherwise be
+          // lost) but swallow it so it can't surface as an unhandled rejection,
+          // then honor the cancel per the ACP contract.
+          void nextMessage.catch((err) =>
+            this.logger.warn("in-flight query.next() rejected after cancel", {
+              sessionId: params.sessionId,
+              error: err instanceof Error ? err.message : String(err),
+            }),
+          );
           return {
             stopReason: "cancelled",
             _meta: this.session.interruptReason
@@ -599,10 +608,16 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
               // (context just dropped) and replaced within seconds by the next
               // result. `size` keeps coming from the gateway-learned window
               // (getContextUsage under-reports extended 1M windows).
-              const usedTokens = await fetchContextUsedTokens(
-                this.session.query,
-                this.logger,
-              );
+              // Race the control request against the force-cancel wake: the
+              // loop only observes cancelWake at its top, so a wedged
+              // getContextUsage() awaited here would otherwise re-introduce the
+              // exact hang the backstop exists to break (issue #680). On a
+              // forced cancel usedTokens is null (recorded as 0 below) and the
+              // next iteration returns "cancelled".
+              const usedTokens = await Promise.race([
+                fetchContextUsedTokens(this.session.query, this.logger),
+                cancelWake.then(() => null),
+              ]);
               lastAssistantTotalUsage = usedTokens ?? 0;
               promptReplayed = true;
               await this.client.sessionUpdate({
@@ -613,6 +628,8 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
                   size: lastContextWindowSize,
                 },
               });
+              // No break: intentionally falls through to handleSystemMessage so
+              // the COMPACT_BOUNDARY ext notification still fires.
             }
             if (message.subtype === "commands_changed") {
               // Mid-session command-list change (e.g. skills discovered as the
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from "vitest";
+import { stripMarkerTags } from "./sdk-to-acp";
+
+describe("stripMarkerTags", () => {
+  it("strips a single marker and keeps surrounding prose", () => {
+    expect(
+      stripMarkerTags("before<command-name>/model</command-name>after"),
+    ).toBe("beforeafter");
+  });
+
+  it("strips multiple different markers in one pass", () => {
+    const input =
+      "a<command-args>x</command-args>b<local-command-stdout>out</local-command-stdout>c";
+    expect(stripMarkerTags(input)).toBe("abc");
+  });
+
+  it("leaves text without markers unchanged", () => {
+    expect(stripMarkerTags("")).toBe(""); // empty input
+    expect(stripMarkerTags("plain prose with < and > but no tags")).toBe(
+      "plain prose with < and > but no tags",
+    );
+  });
+
+  it("passes an unclosed opener through verbatim (dead-set path)", () => {
+    const input = "<command-name>no closing tag, prose continues"; // opener with no matching close
+    expect(stripMarkerTags(input)).toBe(input);
+  });
+
+  it("does not treat an orphan closing tag as an opener", () => {
+    expect(
+      stripMarkerTags("</command-name>text<command-name>real</command-name>"),
+    ).toBe("</command-name>text"); // orphan close is kept; the well-formed pair after it is stripped
+  });
+
+  it("matches the nearest closing tag for a repeated opener", () => {
+    // Lazy match: the first opener pairs with the first close, swallowing the
+    // inner opener and its text, exactly like the original `[\s\S]*?` regex.
+    expect(
+      stripMarkerTags(
+        "<command-name>outer<command-name>inner</command-name>trailing",
+      ),
+    ).toBe("trailing");
+  });
+
+  it("stays linear on pathological unclosed input", () => {
+    // 20000 openers with no close: a superlinear scan would only surface as a slow run or timeout.
+    const input = `${"<command-name>".repeat(20000)}tail`;
+    expect(stripMarkerTags(input)).toBe(input); // asserts output only; there is no explicit timing check
+  });
+});
diff --git a/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts b/packages/agent/src/adapters/claude/conversion/sdk-to-acp.ts
@@ -1009,7 +1009,8 @@ const LOCAL_COMMAND_MARKERS = [
 // Single-pass scanner that removes each `<tag>…</tag>` marker (matching the
 // nearest closing tag of the same name, like a lazy regex would) without the
 // catastrophic-backtracking risk of `[\s\S]*?` over pathological input.
-function stripMarkerTags(text: string): string {
+// Exported for unit testing.
+export function stripMarkerTags(text: string): string {
   const dead = new Set<string>();
   let result = "";
   let copiedUpTo = 0;
diff --git a/packages/agent/src/adapters/claude/session/models.test.ts b/packages/agent/src/adapters/claude/session/models.test.ts
@@ -122,6 +122,27 @@ describe("resolveModelPreference", () => {
     expect(resolveModelPreference("gpt-5", options)).toBeNull();
   });
 
+  it("does not inherit a cross-family match from the context hint alone", () => {
+    // Regression for #731: `opus[1m]` must not resolve to a sonnet entry purely
+    // because both carry the "1m" hint token when no real family token matches.
+    const sonnetOnly = [ // the only candidate is a sonnet model advertising 1M context
+      { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
+    ];
+    expect(resolveModelPreference("opus[1m]", sonnetOnly)).toBeNull();
+  });
+
+  it("resolves a hinted alias to the right family when a family token matches", () => {
+    // Both entries carry the "1m" hint, so the hint cannot decide; the "opus"
+    // token must break the tie and keep the match off the sonnet entry.
+    const withHints = [
+      { value: "claude-opus-4-8", name: "Claude Opus 4.8 (1M context)" },
+      { value: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (1M context)" },
+    ];
+    expect(resolveModelPreference("opus[1m]", withHints)).toBe(
+      "claude-opus-4-8", // the opus entry's `value`, not the sonnet one
+    );
+  });
+
   it("treats `best` and `default` as wildcards (no tokens contribute)", () => {
     expect(resolveModelPreference("best", options)).toBeNull();
     expect(resolveModelPreference("default", options)).toBeNull();
diff --git a/packages/agent/src/adapters/codex/models.test.ts b/packages/agent/src/adapters/codex/models.test.ts
@@ -1,8 +1,49 @@
+import type { SessionConfigOption } from "@agentclientprotocol/sdk";
 import { describe, expect, it } from "vitest";
-import { formatCodexModelName } from "./models";
+import { formatCodexModelName, modelIdFromConfigOptions } from "./models";
 
 describe("formatCodexModelName", () => {
   it("uses raw lowercase model ids", () => {
     expect(formatCodexModelName("GPT-5.5")).toBe("gpt-5.5");
   });
 });
+
+describe("modelIdFromConfigOptions", () => {
+  const modelOption = (currentValue: unknown): SessionConfigOption =>
+    ({
+      id: "model",
+      name: "Model",
+      type: "select",
+      category: "model",
+      currentValue,
+      options: [],
+    }) as unknown as SessionConfigOption; // fixture for a model-category option; the cast permits non-string currentValue
+
+  it("returns the currentValue of the model-category option", () => {
+    expect(modelIdFromConfigOptions([modelOption("gpt-5.5-codex")])).toBe(
+      "gpt-5.5-codex",
+    );
+  });
+
+  it("ignores non-model categories", () => {
+    const modeOption = {
+      id: "mode",
+      name: "Mode",
+      type: "select",
+      category: "mode",
+      currentValue: "auto",
+      options: [],
+    } as unknown as SessionConfigOption; // same shape as the model fixture, but category "mode"
+    expect(modelIdFromConfigOptions([modeOption])).toBeUndefined();
+  });
+
+  it("returns undefined when currentValue is not a string", () => {
+    expect(modelIdFromConfigOptions([modelOption(null)])).toBeUndefined();
+    expect(modelIdFromConfigOptions([modelOption(123)])).toBeUndefined();
+  });
+
+  it("returns undefined for null/undefined input", () => {
+    expect(modelIdFromConfigOptions(null)).toBeUndefined(); // nullish input in place of the options array
+    expect(modelIdFromConfigOptions(undefined)).toBeUndefined();
+  });
+});