
Commit 0fd6f36

fix(core): ensure compaction is more reliable, add reserve token buffer to ensure that input window has enough room to compact (#12924)
Co-authored-by: James Lal <james@littlebearlabs.io>
1 parent 60bdb6e commit 0fd6f36

16 files changed: +262 −189 lines changed

packages/opencode/src/agent/prompt/compaction.txt

Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,6 @@
 You are a helpful AI assistant tasked with summarizing conversations.
 
-When asked to summarize, provide a detailed but concise summary of the conversation.
+When asked to summarize, provide a detailed but concise summary of the conversation.
 Focus on information that would be helpful for continuing the conversation, including:
 - What was done
 - What is currently being worked on
@@ -10,3 +10,5 @@ Focus on information that would be helpful for continuing the conversation, incl
 - Important technical decisions and why they were made
 
 Your summary should be comprehensive enough to provide context but concise enough to be quickly understood.
+
+Do not respond to any questions in the conversation, only output the summary.

packages/opencode/src/config/config.ts

Lines changed: 6 additions & 0 deletions
@@ -1161,6 +1161,12 @@ export namespace Config {
       .object({
         auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
         prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
+        reserved: z
+          .number()
+          .int()
+          .min(0)
+          .optional()
+          .describe("Token buffer for compaction. Leaves enough window to avoid overflow during compaction."),
       })
       .optional(),
     experimental: z
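
Note: the new compaction.reserved field lets users set the reserved buffer explicitly in their config. A minimal sketch of how the field validates, using a standalone schema that is only a hypothetical stand-in for the real Config schema above:

import z from "zod"

// Hypothetical stand-in mirroring the compaction sub-schema added above.
const Compaction = z
  .object({
    auto: z.boolean().optional(),
    prune: z.boolean().optional(),
    reserved: z.number().int().min(0).optional(),
  })
  .optional()

// A config that reserves 30k tokens of the window for the compaction request.
console.log(Compaction.parse({ auto: true, reserved: 30_000 })) // { auto: true, reserved: 30000 }

// Negative or fractional values are rejected by .int().min(0).
console.log(Compaction.safeParse({ reserved: -1 }).success) // false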

packages/opencode/src/provider/transform.ts

Lines changed: 5 additions & 23 deletions
@@ -5,6 +5,7 @@ import type { JSONSchema } from "zod/v4/core"
 import type { Provider } from "./provider"
 import type { ModelsDev } from "./models"
 import { iife } from "@/util/iife"
+import { Flag } from "@/flag/flag"
 
 type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
 
@@ -17,6 +18,8 @@ function mimeToModality(mime: string): Modality | undefined {
 }
 
 export namespace ProviderTransform {
+  export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
+
   // Maps npm package to the key the AI SDK expects for providerOptions
   function sdkKey(npm: string): string | undefined {
     switch (npm) {
@@ -723,29 +726,8 @@
     return { [key]: options }
   }
 
-  export function maxOutputTokens(
-    npm: string,
-    options: Record<string, any>,
-    modelLimit: number,
-    globalLimit: number,
-  ): number {
-    const modelCap = modelLimit || globalLimit
-    const standardLimit = Math.min(modelCap, globalLimit)
-
-    if (npm === "@ai-sdk/anthropic" || npm === "@ai-sdk/google-vertex/anthropic") {
-      const thinking = options?.["thinking"]
-      const budgetTokens = typeof thinking?.["budgetTokens"] === "number" ? thinking["budgetTokens"] : 0
-      const enabled = thinking?.["type"] === "enabled"
-      if (enabled && budgetTokens > 0) {
-        // Return text tokens so that text + thinking <= model cap, preferring 32k text when possible.
-        if (budgetTokens + standardLimit <= modelCap) {
-          return standardLimit
-        }
-        return modelCap - budgetTokens
-      }
-    }
-
-    return standardLimit
+  export function maxOutputTokens(model: Provider.Model): number {
+    return Math.min(model.limit.output, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX
   }
 
   export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema | JSONSchema7): JSONSchema7 {
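
Note: the replacement maxOutputTokens no longer inspects thinking budgets; it simply clamps the model's output limit to the global cap. A rough sketch of the resulting behavior, inlined over a bare number for illustration (the input values below are hypothetical):

const OUTPUT_TOKEN_MAX = 32_000 // same value as the Flag fallback above

// Same expression as the new implementation, taken over a plain number.
function maxOutputTokens(limitOutput: number): number {
  return Math.min(limitOutput, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX
}

console.log(maxOutputTokens(8_192)) // 8192  -> a smaller model cap wins
console.log(maxOutputTokens(64_000)) // 32000 -> the global cap bounds larger models
console.log(maxOutputTokens(0)) // 32000 -> Math.min(0, ...) is falsy, so the global cap is the fallback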

packages/opencode/src/session/compaction.ts

Lines changed: 41 additions & 8 deletions
@@ -6,14 +6,14 @@ import { Instance } from "../project/instance"
 import { Provider } from "../provider/provider"
 import { MessageV2 } from "./message-v2"
 import z from "zod"
-import { SessionPrompt } from "./prompt"
 import { Token } from "../util/token"
 import { Log } from "../util/log"
 import { SessionProcessor } from "./processor"
 import { fn } from "@/util/fn"
 import { Agent } from "@/agent/agent"
 import { Plugin } from "@/plugin"
 import { Config } from "@/config/config"
+import { ProviderTransform } from "@/provider/transform"
 
 export namespace SessionCompaction {
   const log = Log.create({ service: "session.compaction" })
@@ -27,15 +27,22 @@
     ),
   }
 
+  const COMPACTION_BUFFER = 20_000
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
    if (config.compaction?.auto === false) return false
     const context = input.model.limit.context
     if (context === 0) return false
-    const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
-    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
-    const usable = input.model.limit.input || context - output
-    return count > usable
+
+    const count =
+      input.tokens.total ||
+      input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
+
+    const reserved =
+      config.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
+    const usable = input.model.limit.input ? input.model.limit.input - reserved : context - reserved
+    return count >= usable
   }
 
   export const PRUNE_MINIMUM = 20_000
@@ -139,8 +146,34 @@
       { sessionID: input.sessionID },
      { context: [], prompt: undefined },
    )
-    const defaultPrompt =
-      "Provide a detailed prompt for continuing our conversation above. Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next considering new session will not have access to our conversation."
+    const defaultPrompt = `Provide a detailed prompt for continuing our conversation above.
+Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next.
+The summary that you construct will be used so that another agent can read it and continue the work.
+
+When constructing the summary, try to stick to this template:
+---
+## Goal
+
+[What goal(s) is the user trying to accomplish?]
+
+## Instructions
+
+- [What important instructions did the user give you that are relevant]
+- [If there is a plan or spec, include information about it so next agent can continue using it]
+
+## Discoveries
+
+[What notable things were learned during this conversation that would be useful for the next agent to know when continuing the work]
+
+## Accomplished
+
+[What work has been completed, what work is still in progress, and what work is left?]
+
+## Relevant files / directories
+
+[Construct a structured list of relevant files that have been read, edited, or created that pertain to the task at hand. If all the files in a directory are relevant, include the path to the directory.]
+---`
+
     const promptText = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n")
     const result = await processor.process({
       user: userMessage,
@@ -181,7 +214,7 @@
          sessionID: input.sessionID,
          type: "text",
          synthetic: true,
-          text: "Continue if you have next steps",
+          text: "Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.",
          time: {
            start: Date.now(),
            end: Date.now(),
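
Note: the overflow check now reserves part of the window up front instead of subtracting the output cap, so compaction triggers while there is still room for the compaction request itself. A rough worked example of the new check with hypothetical numbers (200k input limit, no compaction.reserved override):

const COMPACTION_BUFFER = 20_000
const OUTPUT_TOKEN_MAX = 32_000

// Hypothetical usage and model limits, for illustration only.
const tokens = { total: 0, input: 150_000, output: 8_000, cache: { read: 30_000, write: 5_000 } }
const limit = { context: 200_000, input: 200_000, output: 64_000 }

// Prefer the provider-reported total; otherwise sum the components (now including cache writes).
const count = tokens.total || tokens.input + tokens.output + tokens.cache.read + tokens.cache.write // 193_000

// Reserve min(20k, maxOutputTokens) of the window for the compaction request.
const reserved = Math.min(COMPACTION_BUFFER, Math.min(limit.output, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX) // 20_000
const usable = limit.input ? limit.input - reserved : limit.context - reserved // 180_000

console.log(count >= usable) // true -> compaction kicks in before the window actually overflows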

packages/opencode/src/session/index.ts

Lines changed: 45 additions & 19 deletions
@@ -4,7 +4,7 @@ import { BusEvent } from "@/bus/bus-event"
 import { Bus } from "@/bus"
 import { Decimal } from "decimal.js"
 import z from "zod"
-import { type LanguageModelUsage, type ProviderMetadata } from "ai"
+import { type ProviderMetadata } from "ai"
 import { Config } from "../config/config"
 import { Flag } from "../flag/flag"
 import { Identifier } from "../id/id"
@@ -22,6 +22,8 @@ import { Snapshot } from "@/snapshot"
 import type { Provider } from "@/provider/provider"
 import { PermissionNext } from "@/permission/next"
 import { Global } from "@/global"
+import type { LanguageModelV2Usage } from "@ai-sdk/provider"
+import { iife } from "@/util/iife"
 
 export namespace Session {
   const log = Log.create({ service: "session" })
@@ -439,34 +441,58 @@
   export const getUsage = fn(
     z.object({
       model: z.custom<Provider.Model>(),
-      usage: z.custom<LanguageModelUsage>(),
+      usage: z.custom<LanguageModelV2Usage>(),
       metadata: z.custom<ProviderMetadata>().optional(),
     }),
     (input) => {
-      const cacheReadInputTokens = input.usage.cachedInputTokens ?? 0
-      const cacheWriteInputTokens = (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
-        // @ts-expect-error
-        input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
-        // @ts-expect-error
-        input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ??
-        0) as number
-
-      const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
-      const adjustedInputTokens = excludesCachedTokens
-        ? (input.usage.inputTokens ?? 0)
-        : (input.usage.inputTokens ?? 0) - cacheReadInputTokens - cacheWriteInputTokens
       const safe = (value: number) => {
         if (!Number.isFinite(value)) return 0
         return value
       }
+      const inputTokens = safe(input.usage.inputTokens ?? 0)
+      const outputTokens = safe(input.usage.outputTokens ?? 0)
+      const reasoningTokens = safe(input.usage.reasoningTokens ?? 0)
+
+      const cacheReadInputTokens = safe(input.usage.cachedInputTokens ?? 0)
+      const cacheWriteInputTokens = safe(
+        (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
+          // @ts-expect-error
+          input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
+          // @ts-expect-error
+          input.metadata?.["venice"]?.["usage"]?.["cacheCreationInputTokens"] ??
+          0) as number,
+      )
+
+      // OpenRouter reports inputTokens as the total count of input tokens (including cached tokens).
+      // As far as we know, other providers (OpenAI, Gemini, etc.) do the same; see vercel/ai#8794 (comment).
+      // Anthropic is the exception: its inputTokens does not include cached tokens.
+      // The cost calculation previously assumed every provider reports inputTokens the way Anthropic does (getUsage was likely written against Anthropic first), which produced incorrect costs for OpenRouter and others.
+      const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
+      const adjustedInputTokens = safe(
+        excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
+      )
+
+      const total = iife(() => {
+        // Anthropic doesn't provide total_tokens, and the AI SDK will vastly undercount if we
+        // don't compute the total from its components.
+        if (
+          input.model.api.npm === "@ai-sdk/anthropic" ||
+          input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
+          input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
+        ) {
+          return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
+        }
+        return input.usage.totalTokens
+      })
 
       const tokens = {
-        input: safe(adjustedInputTokens),
-        output: safe(input.usage.outputTokens ?? 0),
-        reasoning: safe(input.usage?.reasoningTokens ?? 0),
+        total,
+        input: adjustedInputTokens,
+        output: outputTokens,
+        reasoning: reasoningTokens,
        cache: {
-          write: safe(cacheWriteInputTokens),
-          read: safe(cacheReadInputTokens),
+          write: cacheWriteInputTokens,
+          read: cacheReadInputTokens,
        },
      }
 
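
Note: the rewritten getUsage normalizes the two usage-reporting styles described in the comments above before computing totals and cost. A rough sketch of the difference with hypothetical numbers:

// Hypothetical usage numbers for illustration.
const cacheRead = 40_000

// Anthropic-style: inputTokens excludes cached tokens and no totalTokens is reported,
// so the total is rebuilt from input + output + cache read + cache write.
const anthropicInput = 5_000
const anthropicCacheWrite = 2_000
const anthropicTotal = anthropicInput + 1_500 /* output */ + cacheRead + anthropicCacheWrite // 48_500

// OpenRouter/OpenAI-style: inputTokens already includes cached tokens, so the cached
// portion is subtracted before costing, and the SDK-reported totalTokens is used as-is.
const openrouterInput = 45_000
const adjustedInput = openrouterInput - cacheRead // 5_000

console.log(anthropicTotal, adjustedInput) // 48500 5000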

packages/opencode/src/session/llm.ts

Lines changed: 2 additions & 10 deletions
@@ -25,8 +25,7 @@ import { Auth } from "@/auth"
 
 export namespace LLM {
   const log = Log.create({ service: "llm" })
-
-  export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
+  export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
 
   export type StreamInput = {
     user: MessageV2.User
@@ -149,14 +148,7 @@
     )
 
     const maxOutputTokens =
-      isCodex || provider.id.includes("github-copilot")
-        ? undefined
-        : ProviderTransform.maxOutputTokens(
-            input.model.api.npm,
-            params.options,
-            input.model.limit.output,
-            OUTPUT_TOKEN_MAX,
-          )
+      isCodex || provider.id.includes("github-copilot") ? undefined : ProviderTransform.maxOutputTokens(input.model)
 
     const tools = await resolveTools(input)

packages/opencode/src/session/message-v2.ts

Lines changed: 2 additions & 0 deletions
@@ -210,6 +210,7 @@ export namespace MessageV2 {
     snapshot: z.string().optional(),
     cost: z.number(),
     tokens: z.object({
+      total: z.number().optional(),
       input: z.number(),
       output: z.number(),
       reasoning: z.number(),
@@ -383,6 +384,7 @@ export namespace MessageV2 {
     summary: z.boolean().optional(),
     cost: z.number(),
     tokens: z.object({
+      total: z.number().optional(),
       input: z.number(),
       output: z.number(),
       reasoning: z.number(),
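
Note: total is optional so that previously persisted messages without the field still validate. A minimal sketch, using a standalone schema that is only a hypothetical stand-in for the tokens schema above:

import z from "zod"

// Hypothetical stand-in mirroring the tokens object schema.
const Tokens = z.object({
  total: z.number().optional(),
  input: z.number(),
  output: z.number(),
  reasoning: z.number(),
  cache: z.object({ read: z.number(), write: z.number() }),
})

// Older messages stored without `total` still parse...
console.log(Tokens.safeParse({ input: 1, output: 2, reasoning: 0, cache: { read: 0, write: 0 } }).success) // true

// ...and new messages can carry the provider-reported total.
console.log(Tokens.parse({ total: 10, input: 1, output: 2, reasoning: 0, cache: { read: 0, write: 0 } }).total) // 10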

packages/opencode/src/session/processor.ts

Lines changed: 3 additions & 0 deletions
@@ -342,6 +342,9 @@ export namespace SessionProcessor {
            stack: JSON.stringify(e.stack),
          })
          const error = MessageV2.fromError(e, { providerID: input.model.providerID })
+          if (MessageV2.ContextOverflowError.isInstance(error)) {
+            // TODO: Handle context overflow error
+          }
          const retry = SessionRetry.retryable(error)
          if (retry !== undefined) {
            attempt++

packages/opencode/src/session/prompt.ts

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ globalThis.AI_SDK_LOG_WARNINGS = false
 
 export namespace SessionPrompt {
   const log = Log.create({ service: "session.prompt" })
-  export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
 
   const state = Instance.state(
     () => {

packages/opencode/src/session/retry.ts

Lines changed: 1 addition & 2 deletions
@@ -59,9 +59,8 @@ export namespace SessionRetry {
   }
 
   export function retryable(error: ReturnType<NamedError["toObject"]>) {
-    // DO NOT retry context overflow errors
+    // context overflow errors should not be retried
     if (MessageV2.ContextOverflowError.isInstance(error)) return undefined
-
     if (MessageV2.APIError.isInstance(error)) {
       if (!error.data.isRetryable) return undefined
       if (error.data.responseBody?.includes("FreeUsageLimitError"))
