From dfa3f9551d51534187dbffc79ce4522eec655e49 Mon Sep 17 00:00:00 2001 From: GuidanNick <224735395+guidan-nick@users.noreply.github.com> Date: Sun, 21 Jun 2026 01:08:12 +0200 Subject: [PATCH 1/5] feat: add configurable effort levels for thinking budget Add native Kiro effort parameter support to control thinking/reasoning depth. Kiro ignores Anthropic's budget_tokens/max_thinking_length and instead uses its own effort levels (low/medium/high/xhigh/max) passed via additionalModelRequestFields.output_config.effort. Changes: - Add effort and auto_effort_mapping config options to schema - Add effort.ts with budget-to-effort mapping logic - Modify sdk-client to inject additionalModelRequestFields via middleware - Update request pipeline to resolve and pass effort through - Add unit tests for effort resolution Effort levels: - low: minimal reasoning - medium: balanced (default, ~20k tokens) - high: deeper reasoning (~50k tokens) - xhigh: extended (opus-4.7/4.8 only, ~80k tokens) - max: maximum depth (128k tokens on opus-4.7/4.8) Usage in kiro.json: { "effort": "max" } // explicit effort level { "auto_effort_mapping": true } // map OpenCode budget to effort (default) Fixes thinking budget being capped at 20k regardless of OpenCode settings. 
--- src/__tests__/effort.test.ts | 95 ++++++++++++++++++++ src/core/request/request-handler.ts | 9 +- src/plugin/config/schema.ts | 30 ++++++- src/plugin/effort.ts | 132 ++++++++++++++++++++++++++++ src/plugin/request.ts | 23 ++++- src/plugin/sdk-client.ts | 49 +++++++++-- src/plugin/types.ts | 5 +- 7 files changed, 329 insertions(+), 14 deletions(-) create mode 100644 src/__tests__/effort.test.ts create mode 100644 src/plugin/effort.ts diff --git a/src/__tests__/effort.test.ts b/src/__tests__/effort.test.ts new file mode 100644 index 0000000..d475a58 --- /dev/null +++ b/src/__tests__/effort.test.ts @@ -0,0 +1,95 @@ +import { describe, expect, test } from 'bun:test' +import { + budgetToEffort, + getEffectiveEffort, + resolveEffort, + supportsEffort, + supportsXHighEffort +} from '../plugin/effort.js' + +describe('effort module', () => { + describe('supportsEffort', () => { + test('returns true for supported models', () => { + expect(supportsEffort('claude-opus-4.8')).toBe(true) + expect(supportsEffort('claude-opus-4.7')).toBe(true) + expect(supportsEffort('claude-sonnet-4.6')).toBe(true) + expect(supportsEffort('claude-sonnet-4.6-1m')).toBe(true) + }) + + test('returns false for unsupported models', () => { + expect(supportsEffort('claude-haiku-4.5')).toBe(false) + expect(supportsEffort('unknown-model')).toBe(false) + }) + }) + + describe('supportsXHighEffort', () => { + test('returns true for opus 4.7 and 4.8', () => { + expect(supportsXHighEffort('claude-opus-4.8')).toBe(true) + expect(supportsXHighEffort('claude-opus-4.7')).toBe(true) + }) + + test('returns false for other models', () => { + expect(supportsXHighEffort('claude-opus-4.6')).toBe(false) + expect(supportsXHighEffort('claude-sonnet-4.6')).toBe(false) + }) + }) + + describe('resolveEffort', () => { + test('returns undefined for unsupported models', () => { + expect(resolveEffort('claude-haiku-4.5', 'max')).toBeUndefined() + }) + + test('returns effort as-is for supported levels', () => { + 
expect(resolveEffort('claude-opus-4.8', 'low')).toBe('low') + expect(resolveEffort('claude-opus-4.8', 'max')).toBe('max') + expect(resolveEffort('claude-opus-4.8', 'xhigh')).toBe('xhigh') + }) + + test('clamps xhigh to max for models without xhigh support', () => { + expect(resolveEffort('claude-sonnet-4.6', 'xhigh')).toBe('max') + expect(resolveEffort('claude-opus-4.6', 'xhigh')).toBe('max') + }) + }) + + describe('budgetToEffort', () => { + test('returns undefined for unsupported models', () => { + expect(budgetToEffort(100000, 'claude-haiku-4.5')).toBeUndefined() + }) + + test('maps budget ranges correctly', () => { + expect(budgetToEffort(5000, 'claude-opus-4.8')).toBe('low') + expect(budgetToEffort(20000, 'claude-opus-4.8')).toBe('medium') + expect(budgetToEffort(50000, 'claude-opus-4.8')).toBe('high') + expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('xhigh') + expect(budgetToEffort(128000, 'claude-opus-4.8')).toBe('max') + }) + + test('maps to max instead of xhigh for non-xhigh models', () => { + expect(budgetToEffort(80000, 'claude-sonnet-4.6')).toBe('max') + }) + }) + + describe('getEffectiveEffort', () => { + test('returns undefined for unsupported models', () => { + expect(getEffectiveEffort('claude-haiku-4.5', true, 100000)).toBeUndefined() + }) + + test('uses explicit config when provided', () => { + expect(getEffectiveEffort('claude-opus-4.8', true, 20000, 'max')).toBe('max') + expect(getEffectiveEffort('claude-opus-4.8', false, 20000, 'high')).toBe('high') + }) + + test('returns undefined when not thinking and no config', () => { + expect(getEffectiveEffort('claude-opus-4.8', false, 20000)).toBeUndefined() + }) + + test('uses budget mapping when thinking and auto-mapping enabled', () => { + expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, true)).toBe('max') + expect(getEffectiveEffort('claude-opus-4.8', true, 20000, undefined, true)).toBe('medium') + }) + + test('falls back to medium when auto-mapping disabled', () => { + 
expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, false)).toBe('medium') + }) + }) +}) diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts index f1ff1da..f57382b 100644 --- a/src/core/request/request-handler.ts +++ b/src/core/request/request-handler.ts @@ -7,7 +7,7 @@ import * as logger from '../../plugin/logger' import { transformToSdkRequest } from '../../plugin/request' import { createSdkClient } from '../../plugin/sdk-client' import { syncFromKiroCli } from '../../plugin/sync/kiro-cli' -import type { KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types' +import type { Effort, KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types' import { AccountSelector } from '../account/account-selector' import { UsageTracker } from '../account/usage-tracker' import { TokenRefresher } from '../auth/token-refresher' @@ -134,7 +134,7 @@ export class RequestHandler { } try { - const client = createSdkClient(auth, sdkPrep.region) + const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort) const command = new GenerateAssistantResponseCommand({ conversationState: sdkPrep.conversationState as any, profileArn: sdkPrep.profileArn @@ -219,7 +219,10 @@ export class RequestHandler { budget: number, showToast?: (message: string, variant: 'info' | 'warning' | 'success' | 'error') => void ): SdkPreparedRequest { - return transformToSdkRequest(body, model, auth, think, budget, showToast) + return transformToSdkRequest(body, model, auth, think, budget, showToast, { + effort: this.config.effort, + autoEffortMapping: this.config.auto_effort_mapping + }) } private handleSuccessfulRequest(acc: ManagedAccount): void { diff --git a/src/plugin/config/schema.ts b/src/plugin/config/schema.ts index 2aa0e39..18cd3cf 100644 --- a/src/plugin/config/schema.ts +++ b/src/plugin/config/schema.ts @@ -3,6 +3,17 @@ import { z } from 'zod' export const AccountSelectionStrategySchema = 
z.enum(['sticky', 'round-robin', 'lowest-usage']) export type AccountSelectionStrategy = z.infer<typeof AccountSelectionStrategySchema> +/** + * Kiro effort levels control thinking/reasoning depth. + * - low: minimal reasoning + * - medium: balanced (default when thinking enabled) + * - high: deeper reasoning + * - xhigh: extended reasoning (opus-4.7, opus-4.8 only) + * - max: maximum reasoning depth (128k thinking tokens on opus-4.7/4.8) + */ +export const EffortSchema = z.enum(['low', 'medium', 'high', 'xhigh', 'max']) +export type Effort = z.infer<typeof EffortSchema> + export const RegionSchema = z.enum([ 'us-east-1', 'us-east-2', @@ -70,7 +81,21 @@ export const KiroConfigSchema = z.object({ usage_tracking_enabled: z.boolean().default(true), auto_sync_kiro_cli: z.boolean().default(true), - enable_log_api_request: z.boolean().default(false) + enable_log_api_request: z.boolean().default(false), + + /** + * Default effort level for thinking models. Controls reasoning depth. + * When set, this overrides the automatic budget-based mapping. + * Values: 'low', 'medium', 'high', 'xhigh' (opus-4.7/4.8 only), 'max' + */ + effort: EffortSchema.optional(), + + /** + * Enable automatic effort mapping from OpenCode's thinking budget. + * When true (default), maps budget ranges to effort levels. + * When false, only uses explicit effort config or falls back to 'medium'. + */ + auto_effort_mapping: z.boolean().default(true) }) export type KiroConfig = z.infer<typeof KiroConfigSchema> @@ -88,5 +113,6 @@ export const DEFAULT_CONFIG: KiroConfig = { auth_server_port_range: 10, usage_tracking_enabled: true, auto_sync_kiro_cli: true, - enable_log_api_request: false + enable_log_api_request: false, + auto_effort_mapping: true } diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts new file mode 100644 index 0000000..7f99f8a --- /dev/null +++ b/src/plugin/effort.ts @@ -0,0 +1,132 @@ +import type { Effort } from './config/schema' + +/** + * Effort levels ordered from lowest to highest reasoning depth. 
+ */ +export const EFFORT_LEVELS: readonly Effort[] = ['low', 'medium', 'high', 'xhigh', 'max'] as const + +/** + * Models that support the 5-value effort enum (including xhigh). + * These models support up to 128k thinking tokens with max effort. + */ +const XHIGH_CAPABLE_MODELS = new Set([ + 'claude-opus-4.7', + 'claude-opus-4.8' +]) + +/** + * Models that support the 4-value effort enum (no xhigh). + * xhigh requests on these models are clamped to max. + */ +const EFFORT_CAPABLE_MODELS = new Set([ + 'claude-opus-4.5', + 'claude-opus-4.6', + 'claude-opus-4.6-1m', + 'claude-sonnet-4.5', + 'claude-sonnet-4.5-1m', + 'claude-sonnet-4.6', + 'claude-sonnet-4.6-1m', + ...XHIGH_CAPABLE_MODELS +]) + +/** + * Check if a model supports the effort parameter. + */ +export function supportsEffort(kiroModel: string): boolean { + return EFFORT_CAPABLE_MODELS.has(kiroModel) +} + +/** + * Check if a model supports xhigh effort level. + */ +export function supportsXHighEffort(kiroModel: string): boolean { + return XHIGH_CAPABLE_MODELS.has(kiroModel) +} + +/** + * Resolve effort level for a given model. + * - Returns undefined if model doesn't support effort + * - Clamps xhigh to max for models that don't support it + */ +export function resolveEffort(kiroModel: string, requested: Effort): Effort | undefined { + if (!supportsEffort(kiroModel)) { + return undefined + } + + // xhigh is only supported on opus-4.7 and opus-4.8 + if (requested === 'xhigh' && !supportsXHighEffort(kiroModel)) { + return 'max' + } + + return requested +} + +/** + * Map OpenCode thinking budget to Kiro effort level. 
+ * + * Budget ranges (approximate thinking token allocations): + * - low: minimal thinking + * - medium: ~20k tokens (OpenCode default) + * - high: ~50k tokens + * - xhigh: ~80k tokens (opus-4.7/4.8 only) + * - max: ~128k tokens + */ +export function budgetToEffort(budget: number, kiroModel: string): Effort | undefined { + if (!supportsEffort(kiroModel)) { + return undefined + } + + let effort: Effort + if (budget <= 10000) { + effort = 'low' + } else if (budget <= 30000) { + effort = 'medium' + } else if (budget <= 60000) { + effort = 'high' + } else if (budget <= 100000) { + effort = supportsXHighEffort(kiroModel) ? 'xhigh' : 'max' + } else { + effort = 'max' + } + + return effort +} + +/** + * Get the effective effort level based on config, budget, and model. + * + * Priority: + * 1. Explicit effort config (if set) + * 2. Budget-to-effort mapping (if auto_effort_mapping enabled and thinking) + * 3. 'medium' default (if thinking enabled) + * 4. undefined (if not thinking) + */ +export function getEffectiveEffort( + kiroModel: string, + thinking: boolean, + budget: number, + configEffort?: Effort, + autoEffortMapping = true +): Effort | undefined { + if (!supportsEffort(kiroModel)) { + return undefined + } + + // Explicit config takes precedence + if (configEffort) { + return resolveEffort(kiroModel, configEffort) + } + + // If not thinking, no effort needed + if (!thinking) { + return undefined + } + + // Auto-map budget to effort + if (autoEffortMapping) { + return budgetToEffort(budget, kiroModel) + } + + // Default to medium when thinking without auto-mapping + return 'medium' +} diff --git a/src/plugin/request.ts b/src/plugin/request.ts index 71542f7..a9459ef 100644 --- a/src/plugin/request.ts +++ b/src/plugin/request.ts @@ -16,6 +16,7 @@ import { convertToolsToCodeWhisperer, deduplicateToolResults } from '../infrastructure/transformers/tool-transformer.js' +import { getEffectiveEffort } from './effort.js' import { convertImagesToKiroFormat, 
extractAllImages, @@ -24,6 +25,7 @@ import { import { resolveKiroModel } from './models.js' import type { CodeWhispererRequest, + Effort, KiroAuthDetails, PreparedRequest, SdkPreparedRequest @@ -35,6 +37,11 @@ interface TransformResult { convId: string } +interface EffortConfig { + effort?: Effort + autoEffortMapping?: boolean +} + type ToastFunction = (message: string, variant: 'info' | 'warning' | 'success' | 'error') => void function buildCodeWhispererRequest( @@ -317,7 +324,8 @@ export function transformToSdkRequest( auth: KiroAuthDetails, think = false, budget = 20000, - showToast?: ToastFunction + showToast?: ToastFunction, + effortConfig?: EffortConfig ): SdkPreparedRequest { const { request, resolved, convId } = buildCodeWhispererRequest( body, @@ -327,12 +335,23 @@ export function transformToSdkRequest( budget, showToast ) + + // Resolve effort level based on config and model capabilities + const effort = getEffectiveEffort( + resolved, + think, + budget, + effortConfig?.effort, + effortConfig?.autoEffortMapping ?? true + ) + return { conversationState: request.conversationState, profileArn: request.profileArn, streaming: true, effectiveModel: resolved, conversationId: convId, - region: extractRegionFromArn(auth.profileArn) ?? auth.region + region: extractRegionFromArn(auth.profileArn) ?? auth.region, + effort } } diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts index 5bfb5ac..d4eb39d 100644 --- a/src/plugin/sdk-client.ts +++ b/src/plugin/sdk-client.ts @@ -1,18 +1,29 @@ import { CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client' import { KIRO_CONSTANTS } from '../constants.js' -import type { KiroAuthDetails } from './types' +import type { Effort, KiroAuthDetails } from './types' -const clientCache = new Map() +/** + * Cache key includes effort to ensure separate clients for different effort levels, + * since middleware is configured at client creation time. 
+ */ +interface ClientCacheEntry { + client: CodeWhispererStreamingClient + token: string + effort?: Effort +} + +const clientCache = new Map() const KIRO_CLI_MAX_ATTEMPTS = 3 export function createSdkClient( auth: KiroAuthDetails, - region: string + region: string, + effort?: Effort ): CodeWhispererStreamingClient { - const cacheKey = `${region}:${auth.email || 'default'}` + const cacheKey = `${region}:${auth.email || 'default'}:${effort || 'none'}` const cached = clientCache.get(cacheKey) - if (cached && cached.token === auth.access) { + if (cached && cached.token === auth.access && cached.effort === effort) { return cached.client } @@ -26,6 +37,7 @@ export function createSdkClient( customUserAgent: [[KIRO_CONSTANTS.USER_AGENT]] }) + // Add Kiro-specific headers client.middlewareStack.add( (next: any) => async (args: any) => { args.request.headers['x-amzn-kiro-agent-mode'] = 'vibe' @@ -34,7 +46,32 @@ export function createSdkClient( { step: 'build', name: 'addKiroHeaders' } ) - clientCache.set(cacheKey, { client, token }) + // Inject additionalModelRequestFields for effort-based thinking control + if (effort) { + client.middlewareStack.add( + (next: any) => async (args: any) => { + // The SDK serializes input to args.input, we need to modify the body + // before it's sent. The body is in args.request.body as a string. 
+ if (args.request?.body) { + try { + const body = JSON.parse(args.request.body) + body.additionalModelRequestFields = { + output_config: { + effort + } + } + args.request.body = JSON.stringify(body) + } catch { + // If body parsing fails, continue without modification + } + } + return next(args) + }, + { step: 'build', name: 'addEffortConfig', priority: 'low' } + ) + } + + clientCache.set(cacheKey, { client, token, effort }) return client } diff --git a/src/plugin/types.ts b/src/plugin/types.ts index e8b05d3..a17e0d8 100644 --- a/src/plugin/types.ts +++ b/src/plugin/types.ts @@ -1,8 +1,9 @@ import z from 'zod' -import { RegionSchema } from './config/schema' +import { EffortSchema, RegionSchema } from './config/schema' export type KiroAuthMethod = 'idc' | 'desktop' export type KiroRegion = z.infer +export type Effort = z.infer export interface KiroAuthDetails { refresh: string @@ -119,6 +120,8 @@ export interface SdkPreparedRequest { effectiveModel: string conversationId: string region: string + /** Resolved effort level for thinking models */ + effort?: Effort } export type AccountSelectionStrategy = 'sticky' | 'round-robin' | 'lowest-usage' From 0f27667f0b329028b371f3edbd4c16d0a249f646 Mon Sep 17 00:00:00 2001 From: GuidanNick <224735395+guidan-nick@users.noreply.github.com> Date: Sun, 21 Jun 2026 01:14:54 +0200 Subject: [PATCH 2/5] add effort debug logging for troubleshooting --- package-lock.json | 2 ++ src/core/request/request-handler.ts | 3 +++ src/plugin/effort.ts | 4 ++-- src/plugin/sdk-client.ts | 4 ++++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0221348..88b0db9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2021,6 +2021,7 @@ "integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==", "dev": true, "license": "MIT", + "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -2251,6 +2252,7 @@ "integrity": 
"sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts index f57382b..cc8b28b 100644 --- a/src/core/request/request-handler.ts +++ b/src/core/request/request-handler.ts @@ -132,6 +132,9 @@ export class RequestHandler { if (apiTimestamp) { this.logSdkRequest(sdkPrep, acc, apiTimestamp) } + if (sdkPrep.effort) { + logger.log(`[Effort] Resolved effort: ${sdkPrep.effort} for model: ${sdkPrep.effectiveModel}`) + } try { const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort) diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts index 7f99f8a..24e97cd 100644 --- a/src/plugin/effort.ts +++ b/src/plugin/effort.ts @@ -96,7 +96,7 @@ export function budgetToEffort(budget: number, kiroModel: string): Effort | unde * Get the effective effort level based on config, budget, and model. * * Priority: - * 1. Explicit effort config (if set) + * 1. Explicit effort config (if set) - always applied regardless of thinking state * 2. Budget-to-effort mapping (if auto_effort_mapping enabled and thinking) * 3. 'medium' default (if thinking enabled) * 4. 
undefined (if not thinking) @@ -112,7 +112,7 @@ export function getEffectiveEffort( return undefined } - // Explicit config takes precedence + // Explicit config takes precedence - always applied even without thinking if (configEffort) { return resolveEffort(kiroModel, configEffort) } diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts index d4eb39d..5bdc1df 100644 --- a/src/plugin/sdk-client.ts +++ b/src/plugin/sdk-client.ts @@ -1,5 +1,6 @@ import { CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client' import { KIRO_CONSTANTS } from '../constants.js' +import * as logger from './logger.js' import type { Effort, KiroAuthDetails } from './types' /** @@ -48,6 +49,7 @@ export function createSdkClient( // Inject additionalModelRequestFields for effort-based thinking control if (effort) { + logger.log(`[Effort] Adding middleware to inject effort: ${effort}`) client.middlewareStack.add( (next: any) => async (args: any) => { // The SDK serializes input to args.input, we need to modify the body @@ -61,8 +63,10 @@ export function createSdkClient( } } args.request.body = JSON.stringify(body) + logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`) } catch { // If body parsing fails, continue without modification + logger.warn('[Effort] Failed to parse request body for effort injection') } } return next(args) From fad2d3015675fcccc51333b7cdfb70a68bb2577f Mon Sep 17 00:00:00 2001 From: GuidanNick <224735395+guidan-nick@users.noreply.github.com> Date: Sun, 21 Jun 2026 01:24:06 +0200 Subject: [PATCH 3/5] fix: read thinkingConfig from top-level body field OpenCode sends thinkingBudget in body.thinkingConfig (not body.providerOptions.thinkingConfig). Update budget extraction to check both locations. 
Also update budget-to-effort mapping to align with OpenCode's standard variant values: - 8192 (low) -> effort low - 16384 (medium) -> effort medium - 24576 (high) -> effort high - 32768 (max) -> effort max --- src/core/request/request-handler.ts | 13 +++++++++++-- src/plugin/effort.ts | 24 ++++++++++++++---------- src/plugin/sdk-client.ts | 8 ++++++++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts index cc8b28b..7ff0380 100644 --- a/src/core/request/request-handler.ts +++ b/src/core/request/request-handler.ts @@ -79,8 +79,17 @@ export class RequestHandler { ): Promise { const body = init?.body ? JSON.parse(init.body) : {} const model = this.extractModel(url) || body.model || 'claude-sonnet-4-5' - const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig - const budget = body.providerOptions?.thinkingConfig?.thinkingBudget || 20000 + const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig + const budget = + body.providerOptions?.thinkingConfig?.thinkingBudget || + body.thinkingConfig?.thinkingBudget || + body.thinkingConfig?.budget_tokens || + 20000 + + // Debug: log what OpenCode sends us + logger.log(`[Debug] body keys: ${Object.keys(body).join(', ')}`) + logger.log(`[Debug] body.thinkingConfig: ${JSON.stringify(body.thinkingConfig)}`) + logger.log(`[Debug] model: ${model}, think: ${think}, budget: ${budget}`) let retry = 0 let consecutiveNullAccounts = 0 diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts index 24e97cd..94d37bc 100644 --- a/src/plugin/effort.ts +++ b/src/plugin/effort.ts @@ -64,12 +64,18 @@ export function resolveEffort(kiroModel: string, requested: Effort): Effort | un /** * Map OpenCode thinking budget to Kiro effort level. 
* - Budget ranges (approximate thinking token allocations): - * - low: minimal thinking - * - medium: ~20k tokens (OpenCode default) - * - high: ~50k tokens - * - xhigh: ~80k tokens (opus-4.7/4.8 only) - * - max: ~128k tokens + * OpenCode sends thinkingBudget from its variant config. Standard values: + * - low: 8192 + * - medium: 16384 + * - high: 24576 + * - max: 32768 + * + * We map these ranges to Kiro effort levels: + * - ≤10000 → low + * - ≤20000 → medium + * - ≤28000 → high + * - ≤32768 → max (xhigh is never selected by this mapping; set `effort` explicitly to use it) + * - >32768 → max */ export function budgetToEffort(budget: number, kiroModel: string): Effort | undefined { if (!supportsEffort(kiroModel)) { @@ -79,12 +85,10 @@ export function budgetToEffort(budget: number, kiroModel: string): Effort | unde let effort: Effort if (budget <= 10000) { effort = 'low' - } else if (budget <= 30000) { + } else if (budget <= 20000) { effort = 'medium' - } else if (budget <= 60000) { + } else if (budget <= 28000) { effort = 'high' - } else if (budget <= 100000) { - effort = supportsXHighEffort(kiroModel) ? 
'xhigh' : 'max' } else { effort = 'max' } diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts index 5bdc1df..7a3281b 100644 --- a/src/plugin/sdk-client.ts +++ b/src/plugin/sdk-client.ts @@ -64,10 +64,18 @@ export function createSdkClient( } args.request.body = JSON.stringify(body) logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`) + // Dump the top-level keys of final request body for verification + logger.log(`[Effort] Final request body keys: ${Object.keys(body).join(', ')}`) + logger.log(`[Effort] Final body.additionalModelRequestFields: ${JSON.stringify(body.additionalModelRequestFields)}`) } catch { // If body parsing fails, continue without modification logger.warn('[Effort] Failed to parse request body for effort injection') } + } else { + logger.warn('[Effort] No args.request.body found - checking args structure') + logger.log(`[Effort] args keys: ${Object.keys(args || {}).join(', ')}`) + logger.log(`[Effort] args.request keys: ${Object.keys(args?.request || {}).join(', ')}`) + logger.log(`[Effort] args.input keys: ${Object.keys(args?.input || {}).join(', ')}`) } return next(args) }, From 494baf46222a44c4f002bcd885bf9dc2e66b1fae Mon Sep 17 00:00:00 2001 From: GuidanNick <224735395+guidan-nick@users.noreply.github.com> Date: Sun, 21 Jun 2026 08:18:17 +0200 Subject: [PATCH 4/5] fix effort docs and logging --- README.md | 54 +++++++++++++++++++++++++++++ src/__tests__/effort.test.ts | 8 ++--- src/core/request/request-handler.ts | 14 ++------ src/plugin/sdk-client.ts | 12 ------- 4 files changed, 61 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 4e646a4..89553df 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ models with substantial trial quotas. SQLite. - **Native Thinking Mode**: Full support for Claude reasoning capabilities via virtual model mappings. +- **Kiro Effort Mapping**: Maps OpenCode thinking budgets to Kiro's native effort + levels automatically. 
- **Automated Recovery**: Exponential backoff for rate limits and automated token refresh. @@ -46,6 +48,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -61,6 +64,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -81,6 +85,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -96,6 +101,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -111,6 +117,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -126,6 +133,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { 
"thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -146,6 +154,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: "variants": { "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, @@ -177,6 +186,48 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`: } ``` +### Thinking Effort Configuration + +Configure Kiro effort per model in your OpenCode provider model definitions by setting +`thinkingConfig.thinkingBudget` on each model variant. The plugin automatically maps +those budgets to Kiro's native `effort` field for supported Claude models, so you do +not need to hardcode a global `effort` value in `~/.config/opencode/kiro.json`. + +```json +{ + "provider": { + "kiro": { + "models": { + "claude-opus-4-7-thinking": { + "name": "Claude Opus 4.7 Thinking", + "limit": { "context": 1000000, "output": 64000 }, + "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] }, + "variants": { + "low": { "thinkingConfig": { "thinkingBudget": 8192 } }, + "medium": { "thinkingConfig": { "thinkingBudget": 16384 } }, + "high": { "thinkingConfig": { "thinkingBudget": 24576 } }, + "max": { "thinkingConfig": { "thinkingBudget": 32768 } } + } + } + } + } + } +} +``` + +Budget mapping: + +| OpenCode budget | Kiro effort | +| --------------- | ----------- | +| `<= 10000` | `low` | +| `<= 20000` | `medium` | +| `<= 28000` | `high` | +| `> 28000` | `max` | + +Use `~/.config/opencode/kiro.json` for plugin-wide behavior such as auth sync, +account selection, retry limits, and `auto_effort_mapping`. A top-level `effort` +setting is a global override for all supported models, not a per-model setting. + ## Setup 1. 
**Authentication via Kiro CLI (Recommended)**: @@ -294,6 +345,7 @@ Edit `~/.config/opencode/kiro.json`: "token_expiry_buffer_ms": 120000, "usage_sync_max_retries": 3, "usage_tracking_enabled": true, + "auto_effort_mapping": true, "enable_log_api_request": false } ``` @@ -318,6 +370,8 @@ Edit `~/.config/opencode/kiro.json`: - `auth_server_port_start`: Legacy/ignored (no local auth server). - `auth_server_port_range`: Legacy/ignored (no local auth server). - `usage_tracking_enabled`: Enable usage tracking and toast notifications. +- `auto_effort_mapping`: Automatically map OpenCode thinking budgets to Kiro effort + levels for supported models (default: `true`). - `enable_log_api_request`: Enable detailed API request logging. ## Storage diff --git a/src/__tests__/effort.test.ts b/src/__tests__/effort.test.ts index d475a58..7f109c2 100644 --- a/src/__tests__/effort.test.ts +++ b/src/__tests__/effort.test.ts @@ -58,10 +58,10 @@ describe('effort module', () => { test('maps budget ranges correctly', () => { expect(budgetToEffort(5000, 'claude-opus-4.8')).toBe('low') - expect(budgetToEffort(20000, 'claude-opus-4.8')).toBe('medium') - expect(budgetToEffort(50000, 'claude-opus-4.8')).toBe('high') - expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('xhigh') - expect(budgetToEffort(128000, 'claude-opus-4.8')).toBe('max') + expect(budgetToEffort(16384, 'claude-opus-4.8')).toBe('medium') + expect(budgetToEffort(24576, 'claude-opus-4.8')).toBe('high') + expect(budgetToEffort(32768, 'claude-opus-4.8')).toBe('max') + expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('max') }) test('maps to max instead of xhigh for non-xhigh models', () => { diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts index 7ff0380..7dcfdf4 100644 --- a/src/core/request/request-handler.ts +++ b/src/core/request/request-handler.ts @@ -7,7 +7,7 @@ import * as logger from '../../plugin/logger' import { transformToSdkRequest } from '../../plugin/request' import { 
createSdkClient } from '../../plugin/sdk-client' import { syncFromKiroCli } from '../../plugin/sync/kiro-cli' -import type { Effort, KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types' +import type { KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types' import { AccountSelector } from '../account/account-selector' import { UsageTracker } from '../account/usage-tracker' import { TokenRefresher } from '../auth/token-refresher' @@ -79,18 +79,14 @@ export class RequestHandler { ): Promise { const body = init?.body ? JSON.parse(init.body) : {} const model = this.extractModel(url) || body.model || 'claude-sonnet-4-5' - const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig + const think = + model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig const budget = body.providerOptions?.thinkingConfig?.thinkingBudget || body.thinkingConfig?.thinkingBudget || body.thinkingConfig?.budget_tokens || 20000 - // Debug: log what OpenCode sends us - logger.log(`[Debug] body keys: ${Object.keys(body).join(', ')}`) - logger.log(`[Debug] body.thinkingConfig: ${JSON.stringify(body.thinkingConfig)}`) - logger.log(`[Debug] model: ${model}, think: ${think}, budget: ${budget}`) - let retry = 0 let consecutiveNullAccounts = 0 const retryContext = this.retryStrategy.createContext() @@ -141,10 +137,6 @@ export class RequestHandler { if (apiTimestamp) { this.logSdkRequest(sdkPrep, acc, apiTimestamp) } - if (sdkPrep.effort) { - logger.log(`[Effort] Resolved effort: ${sdkPrep.effort} for model: ${sdkPrep.effectiveModel}`) - } - try { const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort) const command = new GenerateAssistantResponseCommand({ diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts index 7a3281b..d4eb39d 100644 --- a/src/plugin/sdk-client.ts +++ b/src/plugin/sdk-client.ts @@ -1,6 +1,5 @@ import { 
CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client' import { KIRO_CONSTANTS } from '../constants.js' -import * as logger from './logger.js' import type { Effort, KiroAuthDetails } from './types' /** @@ -49,7 +48,6 @@ export function createSdkClient( // Inject additionalModelRequestFields for effort-based thinking control if (effort) { - logger.log(`[Effort] Adding middleware to inject effort: ${effort}`) client.middlewareStack.add( (next: any) => async (args: any) => { // The SDK serializes input to args.input, we need to modify the body @@ -63,19 +61,9 @@ export function createSdkClient( } } args.request.body = JSON.stringify(body) - logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`) - // Dump the top-level keys of final request body for verification - logger.log(`[Effort] Final request body keys: ${Object.keys(body).join(', ')}`) - logger.log(`[Effort] Final body.additionalModelRequestFields: ${JSON.stringify(body.additionalModelRequestFields)}`) } catch { // If body parsing fails, continue without modification - logger.warn('[Effort] Failed to parse request body for effort injection') } - } else { - logger.warn('[Effort] No args.request.body found - checking args structure') - logger.log(`[Effort] args keys: ${Object.keys(args || {}).join(', ')}`) - logger.log(`[Effort] args.request keys: ${Object.keys(args?.request || {}).join(', ')}`) - logger.log(`[Effort] args.input keys: ${Object.keys(args?.input || {}).join(', ')}`) } return next(args) }, From 4504fb21407a74829a89910b77a7d1342cb79108 Mon Sep 17 00:00:00 2001 From: GuidanNick <224735395+guidan-nick@users.noreply.github.com> Date: Sun, 21 Jun 2026 08:54:58 +0200 Subject: [PATCH 5/5] fix effort request content length --- src/__tests__/sdk-client.test.ts | 45 ++++++++++++++++++++++++++++++++ src/plugin/sdk-client.ts | 2 +- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/__tests__/sdk-client.test.ts 
b/src/__tests__/sdk-client.test.ts index 52f4dd0..4a7c825 100644 --- a/src/__tests__/sdk-client.test.ts +++ b/src/__tests__/sdk-client.test.ts @@ -1,3 +1,4 @@ +import { GenerateAssistantResponseCommand } from '@aws/codewhisperer-streaming-client' import { describe, expect, test } from 'bun:test' import { clearSdkClientCache, createSdkClient } from '../plugin/sdk-client' import type { KiroAuthDetails } from '../plugin/types' @@ -25,4 +26,48 @@ describe('SDK client', () => { clearSdkClientCache() }) + + test('injects effort before content-length is computed', async () => { + clearSdkClientCache() + + const client = createSdkClient(auth(), 'us-east-1', 'max') + let capturedRequest: any + + client.middlewareStack.add( + () => async (args: any) => { + capturedRequest = args.request + throw new Error('captured-request') + }, + { step: 'finalizeRequest', name: 'captureRequest', priority: 'high' } + ) + + const command = new GenerateAssistantResponseCommand({ + conversationState: { + chatTriggerType: 'MANUAL', + conversationId: 'test-conversation', + currentMessage: { + userInputMessage: { + content: 'hello', + modelId: 'claude-opus-4.7', + origin: 'AI_EDITOR' + } + } + } + }) + + await client.send(command).catch((error) => { + if (error.message !== 'captured-request') throw error + }) + + const bodyText = + typeof capturedRequest.body === 'string' + ? 
capturedRequest.body + : Buffer.from(capturedRequest.body).toString('utf8') + const body = JSON.parse(bodyText) + + expect(body.additionalModelRequestFields.output_config.effort).toBe('max') + expect(Number(capturedRequest.headers['content-length'])).toBe(Buffer.byteLength(bodyText)) + + clearSdkClientCache() + }) }) diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts index d4eb39d..9ec4d56 100644 --- a/src/plugin/sdk-client.ts +++ b/src/plugin/sdk-client.ts @@ -67,7 +67,7 @@ export function createSdkClient( } return next(args) }, - { step: 'build', name: 'addEffortConfig', priority: 'low' } + { step: 'build', name: 'addEffortConfig', priority: 'high' } ) }