From dfa3f9551d51534187dbffc79ce4522eec655e49 Mon Sep 17 00:00:00 2001
From: GuidanNick <224735395+guidan-nick@users.noreply.github.com>
Date: Sun, 21 Jun 2026 01:08:12 +0200
Subject: [PATCH 1/5] feat: add configurable effort levels for thinking budget

Add native Kiro effort parameter support to control thinking/reasoning depth.
Kiro ignores Anthropic's budget_tokens/max_thinking_length and instead uses
its own effort levels (low/medium/high/xhigh/max) passed via
additionalModelRequestFields.output_config.effort.

Changes:
- Add effort and auto_effort_mapping config options to schema
- Add effort.ts with budget-to-effort mapping logic
- Modify sdk-client to inject additionalModelRequestFields via middleware
- Update request pipeline to resolve and pass effort through
- Add unit tests for effort resolution

Effort levels:
- low: minimal reasoning
- medium: balanced (default, ~20k tokens)
- high: deeper reasoning (~50k tokens)
- xhigh: extended (opus-4.7/4.8 only, ~80k tokens)
- max: maximum depth (128k tokens on opus-4.7/4.8)

Usage in kiro.json:
  { "effort": "max" }  // explicit effort level
  { "auto_effort_mapping": true }  // map OpenCode budget to effort (default)

Fixes thinking budget being capped at 20k regardless of OpenCode settings.
---
 src/__tests__/effort.test.ts        |  95 ++++++++++++++++++++
 src/core/request/request-handler.ts |   9 +-
 src/plugin/config/schema.ts         |  30 ++++++-
 src/plugin/effort.ts                | 132 ++++++++++++++++++++++++++++
 src/plugin/request.ts               |  23 ++++-
 src/plugin/sdk-client.ts            |  49 +++++++++--
 src/plugin/types.ts                 |   5 +-
 7 files changed, 329 insertions(+), 14 deletions(-)
 create mode 100644 src/__tests__/effort.test.ts
 create mode 100644 src/plugin/effort.ts

diff --git a/src/__tests__/effort.test.ts b/src/__tests__/effort.test.ts
new file mode 100644
index 0000000..d475a58
--- /dev/null
+++ b/src/__tests__/effort.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, test } from 'bun:test'
+import {
+  budgetToEffort,
+  getEffectiveEffort,
+  resolveEffort,
+  supportsEffort,
+  supportsXHighEffort
+} from '../plugin/effort.js'
+
+describe('effort module', () => {
+  describe('supportsEffort', () => {
+    test('returns true for supported models', () => {
+      expect(supportsEffort('claude-opus-4.8')).toBe(true)
+      expect(supportsEffort('claude-opus-4.7')).toBe(true)
+      expect(supportsEffort('claude-sonnet-4.6')).toBe(true)
+      expect(supportsEffort('claude-sonnet-4.6-1m')).toBe(true)
+    })
+
+    test('returns false for unsupported models', () => {
+      expect(supportsEffort('claude-haiku-4.5')).toBe(false)
+      expect(supportsEffort('unknown-model')).toBe(false)
+    })
+  })
+
+  describe('supportsXHighEffort', () => {
+    test('returns true for opus 4.7 and 4.8', () => {
+      expect(supportsXHighEffort('claude-opus-4.8')).toBe(true)
+      expect(supportsXHighEffort('claude-opus-4.7')).toBe(true)
+    })
+
+    test('returns false for other models', () => {
+      expect(supportsXHighEffort('claude-opus-4.6')).toBe(false)
+      expect(supportsXHighEffort('claude-sonnet-4.6')).toBe(false)
+    })
+  })
+
+  describe('resolveEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(resolveEffort('claude-haiku-4.5', 'max')).toBeUndefined()
+    })
+
+    test('returns effort as-is for supported levels', () => {
+      expect(resolveEffort('claude-opus-4.8', 'low')).toBe('low')
+      expect(resolveEffort('claude-opus-4.8', 'max')).toBe('max')
+      expect(resolveEffort('claude-opus-4.8', 'xhigh')).toBe('xhigh')
+    })
+
+    test('clamps xhigh to max for models without xhigh support', () => {
+      expect(resolveEffort('claude-sonnet-4.6', 'xhigh')).toBe('max')
+      expect(resolveEffort('claude-opus-4.6', 'xhigh')).toBe('max')
+    })
+  })
+
+  describe('budgetToEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(budgetToEffort(100000, 'claude-haiku-4.5')).toBeUndefined()
+    })
+
+    test('maps budget ranges correctly', () => {
+      expect(budgetToEffort(5000, 'claude-opus-4.8')).toBe('low')
+      expect(budgetToEffort(20000, 'claude-opus-4.8')).toBe('medium')
+      expect(budgetToEffort(50000, 'claude-opus-4.8')).toBe('high')
+      expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('xhigh')
+      expect(budgetToEffort(128000, 'claude-opus-4.8')).toBe('max')
+    })
+
+    test('maps to max instead of xhigh for non-xhigh models', () => {
+      expect(budgetToEffort(80000, 'claude-sonnet-4.6')).toBe('max')
+    })
+  })
+
+  describe('getEffectiveEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(getEffectiveEffort('claude-haiku-4.5', true, 100000)).toBeUndefined()
+    })
+
+    test('uses explicit config when provided', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 20000, 'max')).toBe('max')
+      expect(getEffectiveEffort('claude-opus-4.8', false, 20000, 'high')).toBe('high')
+    })
+
+    test('returns undefined when not thinking and no config', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', false, 20000)).toBeUndefined()
+    })
+
+    test('uses budget mapping when thinking and auto-mapping enabled', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, true)).toBe('max')
+      expect(getEffectiveEffort('claude-opus-4.8', true, 20000, undefined, true)).toBe('medium')
+    })
+
+    test('falls back to medium when auto-mapping disabled', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, false)).toBe('medium')
+    })
+  })
+})
diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts
index f1ff1da..f57382b 100644
--- a/src/core/request/request-handler.ts
+++ b/src/core/request/request-handler.ts
@@ -7,7 +7,7 @@ import * as logger from '../../plugin/logger'
 import { transformToSdkRequest } from '../../plugin/request'
 import { createSdkClient } from '../../plugin/sdk-client'
 import { syncFromKiroCli } from '../../plugin/sync/kiro-cli'
-import type { KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types'
+import type { Effort, KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types'
 import { AccountSelector } from '../account/account-selector'
 import { UsageTracker } from '../account/usage-tracker'
 import { TokenRefresher } from '../auth/token-refresher'
@@ -134,7 +134,7 @@ export class RequestHandler {
       }
 
       try {
-        const client = createSdkClient(auth, sdkPrep.region)
+        const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort)
         const command = new GenerateAssistantResponseCommand({
           conversationState: sdkPrep.conversationState as any,
           profileArn: sdkPrep.profileArn
@@ -219,7 +219,10 @@ export class RequestHandler {
     budget: number,
     showToast?: (message: string, variant: 'info' | 'warning' | 'success' | 'error') => void
   ): SdkPreparedRequest {
-    return transformToSdkRequest(body, model, auth, think, budget, showToast)
+    return transformToSdkRequest(body, model, auth, think, budget, showToast, {
+      effort: this.config.effort,
+      autoEffortMapping: this.config.auto_effort_mapping
+    })
   }
 
   private handleSuccessfulRequest(acc: ManagedAccount): void {
diff --git a/src/plugin/config/schema.ts b/src/plugin/config/schema.ts
index 2aa0e39..18cd3cf 100644
--- a/src/plugin/config/schema.ts
+++ b/src/plugin/config/schema.ts
@@ -3,6 +3,17 @@ import { z } from 'zod'
 export const AccountSelectionStrategySchema = z.enum(['sticky', 'round-robin', 'lowest-usage'])
 export type AccountSelectionStrategy = z.infer<typeof AccountSelectionStrategySchema>
 
+/**
+ * Kiro effort levels control thinking/reasoning depth.
+ * - low: minimal reasoning
+ * - medium: balanced (default when thinking enabled)
+ * - high: deeper reasoning
+ * - xhigh: extended reasoning (opus-4.7, opus-4.8 only)
+ * - max: maximum reasoning depth (128k thinking tokens on opus-4.7/4.8)
+ */
+export const EffortSchema = z.enum(['low', 'medium', 'high', 'xhigh', 'max'])
+export type Effort = z.infer<typeof EffortSchema>
+
 export const RegionSchema = z.enum([
   'us-east-1',
   'us-east-2',
@@ -70,7 +81,21 @@ export const KiroConfigSchema = z.object({
 
   usage_tracking_enabled: z.boolean().default(true),
   auto_sync_kiro_cli: z.boolean().default(true),
-  enable_log_api_request: z.boolean().default(false)
+  enable_log_api_request: z.boolean().default(false),
+
+  /**
+   * Default effort level for thinking models. Controls reasoning depth.
+   * When set, this overrides the automatic budget-based mapping.
+   * Values: 'low', 'medium', 'high', 'xhigh' (opus-4.7/4.8 only), 'max'
+   */
+  effort: EffortSchema.optional(),
+
+  /**
+   * Enable automatic effort mapping from OpenCode's thinking budget.
+   * When true (default), maps budget ranges to effort levels.
+   * When false, only uses explicit effort config or falls back to 'medium'.
+   */
+  auto_effort_mapping: z.boolean().default(true)
 })
 
 export type KiroConfig = z.infer<typeof KiroConfigSchema>
@@ -88,5 +113,6 @@ export const DEFAULT_CONFIG: KiroConfig = {
   auth_server_port_range: 10,
   usage_tracking_enabled: true,
   auto_sync_kiro_cli: true,
-  enable_log_api_request: false
+  enable_log_api_request: false,
+  auto_effort_mapping: true
 }
diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts
new file mode 100644
index 0000000..7f99f8a
--- /dev/null
+++ b/src/plugin/effort.ts
@@ -0,0 +1,132 @@
+import type { Effort } from './config/schema'
+
+/**
+ * Effort levels ordered from lowest to highest reasoning depth.
+ */
+export const EFFORT_LEVELS: readonly Effort[] = ['low', 'medium', 'high', 'xhigh', 'max'] as const
+
+/**
+ * Models that support the 5-value effort enum (including xhigh).
+ * These models support up to 128k thinking tokens with max effort.
+ */
+const XHIGH_CAPABLE_MODELS = new Set([
+  'claude-opus-4.7',
+  'claude-opus-4.8'
+])
+
+/**
+ * All models that accept the effort parameter, including the xhigh-capable ones.
+ * xhigh requests on models outside XHIGH_CAPABLE_MODELS are clamped to max.
+ */
+const EFFORT_CAPABLE_MODELS = new Set([
+  'claude-opus-4.5',
+  'claude-opus-4.6',
+  'claude-opus-4.6-1m',
+  'claude-sonnet-4.5',
+  'claude-sonnet-4.5-1m',
+  'claude-sonnet-4.6',
+  'claude-sonnet-4.6-1m',
+  ...XHIGH_CAPABLE_MODELS
+])
+
+/**
+ * Check if a model supports the effort parameter.
+ */
+export function supportsEffort(kiroModel: string): boolean {
+  return EFFORT_CAPABLE_MODELS.has(kiroModel)
+}
+
+/**
+ * Check if a model supports xhigh effort level.
+ */
+export function supportsXHighEffort(kiroModel: string): boolean {
+  return XHIGH_CAPABLE_MODELS.has(kiroModel)
+}
+
+/**
+ * Resolve effort level for a given model.
+ * - Returns undefined if model doesn't support effort
+ * - Clamps xhigh to max for models that don't support it
+ */
+export function resolveEffort(kiroModel: string, requested: Effort): Effort | undefined {
+  if (!supportsEffort(kiroModel)) {
+    return undefined
+  }
+
+  // xhigh is only supported on opus-4.7 and opus-4.8
+  if (requested === 'xhigh' && !supportsXHighEffort(kiroModel)) {
+    return 'max'
+  }
+
+  return requested
+}
+
+/**
+ * Map OpenCode thinking budget to Kiro effort level.
+ * 
+ * Budget ranges (approximate thinking token allocations):
+ * - low:    minimal thinking
+ * - medium: ~20k tokens (OpenCode default)
+ * - high:   ~50k tokens
+ * - xhigh:  ~80k tokens (opus-4.7/4.8 only)
+ * - max:    ~128k tokens
+ */
+export function budgetToEffort(budget: number, kiroModel: string): Effort | undefined {
+  if (!supportsEffort(kiroModel)) {
+    return undefined
+  }
+
+  let effort: Effort
+  if (budget <= 10000) {
+    effort = 'low'
+  } else if (budget <= 30000) {
+    effort = 'medium'
+  } else if (budget <= 60000) {
+    effort = 'high'
+  } else if (budget <= 100000) {
+    effort = supportsXHighEffort(kiroModel) ? 'xhigh' : 'max'
+  } else {
+    effort = 'max'
+  }
+
+  return effort
+}
+
+/**
+ * Get the effective effort level based on config, budget, and model.
+ * 
+ * Priority:
+ * 1. Explicit effort config (if set)
+ * 2. Budget-to-effort mapping (if auto_effort_mapping enabled and thinking)
+ * 3. 'medium' default (if thinking enabled)
+ * 4. undefined (if not thinking)
+ */
+export function getEffectiveEffort(
+  kiroModel: string,
+  thinking: boolean,
+  budget: number,
+  configEffort?: Effort,
+  autoEffortMapping = true
+): Effort | undefined {
+  if (!supportsEffort(kiroModel)) {
+    return undefined
+  }
+
+  // Explicit config takes precedence
+  if (configEffort) {
+    return resolveEffort(kiroModel, configEffort)
+  }
+
+  // If not thinking, no effort needed
+  if (!thinking) {
+    return undefined
+  }
+
+  // Auto-map budget to effort
+  if (autoEffortMapping) {
+    return budgetToEffort(budget, kiroModel)
+  }
+
+  // Default to medium when thinking without auto-mapping
+  return 'medium'
+}
diff --git a/src/plugin/request.ts b/src/plugin/request.ts
index 71542f7..a9459ef 100644
--- a/src/plugin/request.ts
+++ b/src/plugin/request.ts
@@ -16,6 +16,7 @@ import {
   convertToolsToCodeWhisperer,
   deduplicateToolResults
 } from '../infrastructure/transformers/tool-transformer.js'
+import { getEffectiveEffort } from './effort.js'
 import {
   convertImagesToKiroFormat,
   extractAllImages,
@@ -24,6 +25,7 @@ import {
 import { resolveKiroModel } from './models.js'
 import type {
   CodeWhispererRequest,
+  Effort,
   KiroAuthDetails,
   PreparedRequest,
   SdkPreparedRequest
@@ -35,6 +37,11 @@ interface TransformResult {
   convId: string
 }
 
+interface EffortConfig {
+  effort?: Effort
+  autoEffortMapping?: boolean
+}
+
 type ToastFunction = (message: string, variant: 'info' | 'warning' | 'success' | 'error') => void
 
 function buildCodeWhispererRequest(
@@ -317,7 +324,8 @@ export function transformToSdkRequest(
   auth: KiroAuthDetails,
   think = false,
   budget = 20000,
-  showToast?: ToastFunction
+  showToast?: ToastFunction,
+  effortConfig?: EffortConfig
 ): SdkPreparedRequest {
   const { request, resolved, convId } = buildCodeWhispererRequest(
     body,
@@ -327,12 +335,23 @@ export function transformToSdkRequest(
     budget,
     showToast
   )
+
+  // Resolve effort level based on config and model capabilities
+  const effort = getEffectiveEffort(
+    resolved,
+    think,
+    budget,
+    effortConfig?.effort,
+    effortConfig?.autoEffortMapping ?? true
+  )
+
   return {
     conversationState: request.conversationState,
     profileArn: request.profileArn,
     streaming: true,
     effectiveModel: resolved,
     conversationId: convId,
-    region: extractRegionFromArn(auth.profileArn) ?? auth.region
+    region: extractRegionFromArn(auth.profileArn) ?? auth.region,
+    effort
   }
 }
diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts
index 5bfb5ac..d4eb39d 100644
--- a/src/plugin/sdk-client.ts
+++ b/src/plugin/sdk-client.ts
@@ -1,18 +1,29 @@
 import { CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client'
 import { KIRO_CONSTANTS } from '../constants.js'
-import type { KiroAuthDetails } from './types'
+import type { Effort, KiroAuthDetails } from './types'
 
-const clientCache = new Map<string, { client: CodeWhispererStreamingClient; token: string }>()
+/**
+ * Cache key includes effort to ensure separate clients for different effort levels,
+ * since middleware is configured at client creation time.
+ */
+interface ClientCacheEntry {
+  client: CodeWhispererStreamingClient
+  token: string
+  effort?: Effort
+}
+
+const clientCache = new Map<string, ClientCacheEntry>()
 const KIRO_CLI_MAX_ATTEMPTS = 3
 
 export function createSdkClient(
   auth: KiroAuthDetails,
-  region: string
+  region: string,
+  effort?: Effort
 ): CodeWhispererStreamingClient {
-  const cacheKey = `${region}:${auth.email || 'default'}`
+  const cacheKey = `${region}:${auth.email || 'default'}:${effort || 'none'}`
   const cached = clientCache.get(cacheKey)
 
-  if (cached && cached.token === auth.access) {
+  if (cached && cached.token === auth.access && cached.effort === effort) {
     return cached.client
   }
 
@@ -26,6 +37,7 @@ export function createSdkClient(
     customUserAgent: [[KIRO_CONSTANTS.USER_AGENT]]
   })
 
+  // Add Kiro-specific headers
   client.middlewareStack.add(
     (next: any) => async (args: any) => {
       args.request.headers['x-amzn-kiro-agent-mode'] = 'vibe'
@@ -34,7 +46,32 @@ export function createSdkClient(
     { step: 'build', name: 'addKiroHeaders' }
   )
 
-  clientCache.set(cacheKey, { client, token })
+  // Inject additionalModelRequestFields for effort-based thinking control
+  if (effort) {
+    client.middlewareStack.add(
+      (next: any) => async (args: any) => {
+        // The SDK serializes input to args.input, we need to modify the body
+        // before it's sent. The body is in args.request.body as a string.
+        if (args.request?.body) {
+          try {
+            const body = JSON.parse(args.request.body)
+            body.additionalModelRequestFields = {
+              output_config: {
+                effort
+              }
+            }
+            args.request.body = JSON.stringify(body)
+          } catch {
+            // If body parsing fails, continue without modification
+          }
+        }
+        return next(args)
+      },
+      { step: 'build', name: 'addEffortConfig', priority: 'low' }
+    )
+  }
+
+  clientCache.set(cacheKey, { client, token, effort })
   return client
 }
 
diff --git a/src/plugin/types.ts b/src/plugin/types.ts
index e8b05d3..a17e0d8 100644
--- a/src/plugin/types.ts
+++ b/src/plugin/types.ts
@@ -1,8 +1,9 @@
 import z from 'zod'
-import { RegionSchema } from './config/schema'
+import { EffortSchema, RegionSchema } from './config/schema'
 
 export type KiroAuthMethod = 'idc' | 'desktop'
 export type KiroRegion = z.infer<typeof RegionSchema>
+export type Effort = z.infer<typeof EffortSchema>
 
 export interface KiroAuthDetails {
   refresh: string
@@ -119,6 +120,8 @@ export interface SdkPreparedRequest {
   effectiveModel: string
   conversationId: string
   region: string
+  /** Resolved effort level for thinking models */
+  effort?: Effort
 }
 
 export type AccountSelectionStrategy = 'sticky' | 'round-robin' | 'lowest-usage'

From 0f27667f0b329028b371f3edbd4c16d0a249f646 Mon Sep 17 00:00:00 2001
From: GuidanNick <224735395+guidan-nick@users.noreply.github.com>
Date: Sun, 21 Jun 2026 01:14:54 +0200
Subject: [PATCH 2/5] add effort debug logging for troubleshooting

---
 package-lock.json                   | 2 ++
 src/core/request/request-handler.ts | 3 +++
 src/plugin/effort.ts                | 4 ++--
 src/plugin/sdk-client.ts            | 4 ++++
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 0221348..88b0db9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -2021,6 +2021,7 @@
       "integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "prettier": "bin/prettier.cjs"
       },
@@ -2251,6 +2252,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts
index f57382b..cc8b28b 100644
--- a/src/core/request/request-handler.ts
+++ b/src/core/request/request-handler.ts
@@ -132,6 +132,9 @@ export class RequestHandler {
       if (apiTimestamp) {
         this.logSdkRequest(sdkPrep, acc, apiTimestamp)
       }
+      if (sdkPrep.effort) {
+        logger.log(`[Effort] Resolved effort: ${sdkPrep.effort} for model: ${sdkPrep.effectiveModel}`)
+      }
 
       try {
         const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort)
diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts
index 7f99f8a..24e97cd 100644
--- a/src/plugin/effort.ts
+++ b/src/plugin/effort.ts
@@ -96,7 +96,7 @@ export function budgetToEffort(budget: number, kiroModel: string): Effort | unde
  * Get the effective effort level based on config, budget, and model.
  * 
  * Priority:
- * 1. Explicit effort config (if set)
+ * 1. Explicit effort config (if set) - always applied regardless of thinking state
  * 2. Budget-to-effort mapping (if auto_effort_mapping enabled and thinking)
  * 3. 'medium' default (if thinking enabled)
  * 4. undefined (if not thinking)
@@ -112,7 +112,7 @@ export function getEffectiveEffort(
     return undefined
   }
 
-  // Explicit config takes precedence
+  // Explicit config takes precedence - always applied even without thinking
   if (configEffort) {
     return resolveEffort(kiroModel, configEffort)
   }
diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts
index d4eb39d..5bdc1df 100644
--- a/src/plugin/sdk-client.ts
+++ b/src/plugin/sdk-client.ts
@@ -1,5 +1,6 @@
 import { CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client'
 import { KIRO_CONSTANTS } from '../constants.js'
+import * as logger from './logger.js'
 import type { Effort, KiroAuthDetails } from './types'
 
 /**
@@ -48,6 +49,7 @@ export function createSdkClient(
 
   // Inject additionalModelRequestFields for effort-based thinking control
   if (effort) {
+    logger.log(`[Effort] Adding middleware to inject effort: ${effort}`)
     client.middlewareStack.add(
       (next: any) => async (args: any) => {
         // The SDK serializes input to args.input, we need to modify the body
@@ -61,8 +63,10 @@ export function createSdkClient(
               }
             }
             args.request.body = JSON.stringify(body)
+            logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`)
           } catch {
             // If body parsing fails, continue without modification
+            logger.warn('[Effort] Failed to parse request body for effort injection')
           }
         }
         return next(args)

From fad2d3015675fcccc51333b7cdfb70a68bb2577f Mon Sep 17 00:00:00 2001
From: GuidanNick <224735395+guidan-nick@users.noreply.github.com>
Date: Sun, 21 Jun 2026 01:24:06 +0200
Subject: [PATCH 3/5] fix: read thinkingConfig from top-level body field

OpenCode sends thinkingBudget in body.thinkingConfig (not body.providerOptions.thinkingConfig).
Update budget extraction to check both locations (also accepting budget_tokens).

Also update budget-to-effort mapping to align with OpenCode's standard variant values:
- 8192 (low) -> effort low
- 16384 (medium) -> effort medium
- 24576 (high) -> effort high
- 32768 (max) -> effort max
---
 src/core/request/request-handler.ts | 13 +++++++++++--
 src/plugin/effort.ts                | 24 ++++++++++++++----------
 src/plugin/sdk-client.ts            |  8 ++++++++
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts
index cc8b28b..7ff0380 100644
--- a/src/core/request/request-handler.ts
+++ b/src/core/request/request-handler.ts
@@ -79,8 +79,17 @@ export class RequestHandler {
   ): Promise<Response> {
     const body = init?.body ? JSON.parse(init.body) : {}
     const model = this.extractModel(url) || body.model || 'claude-sonnet-4-5'
-    const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig
-    const budget = body.providerOptions?.thinkingConfig?.thinkingBudget || 20000
+    const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig
+    const budget =
+      body.providerOptions?.thinkingConfig?.thinkingBudget ||
+      body.thinkingConfig?.thinkingBudget ||
+      body.thinkingConfig?.budget_tokens ||
+      20000
+
+    // Debug: log what OpenCode sends us
+    logger.log(`[Debug] body keys: ${Object.keys(body).join(', ')}`)
+    logger.log(`[Debug] body.thinkingConfig: ${JSON.stringify(body.thinkingConfig)}`)
+    logger.log(`[Debug] model: ${model}, think: ${think}, budget: ${budget}`)
 
     let retry = 0
     let consecutiveNullAccounts = 0
diff --git a/src/plugin/effort.ts b/src/plugin/effort.ts
index 24e97cd..94d37bc 100644
--- a/src/plugin/effort.ts
+++ b/src/plugin/effort.ts
@@ -64,12 +64,18 @@ export function resolveEffort(kiroModel: string, requested: Effort): Effort | un
 /**
  * Map OpenCode thinking budget to Kiro effort level.
  * 
- * Budget ranges (approximate thinking token allocations):
- * - low:    minimal thinking
- * - medium: ~20k tokens (OpenCode default)
- * - high:   ~50k tokens
- * - xhigh:  ~80k tokens (opus-4.7/4.8 only)
- * - max:    ~128k tokens
+ * OpenCode sends thinkingBudget from its variant config. Standard values:
+ * - low:    8192
+ * - medium: 16384
+ * - high:   24576
+ * - max:    32768
+ * 
+ * We map these ranges to Kiro effort levels:
+ * - ≤10000  → low
+ * - ≤20000  → medium
+ * - ≤28000  → high
+ * - >28000  → max
+ * xhigh is never produced by this mapping; request it via the explicit `effort` config.
  */
 export function budgetToEffort(budget: number, kiroModel: string): Effort | undefined {
   if (!supportsEffort(kiroModel)) {
@@ -79,12 +85,10 @@ export function budgetToEffort(budget: number, kiroModel: string): Effort | unde
   let effort: Effort
   if (budget <= 10000) {
     effort = 'low'
-  } else if (budget <= 30000) {
+  } else if (budget <= 20000) {
     effort = 'medium'
-  } else if (budget <= 60000) {
+  } else if (budget <= 28000) {
     effort = 'high'
-  } else if (budget <= 100000) {
-    effort = supportsXHighEffort(kiroModel) ? 'xhigh' : 'max'
   } else {
     effort = 'max'
   }
diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts
index 5bdc1df..7a3281b 100644
--- a/src/plugin/sdk-client.ts
+++ b/src/plugin/sdk-client.ts
@@ -64,10 +64,18 @@ export function createSdkClient(
             }
             args.request.body = JSON.stringify(body)
             logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`)
+            // Dump the top-level keys of final request body for verification
+            logger.log(`[Effort] Final request body keys: ${Object.keys(body).join(', ')}`)
+            logger.log(`[Effort] Final body.additionalModelRequestFields: ${JSON.stringify(body.additionalModelRequestFields)}`)
           } catch {
             // If body parsing fails, continue without modification
             logger.warn('[Effort] Failed to parse request body for effort injection')
           }
+        } else {
+          logger.warn('[Effort] No args.request.body found - checking args structure')
+          logger.log(`[Effort] args keys: ${Object.keys(args || {}).join(', ')}`)
+          logger.log(`[Effort] args.request keys: ${Object.keys(args?.request || {}).join(', ')}`)
+          logger.log(`[Effort] args.input keys: ${Object.keys(args?.input || {}).join(', ')}`)
         }
         return next(args)
       },

From 494baf46222a44c4f002bcd885bf9dc2e66b1fae Mon Sep 17 00:00:00 2001
From: GuidanNick <224735395+guidan-nick@users.noreply.github.com>
Date: Sun, 21 Jun 2026 08:18:17 +0200
Subject: [PATCH 4/5] fix effort docs and logging

---
 README.md                           | 54 +++++++++++++++++++++++++++++
 src/__tests__/effort.test.ts        |  8 ++---
 src/core/request/request-handler.ts | 14 ++------
 src/plugin/sdk-client.ts            | 12 -------
 4 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 4e646a4..89553df 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ models with substantial trial quotas.
   SQLite.
 - **Native Thinking Mode**: Full support for Claude reasoning capabilities via virtual
   model mappings.
+- **Kiro Effort Mapping**: Maps OpenCode thinking budgets to Kiro's native effort
+  levels automatically.
 - **Automated Recovery**: Exponential backoff for rate limits and automated token
   refresh.
 
@@ -46,6 +48,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -61,6 +64,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -81,6 +85,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -96,6 +101,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -111,6 +117,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -126,6 +133,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -146,6 +154,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -177,6 +186,48 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
 }
 ```
 
+### Thinking Effort Configuration
+
+Configure Kiro effort per model in your OpenCode provider model definitions by setting
+`thinkingConfig.thinkingBudget` on each model variant. The plugin automatically maps
+those budgets to Kiro's native `effort` field for supported Claude models, so you do
+not need to hardcode a global `effort` value in `~/.config/opencode/kiro.json`.
+
+```json
+{
+  "provider": {
+    "kiro": {
+      "models": {
+        "claude-opus-4-7-thinking": {
+          "name": "Claude Opus 4.7 Thinking",
+          "limit": { "context": 1000000, "output": 64000 },
+          "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] },
+          "variants": {
+            "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
+            "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
+            "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Budget mapping (rows are checked top to bottom; the first matching row applies):
+
+| OpenCode budget | Kiro effort |
+| --------------- | ----------- |
+| `<= 10000` | `low` |
+| `<= 20000` | `medium` |
+| `<= 28000` | `high` |
+| `> 28000` | `max` |
+
+Use `~/.config/opencode/kiro.json` for plugin-wide behavior such as auth sync,
+account selection, retry limits, and `auto_effort_mapping`. A top-level `effort`
+setting is a global override for all supported models, not a per-model setting.
+
 ## Setup
 
 1. **Authentication via Kiro CLI (Recommended)**:
@@ -294,6 +345,7 @@ Edit `~/.config/opencode/kiro.json`:
   "token_expiry_buffer_ms": 120000,
   "usage_sync_max_retries": 3,
   "usage_tracking_enabled": true,
+  "auto_effort_mapping": true,
   "enable_log_api_request": false
 }
 ```
@@ -318,6 +370,8 @@ Edit `~/.config/opencode/kiro.json`:
 - `auth_server_port_start`: Legacy/ignored (no local auth server).
 - `auth_server_port_range`: Legacy/ignored (no local auth server).
 - `usage_tracking_enabled`: Enable usage tracking and toast notifications.
+- `auto_effort_mapping`: Automatically map OpenCode thinking budgets to Kiro effort
+  levels for supported models (default: `true`).
 - `enable_log_api_request`: Enable detailed API request logging.
 
 ## Storage
diff --git a/src/__tests__/effort.test.ts b/src/__tests__/effort.test.ts
index d475a58..7f109c2 100644
--- a/src/__tests__/effort.test.ts
+++ b/src/__tests__/effort.test.ts
@@ -58,10 +58,10 @@ describe('effort module', () => {
 
     test('maps budget ranges correctly', () => {
       expect(budgetToEffort(5000, 'claude-opus-4.8')).toBe('low')
-      expect(budgetToEffort(20000, 'claude-opus-4.8')).toBe('medium')
-      expect(budgetToEffort(50000, 'claude-opus-4.8')).toBe('high')
-      expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('xhigh')
-      expect(budgetToEffort(128000, 'claude-opus-4.8')).toBe('max')
+      expect(budgetToEffort(16384, 'claude-opus-4.8')).toBe('medium')
+      expect(budgetToEffort(24576, 'claude-opus-4.8')).toBe('high')
+      expect(budgetToEffort(32768, 'claude-opus-4.8')).toBe('max')
+      expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('max')
     })
 
     test('maps to max instead of xhigh for non-xhigh models', () => {
diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts
index 7ff0380..7dcfdf4 100644
--- a/src/core/request/request-handler.ts
+++ b/src/core/request/request-handler.ts
@@ -7,7 +7,7 @@ import * as logger from '../../plugin/logger'
 import { transformToSdkRequest } from '../../plugin/request'
 import { createSdkClient } from '../../plugin/sdk-client'
 import { syncFromKiroCli } from '../../plugin/sync/kiro-cli'
-import type { Effort, KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types'
+import type { KiroAuthDetails, ManagedAccount, SdkPreparedRequest } from '../../plugin/types'
 import { AccountSelector } from '../account/account-selector'
 import { UsageTracker } from '../account/usage-tracker'
 import { TokenRefresher } from '../auth/token-refresher'
@@ -79,18 +79,14 @@ export class RequestHandler {
   ): Promise<Response> {
     const body = init?.body ? JSON.parse(init.body) : {}
     const model = this.extractModel(url) || body.model || 'claude-sonnet-4-5'
-    const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig
+    const think =
+      model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig
     const budget =
       body.providerOptions?.thinkingConfig?.thinkingBudget ||
       body.thinkingConfig?.thinkingBudget ||
       body.thinkingConfig?.budget_tokens ||
       20000
 
-    // Debug: log what OpenCode sends us
-    logger.log(`[Debug] body keys: ${Object.keys(body).join(', ')}`)
-    logger.log(`[Debug] body.thinkingConfig: ${JSON.stringify(body.thinkingConfig)}`)
-    logger.log(`[Debug] model: ${model}, think: ${think}, budget: ${budget}`)
-
     let retry = 0
     let consecutiveNullAccounts = 0
     const retryContext = this.retryStrategy.createContext()
@@ -141,10 +137,6 @@ export class RequestHandler {
       if (apiTimestamp) {
         this.logSdkRequest(sdkPrep, acc, apiTimestamp)
       }
-      if (sdkPrep.effort) {
-        logger.log(`[Effort] Resolved effort: ${sdkPrep.effort} for model: ${sdkPrep.effectiveModel}`)
-      }
-
       try {
         const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort)
         const command = new GenerateAssistantResponseCommand({
diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts
index 7a3281b..d4eb39d 100644
--- a/src/plugin/sdk-client.ts
+++ b/src/plugin/sdk-client.ts
@@ -1,6 +1,5 @@
 import { CodeWhispererStreamingClient } from '@aws/codewhisperer-streaming-client'
 import { KIRO_CONSTANTS } from '../constants.js'
-import * as logger from './logger.js'
 import type { Effort, KiroAuthDetails } from './types'
 
 /**
@@ -49,7 +48,6 @@ export function createSdkClient(
 
   // Inject additionalModelRequestFields for effort-based thinking control
   if (effort) {
-    logger.log(`[Effort] Adding middleware to inject effort: ${effort}`)
     client.middlewareStack.add(
       (next: any) => async (args: any) => {
         // The SDK serializes input to args.input, we need to modify the body
@@ -63,19 +61,9 @@ export function createSdkClient(
               }
             }
             args.request.body = JSON.stringify(body)
-            logger.log(`[Effort] Injected additionalModelRequestFields.output_config.effort=${effort}`)
-            // Dump the top-level keys of final request body for verification
-            logger.log(`[Effort] Final request body keys: ${Object.keys(body).join(', ')}`)
-            logger.log(`[Effort] Final body.additionalModelRequestFields: ${JSON.stringify(body.additionalModelRequestFields)}`)
           } catch {
             // If body parsing fails, continue without modification
-            logger.warn('[Effort] Failed to parse request body for effort injection')
           }
-        } else {
-          logger.warn('[Effort] No args.request.body found - checking args structure')
-          logger.log(`[Effort] args keys: ${Object.keys(args || {}).join(', ')}`)
-          logger.log(`[Effort] args.request keys: ${Object.keys(args?.request || {}).join(', ')}`)
-          logger.log(`[Effort] args.input keys: ${Object.keys(args?.input || {}).join(', ')}`)
         }
         return next(args)
       },

From 4504fb21407a74829a89910b77a7d1342cb79108 Mon Sep 17 00:00:00 2001
From: GuidanNick <224735395+guidan-nick@users.noreply.github.com>
Date: Sun, 21 Jun 2026 08:54:58 +0200
Subject: [PATCH 5/5] fix effort request content length

---
 src/__tests__/sdk-client.test.ts | 45 ++++++++++++++++++++++++++++++++
 src/plugin/sdk-client.ts         |  2 +-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/__tests__/sdk-client.test.ts b/src/__tests__/sdk-client.test.ts
index 52f4dd0..4a7c825 100644
--- a/src/__tests__/sdk-client.test.ts
+++ b/src/__tests__/sdk-client.test.ts
@@ -1,3 +1,4 @@
+import { GenerateAssistantResponseCommand } from '@aws/codewhisperer-streaming-client'
 import { describe, expect, test } from 'bun:test'
 import { clearSdkClientCache, createSdkClient } from '../plugin/sdk-client'
 import type { KiroAuthDetails } from '../plugin/types'
@@ -25,4 +26,48 @@ describe('SDK client', () => {
 
     clearSdkClientCache()
   })
+
+  test('injects effort before content-length is computed', async () => {
+    clearSdkClientCache()
+
+    const client = createSdkClient(auth(), 'us-east-1', 'max')
+    let capturedRequest: any
+
+    client.middlewareStack.add(
+      () => async (args: any) => {
+        capturedRequest = args.request
+        throw new Error('captured-request')
+      },
+      { step: 'finalizeRequest', name: 'captureRequest', priority: 'high' }
+    )
+
+    const command = new GenerateAssistantResponseCommand({
+      conversationState: {
+        chatTriggerType: 'MANUAL',
+        conversationId: 'test-conversation',
+        currentMessage: {
+          userInputMessage: {
+            content: 'hello',
+            modelId: 'claude-opus-4.7',
+            origin: 'AI_EDITOR'
+          }
+        }
+      }
+    })
+
+    await client.send(command).catch((error) => {
+      if (error.message !== 'captured-request') throw error
+    })
+
+    const bodyText =
+      typeof capturedRequest.body === 'string'
+        ? capturedRequest.body
+        : Buffer.from(capturedRequest.body).toString('utf8')
+    const body = JSON.parse(bodyText)
+
+    expect(body.additionalModelRequestFields.output_config.effort).toBe('max')
+    expect(Number(capturedRequest.headers['content-length'])).toBe(Buffer.byteLength(bodyText))
+
+    clearSdkClientCache()
+  })
 })
diff --git a/src/plugin/sdk-client.ts b/src/plugin/sdk-client.ts
index d4eb39d..9ec4d56 100644
--- a/src/plugin/sdk-client.ts
+++ b/src/plugin/sdk-client.ts
@@ -67,7 +67,7 @@ export function createSdkClient(
         }
         return next(args)
       },
-      { step: 'build', name: 'addEffortConfig', priority: 'low' }
+      { step: 'build', name: 'addEffortConfig', priority: 'high' }
     )
   }