tickernelz · tickernelz · Jun 22, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/README.md b/README.md
@@ -21,6 +21,8 @@ models with substantial trial quotas.
   SQLite.
 - **Native Thinking Mode**: Full support for Claude reasoning capabilities via virtual
   model mappings.
+- **Kiro Effort Mapping**: Maps OpenCode thinking budgets to Kiro's native effort
+  levels automatically.
 - **Automated Recovery**: Exponential backoff for rate limits and automated token
   refresh.
 
@@ -46,6 +48,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -61,6 +64,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -81,6 +85,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -96,6 +101,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -111,6 +117,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -126,6 +133,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -146,6 +154,7 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
           "variants": {
             "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
             "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
             "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
           }
         },
@@ -177,6 +186,48 @@ Add the plugin to your `opencode.json` or `opencode.jsonc`:
 }
 ```
 
+### Thinking Effort Configuration
+
+Configure Kiro effort per model in your OpenCode provider model definitions by setting
+`thinkingConfig.thinkingBudget` on each model variant. The plugin automatically maps
+those budgets to Kiro's native `effort` field for supported Claude models, so you do
+not need to hardcode a global `effort` value in `~/.config/opencode/kiro.json`.
+
+```json
+{
+  "provider": {
+    "kiro": {
+      "models": {
+        "claude-opus-4-7-thinking": {
+          "name": "Claude Opus 4.7 Thinking",
+          "limit": { "context": 1000000, "output": 64000 },
+          "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] },
+          "variants": {
+            "low": { "thinkingConfig": { "thinkingBudget": 8192 } },
+            "medium": { "thinkingConfig": { "thinkingBudget": 16384 } },
+            "high": { "thinkingConfig": { "thinkingBudget": 24576 } },
+            "max": { "thinkingConfig": { "thinkingBudget": 32768 } }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Budget mapping:
+
+| OpenCode budget | Kiro effort |
+| --------------- | ----------- |
+| `<= 10000` | `low` |
+| `<= 20000` | `medium` |
+| `<= 28000` | `high` |
+| `> 28000` | `max` |
+
+Use `~/.config/opencode/kiro.json` for plugin-wide behavior such as auth sync,
+account selection, retry limits, and `auto_effort_mapping`. A top-level `effort`
+setting is a global override for all supported models, not a per-model setting.
+
 ## Setup
 
 1. **Authentication via Kiro CLI (Recommended)**:
@@ -294,6 +345,7 @@ Edit `~/.config/opencode/kiro.json`:
   "token_expiry_buffer_ms": 120000,
   "usage_sync_max_retries": 3,
   "usage_tracking_enabled": true,
+  "auto_effort_mapping": true,
   "enable_log_api_request": false
 }
 ```
@@ -318,6 +370,8 @@ Edit `~/.config/opencode/kiro.json`:
 - `auth_server_port_start`: Legacy/ignored (no local auth server).
 - `auth_server_port_range`: Legacy/ignored (no local auth server).
 - `usage_tracking_enabled`: Enable usage tracking and toast notifications.
+- `auto_effort_mapping`: Automatically map OpenCode thinking budgets to Kiro effort
+  levels for supported models (default: `true`).
 - `enable_log_api_request`: Enable detailed API request logging.
 
 ## Storage

diff --git a/package-lock.json b/package-lock.json
diff --git a/src/__tests__/effort.test.ts b/src/__tests__/effort.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, test } from 'bun:test'
+import {
+  budgetToEffort,
+  getEffectiveEffort,
+  resolveEffort,
+  supportsEffort,
+  supportsXHighEffort
+} from '../plugin/effort.js'
+
+describe('effort module', () => {
+  describe('supportsEffort', () => {
+    test('returns true for supported models', () => {
+      expect(supportsEffort('claude-opus-4.8')).toBe(true)
+      expect(supportsEffort('claude-opus-4.7')).toBe(true)
+      expect(supportsEffort('claude-sonnet-4.6')).toBe(true)
+      expect(supportsEffort('claude-sonnet-4.6-1m')).toBe(true)
+    })
+
+    test('returns false for unsupported models', () => {
+      expect(supportsEffort('claude-haiku-4.5')).toBe(false)
+      expect(supportsEffort('unknown-model')).toBe(false)
+    })
+  })
+
+  describe('supportsXHighEffort', () => {
+    test('returns true for opus 4.7 and 4.8', () => {
+      expect(supportsXHighEffort('claude-opus-4.8')).toBe(true)
+      expect(supportsXHighEffort('claude-opus-4.7')).toBe(true)
+    })
+
+    test('returns false for other models', () => {
+      expect(supportsXHighEffort('claude-opus-4.6')).toBe(false)
+      expect(supportsXHighEffort('claude-sonnet-4.6')).toBe(false)
+    })
+  })
+
+  describe('resolveEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(resolveEffort('claude-haiku-4.5', 'max')).toBeUndefined()
+    })
+
+    test('returns effort as-is for supported levels', () => {
+      expect(resolveEffort('claude-opus-4.8', 'low')).toBe('low')
+      expect(resolveEffort('claude-opus-4.8', 'max')).toBe('max')
+      expect(resolveEffort('claude-opus-4.8', 'xhigh')).toBe('xhigh')
+    })
+
+    test('clamps xhigh to max for models without xhigh support', () => {
+      expect(resolveEffort('claude-sonnet-4.6', 'xhigh')).toBe('max')
+      expect(resolveEffort('claude-opus-4.6', 'xhigh')).toBe('max')
+    })
+  })
+
+  describe('budgetToEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(budgetToEffort(100000, 'claude-haiku-4.5')).toBeUndefined()
+    })
+
+    test('maps budget ranges correctly', () => {
+      expect(budgetToEffort(5000, 'claude-opus-4.8')).toBe('low')
+      expect(budgetToEffort(16384, 'claude-opus-4.8')).toBe('medium')
+      expect(budgetToEffort(24576, 'claude-opus-4.8')).toBe('high')
+      expect(budgetToEffort(32768, 'claude-opus-4.8')).toBe('max')
+      expect(budgetToEffort(80000, 'claude-opus-4.8')).toBe('max')
+    })
+
+    test('maps to max instead of xhigh for non-xhigh models', () => {
+      expect(budgetToEffort(80000, 'claude-sonnet-4.6')).toBe('max')
+    })
+  })
+
+  describe('getEffectiveEffort', () => {
+    test('returns undefined for unsupported models', () => {
+      expect(getEffectiveEffort('claude-haiku-4.5', true, 100000)).toBeUndefined()
+    })
+
+    test('uses explicit config when provided', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 20000, 'max')).toBe('max')
+      expect(getEffectiveEffort('claude-opus-4.8', false, 20000, 'high')).toBe('high')
+    })
+
+    test('returns undefined when not thinking and no config', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', false, 20000)).toBeUndefined()
+    })
+
+    test('uses budget mapping when thinking and auto-mapping enabled', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, true)).toBe('max')
+      expect(getEffectiveEffort('claude-opus-4.8', true, 20000, undefined, true)).toBe('medium')
+    })
+
+    test('falls back to medium when auto-mapping disabled', () => {
+      expect(getEffectiveEffort('claude-opus-4.8', true, 128000, undefined, false)).toBe('medium')
+    })
+  })
+})
diff --git a/src/__tests__/sdk-client.test.ts b/src/__tests__/sdk-client.test.ts
@@ -1,3 +1,4 @@
+import { GenerateAssistantResponseCommand } from '@aws/codewhisperer-streaming-client'
 import { describe, expect, test } from 'bun:test'
 import { clearSdkClientCache, createSdkClient } from '../plugin/sdk-client'
 import type { KiroAuthDetails } from '../plugin/types'
@@ -25,4 +26,48 @@ describe('SDK client', () => {
 
     clearSdkClientCache()
   })
+
+  test('injects effort before content-length is computed', async () => {
+    clearSdkClientCache()
+
+    const client = createSdkClient(auth(), 'us-east-1', 'max')
+    let capturedRequest: any
+
+    client.middlewareStack.add(
+      () => async (args: any) => {
+        capturedRequest = args.request
+        throw new Error('captured-request')
+      },
+      { step: 'finalizeRequest', name: 'captureRequest', priority: 'high' }
+    )
+
+    const command = new GenerateAssistantResponseCommand({
+      conversationState: {
+        chatTriggerType: 'MANUAL',
+        conversationId: 'test-conversation',
+        currentMessage: {
+          userInputMessage: {
+            content: 'hello',
+            modelId: 'claude-opus-4.7',
+            origin: 'AI_EDITOR'
+          }
+        }
+      }
+    })
+
+    await client.send(command).catch((error) => {
+      if (error.message !== 'captured-request') throw error
+    })
+
+    const bodyText =
+      typeof capturedRequest.body === 'string'
+        ? capturedRequest.body
+        : Buffer.from(capturedRequest.body).toString('utf8')
+    const body = JSON.parse(bodyText)
+
+    expect(body.additionalModelRequestFields.output_config.effort).toBe('max')
+    expect(Number(capturedRequest.headers['content-length'])).toBe(Buffer.byteLength(bodyText))
+
+    clearSdkClientCache()
+  })
 })
diff --git a/src/core/request/request-handler.ts b/src/core/request/request-handler.ts
@@ -79,8 +79,13 @@ export class RequestHandler {
   ): Promise<Response> {
     const body = init?.body ? JSON.parse(init.body) : {}
     const model = this.extractModel(url) || body.model || 'claude-sonnet-4-5'
-    const think = model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig
-    const budget = body.providerOptions?.thinkingConfig?.thinkingBudget || 20000
+    const think =
+      model.endsWith('-thinking') || !!body.providerOptions?.thinkingConfig || !!body.thinkingConfig
+    const budget =
+      body.providerOptions?.thinkingConfig?.thinkingBudget ||
+      body.thinkingConfig?.thinkingBudget ||
+      body.thinkingConfig?.budget_tokens ||
+      20000
 
     let retry = 0
     let consecutiveNullAccounts = 0
@@ -132,9 +137,8 @@ export class RequestHandler {
       if (apiTimestamp) {
         this.logSdkRequest(sdkPrep, acc, apiTimestamp)
       }
-
       try {
-        const client = createSdkClient(auth, sdkPrep.region)
+        const client = createSdkClient(auth, sdkPrep.region, sdkPrep.effort)
         const command = new GenerateAssistantResponseCommand({
           conversationState: sdkPrep.conversationState as any,
           profileArn: sdkPrep.profileArn
@@ -219,7 +223,10 @@ export class RequestHandler {
     budget: number,
     showToast?: (message: string, variant: 'info' | 'warning' | 'success' | 'error') => void
   ): SdkPreparedRequest {
-    return transformToSdkRequest(body, model, auth, think, budget, showToast)
+    return transformToSdkRequest(body, model, auth, think, budget, showToast, {
+      effort: this.config.effort,
+      autoEffortMapping: this.config.auto_effort_mapping
+    })
   }
 
   private handleSuccessfulRequest(acc: ManagedAccount): void {

diff --git a/src/plugin/config/schema.ts b/src/plugin/config/schema.ts
@@ -3,6 +3,17 @@ import { z } from 'zod'
 export const AccountSelectionStrategySchema = z.enum(['sticky', 'round-robin', 'lowest-usage'])
 export type AccountSelectionStrategy = z.infer<typeof AccountSelectionStrategySchema>
 
+/**
+ * Kiro effort levels control thinking/reasoning depth.
+ * - low: minimal reasoning
+ * - medium: balanced (default when thinking enabled)
+ * - high: deeper reasoning
+ * - xhigh: extended reasoning (opus-4.7, opus-4.8 only)
+ * - max: maximum reasoning depth (128k thinking tokens on opus-4.7/4.8)
+ */
+export const EffortSchema = z.enum(['low', 'medium', 'high', 'xhigh', 'max'])
+export type Effort = z.infer<typeof EffortSchema>
+
 export const RegionSchema = z.enum([
   'us-east-1',
   'us-east-2',
@@ -70,7 +81,21 @@ export const KiroConfigSchema = z.object({
 
   usage_tracking_enabled: z.boolean().default(true),
   auto_sync_kiro_cli: z.boolean().default(true),
-  enable_log_api_request: z.boolean().default(false)
+  enable_log_api_request: z.boolean().default(false),
+
+  /**
+   * Global effort (reasoning depth) override for all supported models, thinking or not.
+   * When set, it takes precedence over the automatic budget-based mapping.
+   * Values: 'low', 'medium', 'high', 'xhigh' (opus-4.7/4.8 only), 'max'
+   */
+  effort: EffortSchema.optional(),
+
+  /**
+   * Enable automatic effort mapping from OpenCode's thinking budget.
+   * When true (default), maps budget ranges to effort levels.
+   * When false, uses explicit effort config if set; else 'medium' for thinking requests.
+   */
+  auto_effort_mapping: z.boolean().default(true)
 })
 
 export type KiroConfig = z.infer<typeof KiroConfigSchema>
@@ -88,5 +113,6 @@ export const DEFAULT_CONFIG: KiroConfig = {
   auth_server_port_range: 10,
   usage_tracking_enabled: true,
   auto_sync_kiro_cli: true,
-  enable_log_api_request: false
+  enable_log_api_request: false,
+  auto_effort_mapping: true
 }