FlatbreadLabs · tonyketcham · May 11, 2026 · May 8, 2026 · May 8, 2026 · May 9, 2026
diff --git a/.cursor/skills/proof/SKILL.md b/.cursor/skills/proof/SKILL.md
@@ -29,9 +29,15 @@ You (the parent agent) author the DAG inline using your understanding of the use
 {
   "title": "<short human-readable title for the run>",
   "models": {
-    "HIGH": "gpt-5.3-codex",
+    "HIGH": {
+      "id": "gpt-5.4",
+      "params": [{ "id": "reasoning", "value": "high" }]
+    },
     "MED": "composer-2",
-    "LOW": "auto-low"
+    "LOW": {
+      "id": "gpt-5.4-nano",
+      "params": [{ "id": "reasoning", "value": "low" }]
+    }
   },
   "tasks": [
     {
@@ -49,7 +55,7 @@ Rules:
 - Every `depends_on` entry must reference another task's `id`.
 - No cycles. The runner rejects cyclic DAGs at parse time.
 - `complexity` controls the model the subagent uses (see table below). Pick `HIGH` for novel/complex reasoning, `MED` for typical implementation, `LOW` for mechanical/lookup tasks.
-- Optional top-level `models` can override the default complexity → model map for this DAG.
+- Optional top-level `models` can override the default complexity → model map for this DAG. Values can be plain SDK model id strings or model selection objects of the shape `{ "id": "...", "params": [{ "id": "...", "value": "..." }] }`, with `params` omitted when unused.
 - `subtask_prompt` should read like a standalone request — the runner automatically prepends a short summary of upstream task outputs, so you do not need to repeat them.
 - Do **not** put two tasks that write to the same file in the same rank (siblings within a rank run concurrently and would race).
 
@@ -167,11 +173,24 @@ After the runner exits, briefly summarize what completed/failed and re-link the
 | MED        | `composer-2`      |
 | LOW        | `gpt-5.4-nano`    |
 
-Override any subset inline with top-level DAG `models`, or pass a reusable profile with `--models-file <path>`. Precedence is defaults < DAG `models` < `--models-file`. The Cursor model catalog can vary by account.
+Override any subset inline with top-level DAG `models`, or pass a reusable profile with `--models-file <path>`. Values can be plain SDK model id strings or model selection objects with `params`. At run time, Proof calls `Cursor.models.list()`, validates model ids and param values, and expands each partial selection to the closest valid preset variant, filling omitted params from the model's default variant. Precedence, lowest to highest, is defaults < DAG `models` < `--models-file`. The Cursor model catalog can vary by account.
+
+To use a cheaper high-capability GPT model, use the base SDK id plus params, not a suffix-style id:
+
+```json
+{
+  "models": {
+    "HIGH": {
+      "id": "gpt-5.4",
+      "params": [{ "id": "reasoning", "value": "high" }]
+    }
+  }
+}
+```
 
 ### Discovering valid model ids
 
-Many Cursor CLI catalog models encode reasoning effort and Max Mode as **slug suffixes** (e.g. `claude-opus-4-7-thinking-max`, `gpt-5.5-extra-high`, `gpt-5.3-codex-xhigh`), but the Cursor SDK may accept only base slugs. Do not compose SDK model ids from CLI suffixes by hand. For SDK-bound code, prefer `Cursor.models.list()` or the SDK's `ConfigurationError` catalog over `cursor-agent --list-models`.
+Many Cursor CLI catalog models encode reasoning effort and Max Mode as **slug suffixes** (e.g. `claude-opus-4-7-thinking-max`, `gpt-5.5-extra-high`, `gpt-5.3-codex-xhigh`), but the Cursor SDK may accept only base slugs plus `params`. Do not compose SDK model ids from CLI suffixes by hand: use `{ "id": "gpt-5.4", "params": [{ "id": "reasoning", "value": "high" }] }`, not `gpt-5.4-high`. For SDK-bound code, prefer `Cursor.models.list()` or the SDK's `ConfigurationError` catalog over `cursor-agent --list-models`.
 
 Ways to enumerate model ids:
 

diff --git a/.cursor/skills/proof/examples/dag-flatbread-flow-pmf-audit.json b/.cursor/skills/proof/examples/dag-flatbread-flow-pmf-audit.json
@@ -2,9 +2,9 @@
   "title": "Flatbread Flow PMF Audit (no sub-sub-agents)",
   "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
   "models": {
-    "HIGH": "claude-opus-4-7",
-    "MED": "gpt-5.5",
-    "LOW": "gpt-5.4-mini"
+    "HIGH": { "id": "claude-opus-4-7" },
+    "MED": { "id": "gpt-5.5" },
+    "LOW": { "id": "gpt-5.4-mini" }
   },
   "tasks": [
     {

diff --git a/.cursor/skills/proof/examples/flatbread/dag-codegen-change.json b/.cursor/skills/proof/examples/flatbread/dag-codegen-change.json
@@ -2,9 +2,9 @@
   "title": "Flatbread codegen-only change (no sub-sub-agents)",
   "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
   "models": {
-    "HIGH": "claude-opus-4-7",
-    "MED": "gpt-5.5",
-    "LOW": "gpt-5.4-mini"
+    "HIGH": { "id": "claude-opus-4-7" },
+    "MED": { "id": "gpt-5.5" },
+    "LOW": { "id": "gpt-5.4-mini" }
   },
   "tasks": [
     {

diff --git a/.cursor/skills/proof/examples/flatbread/dag-docs-sync.json b/.cursor/skills/proof/examples/flatbread/dag-docs-sync.json
@@ -2,9 +2,9 @@
   "title": "Flatbread docs / README sync (no sub-sub-agents)",
   "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
   "models": {
-    "HIGH": "claude-opus-4-7",
-    "MED": "gpt-5.5",
-    "LOW": "gpt-5.4-mini"
+    "HIGH": { "id": "claude-opus-4-7" },
+    "MED": { "id": "gpt-5.5" },
+    "LOW": { "id": "gpt-5.4-mini" }
   },
   "tasks": [
     {

diff --git a/.cursor/skills/proof/examples/flatbread/dag-schema-migration.json b/.cursor/skills/proof/examples/flatbread/dag-schema-migration.json
@@ -2,9 +2,9 @@
   "title": "Flatbread schema-breaking migration (no sub-sub-agents; pause at human checkpoint after contract-synth)",
   "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
   "models": {
-    "HIGH": "claude-opus-4-7",
-    "MED": "gpt-5.5",
-    "LOW": "gpt-5.4-mini"
+    "HIGH": { "id": "claude-opus-4-7" },
+    "MED": { "id": "gpt-5.5" },
+    "LOW": { "id": "gpt-5.4-mini" }
   },
   "tasks": [
     {

diff --git a/packages/proof/README.md b/packages/proof/README.md
@@ -68,6 +68,33 @@ Every DAG has a `title` and a `tasks` array. Each task needs:
 
 Proof computes ranks with Kahn topological sort and runs sibling tasks in the same rank concurrently. Avoid placing two sibling tasks in the same rank if they write the same files.
 
+Optional top-level `models` can override the default complexity map with plain
+SDK model id strings or SDK model selections:
+
+```json
+{
+  "models": {
+    "HIGH": {
+      "id": "gpt-5.4",
+      "params": [{ "id": "reasoning", "value": "high" }]
+    },
+    "MED": "composer-2",
+    "LOW": {
+      "id": "gpt-5.4-nano",
+      "params": [{ "id": "reasoning", "value": "low" }]
+    }
+  }
+}
+```
+
+Use the object shape when you need `params`; use a string when the model id is
+enough. For example, use `{ "id": "gpt-5.4", "params": [{ "id": "reasoning", "value": "high" }] }`, not a suffix-style id like `gpt-5.4-high`.
+
+When a DAG runs, Proof calls `Cursor.models.list()`, validates model ids and
+param values, and expands each partial selection to the closest valid SDK preset
+variant, filling omitted params from that model's default variant. `--init-only`
+does not call the SDK, so it can still render a canvas without `CURSOR_API_KEY`.
+
 Optional task kinds add control gates:
 
 - `kind: "oracle"` runs a shell command and records pass/fail evidence.
@@ -118,8 +145,9 @@ Proof also exposes helpers for tooling:
 ```ts
 import {
   computeRanks,
-  createModelResolver,
+  createModelSelectionResolver,
   parseDAG,
+  resolveModelSelectionFromCatalog,
   runDryCheck,
   type DAG,
   type TaskState,

diff --git a/packages/proof/src/canvas_writer.ts b/packages/proof/src/canvas_writer.ts
@@ -10,7 +10,15 @@
 
 import { writeFile, mkdir } from 'node:fs/promises';
 import { dirname } from 'node:path';
-import type { Complexity, DAG, TaskKind } from './dag.js';
+import {
+  formatModelSelection,
+  normalizeModelSelection,
+  type Complexity,
+  type DAG,
+  type ModelSelection,
+  type ModelSpec,
+  type TaskKind,
+} from './dag.js';
 
 export type TaskStatus =
   | 'PENDING'
@@ -27,6 +35,7 @@ export interface TaskState {
   subtask_prompt: string;
   status: TaskStatus;
   model: string;
+  modelSelection?: ModelSelection;
   /** `'task'` (default), `'pause'`, or `'oracle'`. Undefined is normalized to `'task'`. */
   kind?: TaskKind;
   /**
@@ -91,25 +100,34 @@ export interface RunState {
 
 export function initialRunState(
   dag: DAG,
-  modelFor: (c: Complexity) => string
+  modelFor: (c: Complexity) => ModelSpec
 ): RunState {
   return {
     title: dag.title,
     startedAt: Date.now(),
-    tasks: dag.tasks.map((t) => ({
-      id: t.id,
-      depends_on: t.depends_on,
-      complexity: t.complexity,
-      subtask_prompt: t.subtask_prompt,
-      status: 'PENDING',
-      model: modelFor(t.complexity),
-      // Normalize undefined kind → 'task' so downstream consumers (canvas
-      // template, runner dispatcher) never have to ?? again.
-      kind: t.kind ?? 'task',
-      // Surface oracle-only fields so the canvas can render the gate's
-      // command / expectation without reading the streamed result body.
-      ...(t.kind === 'oracle' ? { command: t.command, expect: t.expect } : {}),
-    })),
+    tasks: dag.tasks.map((t) => {
+      const modelSelection = normalizeModelSelection(
+        modelFor(t.complexity),
+        `model for task ${t.id}`
+      );
+      return {
+        id: t.id,
+        depends_on: t.depends_on,
+        complexity: t.complexity,
+        subtask_prompt: t.subtask_prompt,
+        status: 'PENDING',
+        model: formatModelSelection(modelSelection),
+        modelSelection,
+        // Normalize undefined kind → 'task' so downstream consumers (canvas
+        // template, runner dispatcher) never have to ?? again.
+        kind: t.kind ?? 'task',
+        // Surface oracle-only fields so the canvas can render the gate's
+        // command / expectation without reading the streamed result body.
+        ...(t.kind === 'oracle'
+          ? { command: t.command, expect: t.expect }
+          : {}),
+      };
+    }),
   };
 }
 
@@ -215,13 +233,25 @@ type TaskStatus =
 type Complexity = 'HIGH' | 'MED' | 'LOW';
 type TaskKind = 'task' | 'pause' | 'oracle';
 
+// Keep in sync with ModelParameterValue / ModelSelection in dag.ts.
+interface ModelParameterValue {
+  id: string;
+  value: string;
+}
+
+interface ModelSelection {
+  id: string;
+  params?: ModelParameterValue[];
+}
+
 interface TaskState {
   id: string;
   depends_on: string[];
   complexity: Complexity;
   subtask_prompt: string;
   status: TaskStatus;
   model: string;
+  modelSelection?: ModelSelection;
   kind?: TaskKind;
   command?: string;
   expect?: string;
@@ -684,6 +714,12 @@ function TaskList({
                     : ''}
                   {(t.iteration ?? 0) > 0 ? ' · iteration ' + t.iteration : ''}
                 </Text>
+                {t.modelSelection?.params && t.modelSelection.params.length > 0 ? (
+                  <Text size="small" tone="tertiary" style={{ paddingLeft: 12 }}>
+                    {'Params: ' +
+                      t.modelSelection.params.map((p) => p.id + '=' + p.value).join(', ')}
+                  </Text>
+                ) : null}
                 {effectiveKind(t) === 'pause' && t.checkpointPath ? (
                   <Stack gap={4}>
                     <Text size="small" weight="semibold">