+
+
-
+
);
}`;
diff --git a/.cursor/skills/dag-task-runner/scripts/oracle_task.ts b/.cursor/skills/dag-task-runner/scripts/oracle_task.ts
index e2672784..f53d9a63 100644
--- a/.cursor/skills/dag-task-runner/scripts/oracle_task.ts
+++ b/.cursor/skills/dag-task-runner/scripts/oracle_task.ts
@@ -98,7 +98,7 @@ export async function runOracleTask(
deps.writer.schedule(deps.cloneState(deps.state));
console.log(
- `[dag-runner] oracle ${task.id} → exec \`${command}\` (expect /${expectSrc}/)`
+ `[proof] oracle ${task.id} → exec \`${command}\` (expect /${expectSrc}/)`
);
const outcome = await execShell(command, options);
@@ -142,7 +142,7 @@ export async function runOracleTask(
deps.writer.schedule(deps.cloneState(deps.state));
console.log(
- `[dag-runner] oracle ${task.id} → ${pass ? 'PASS' : 'FAIL'} (exit ${
+ `[proof] oracle ${task.id} → ${pass ? 'PASS' : 'FAIL'} (exit ${
outcome.exitCode ?? 'null'
}, ${ts.durationMs}ms${outcome.timedOut ? ', TIMED OUT' : ''})`
);
diff --git a/.cursor/skills/dag-task-runner/scripts/pause_task.ts b/.cursor/skills/dag-task-runner/scripts/pause_task.ts
index eb213d2e..66bc53a7 100644
--- a/.cursor/skills/dag-task-runner/scripts/pause_task.ts
+++ b/.cursor/skills/dag-task-runner/scripts/pause_task.ts
@@ -60,7 +60,7 @@ export async function runPauseTask(
deps.writer.schedule(deps.cloneState(deps.state));
console.log(
- `[dag-runner] pause ${task.id} → AWAITING_APPROVAL; delete ${sentinelPath} to release the gate`
+ `[proof] pause ${task.id} → AWAITING_APPROVAL; delete ${sentinelPath} to release the gate`
);
const deadline = Date.now() + options.taskTimeoutMs;
@@ -92,7 +92,7 @@ export async function runPauseTask(
ts.resultText = renderApprovedResultText(sentinelPath, ts.finishedAt);
deps.writer.schedule(deps.cloneState(deps.state));
console.log(
- `[dag-runner] pause ${task.id} → FINISHED (sentinel removed, ${ts.durationMs}ms gated)`
+ `[proof] pause ${task.id} → FINISHED (sentinel removed, ${ts.durationMs}ms gated)`
);
return;
}
diff --git a/.cursor/skills/dag-task-runner/scripts/run_dag.ts b/.cursor/skills/dag-task-runner/scripts/run_dag.ts
index 4b1c85d5..b4c4c59b 100644
--- a/.cursor/skills/dag-task-runner/scripts/run_dag.ts
+++ b/.cursor/skills/dag-task-runner/scripts/run_dag.ts
@@ -48,7 +48,7 @@
* `## Stderr (tail)` headings round-trip
* through the same parser as regular tasks.
* --checkpoint-dir Directory for `kind: 'pause'` sentinel files
- * (default `.dag-runner/` under --cwd).
+ * (default `.proof/` under --cwd).
* --converge-on After the main DAG run, parse the named task's
* `resultText` for `## Blockers` /
* `## High-severity findings`. If non-empty,
@@ -59,7 +59,7 @@
* --max-iterations is reached.
* --max-iterations Convergence iteration ceiling (default: 3).
* --state-path Persist resumable runner state after each rank.
- * Defaults to `.dag-runner/run-state.json` when
+ * Defaults to `.proof/run-state.json` when
* --restart-on-runner-change is enabled.
* --resume-state Resume from a previously persisted state file.
* --restart-on-runner-change
@@ -125,6 +125,17 @@ import {
} from './self_hosting.js';
const SCRIPTS_DIR = dirname(fileURLToPath(import.meta.url));
+/**
+ * Source-of-truth directory for `--restart-on-runner-change` snapshotting.
+ *
+ * When the runner ships compiled (typical install-time use), `SCRIPTS_DIR`
+ * resolves to `/dist`. The TS source lives in `/src`, so the
+ * change detector points there. When running directly from source via
+ * `tsx src/run_dag.ts`, both directories coincide.
+ */
+const RUNNER_SOURCE_DIR = SCRIPTS_DIR.endsWith('/src')
+ ? SCRIPTS_DIR
+ : resolve(SCRIPTS_DIR, '..', 'src');
interface CliArgs {
dag: string;
@@ -148,7 +159,7 @@ interface CliArgs {
streamIdleTimeoutMs: number;
initOnly: boolean;
dryCheckCmds: boolean;
- /** Absolute dir for `kind: 'pause'` sentinel files. Defaults to `/.dag-runner`. */
+ /** Absolute dir for `kind: 'pause'` sentinel files. Defaults to `/.proof`. */
checkpointDir: string;
/** When set, the runner re-executes ancestors after the named task to converge on a clean review. */
convergeOn?: string;
@@ -239,7 +250,7 @@ function parseArgs(argv: string[]): CliArgs {
const checkpointRaw = args['checkpoint-dir'];
const checkpointDir = isAbsolute(checkpointRaw ?? '')
? (checkpointRaw as string)
- : resolve(cwd, checkpointRaw ?? '.dag-runner');
+ : resolve(cwd, checkpointRaw ?? '.proof');
const convergeRaw = args['converge-on'];
const convergeOn =
convergeRaw !== undefined && convergeRaw !== '' && convergeRaw !== 'true'
@@ -258,7 +269,7 @@ function parseArgs(argv: string[]): CliArgs {
statePathRaw !== undefined && statePathRaw !== '' && statePathRaw !== 'true'
? statePathRaw
: restartOnRunnerChange
- ? resumeState ?? '.dag-runner/run-state.json'
+ ? resumeState ?? '.proof/run-state.json'
: undefined;
return {
@@ -359,7 +370,7 @@ function ensureCursorRipgrepPathEnv(): void {
const bundlePkg = cursorSdkRipgrepBundlePackage();
if (!bundlePkg) {
console.warn(
- '[dag-runner] No bundled ripgrep target for platform; set CURSOR_RIPGREP_PATH to an absolute `rg` path if local agents fail.'
+ '[proof] No bundled ripgrep target for platform; set CURSOR_RIPGREP_PATH to an absolute `rg` path if local agents fail.'
);
return;
}
@@ -377,7 +388,7 @@ function ensureCursorRipgrepPathEnv(): void {
// Optional dependency missing for this OS/arch — user can set CURSOR_RIPGREP_PATH.
}
console.warn(
- `[dag-runner] Could not resolve bundled ripgrep from ${bundlePkg}. Install optional @cursor deps or export CURSOR_RIPGREP_PATH=/absolute/path/to/rg`
+ `[proof] Could not resolve bundled ripgrep from ${bundlePkg}. Install optional @cursor deps or export CURSOR_RIPGREP_PATH=/absolute/path/to/rg`
);
}
@@ -510,7 +521,7 @@ async function main(): Promise {
: undefined;
if (fullOutputAbsoluteDir && !args.initOnly) {
await mkdir(fullOutputAbsoluteDir, { recursive: true });
- console.log(`[dag-runner] full-output-dir → ${fullOutputAbsoluteDir}`);
+ console.log(`[proof] full-output-dir → ${fullOutputAbsoluteDir}`);
}
const findingsAbsoluteDir =
@@ -519,7 +530,7 @@ async function main(): Promise {
: undefined;
if (findingsAbsoluteDir && !args.initOnly) {
await mkdir(findingsAbsoluteDir, { recursive: true });
- console.log(`[dag-runner] findings-dir → ${findingsAbsoluteDir}`);
+ console.log(`[proof] findings-dir → ${findingsAbsoluteDir}`);
}
const statePathAbsolute =
@@ -538,7 +549,7 @@ async function main(): Promise {
state.tasks.map((t) => [t.id, t])
);
const runnerSnapshot = args.restartOnRunnerChange
- ? await snapshotRunnerRuntimeFiles(SCRIPTS_DIR)
+ ? await snapshotRunnerRuntimeFiles(RUNNER_SOURCE_DIR)
: undefined;
const writer = new CanvasWriter(args.canvasPath, args.debounceMs);
@@ -546,14 +557,14 @@ async function main(): Promise {
let interrupting = false;
console.log(
- `[dag-runner] DAG "${dag.title}" — ${dag.tasks.length} tasks across ${ranks.length} rank(s)`
+ `[proof] DAG "${dag.title}" — ${dag.tasks.length} tasks across ${ranks.length} rank(s)`
);
- console.log(`[dag-runner] canvas → ${args.canvasPath}`);
+ console.log(`[proof] canvas → ${args.canvasPath}`);
if (resumeStateAbsolute) {
- console.log(`[dag-runner] resumed state ← ${resumeStateAbsolute}`);
+ console.log(`[proof] resumed state ← ${resumeStateAbsolute}`);
}
if (statePathAbsolute) {
- console.log(`[dag-runner] state-path → ${statePathAbsolute}`);
+ console.log(`[proof] state-path → ${statePathAbsolute}`);
}
// Always write the initial all-PENDING canvas first. This is what the parent
@@ -563,7 +574,7 @@ async function main(): Promise {
await persistState('initial state');
if (args.initOnly) {
- console.log('[dag-runner] --init-only: initial canvas written, exiting');
+ console.log('[proof] --init-only: initial canvas written, exiting');
return;
}
@@ -574,18 +585,16 @@ async function main(): Promise {
// ERROR, finalize the canvas, and exit cleanly.
const onUnhandledRejection = (reason: unknown) => {
const msg = reason instanceof Error ? reason.message : String(reason);
- console.error(`[dag-runner] (suppressed unhandled SDK rejection) ${msg}`);
+ console.error(`[proof] (suppressed unhandled SDK rejection) ${msg}`);
};
const onUncaughtException = (err: Error): void => {
const msg = err?.stack ?? err?.message ?? String(err);
- console.error(`[dag-runner] uncaught exception: ${msg}`);
+ console.error(`[proof] uncaught exception: ${msg}`);
void failAndExit(1, 'FAILED', `Runner crashed: ${err.message}`);
};
const onSignal = (signal: NodeJS.Signals): void => {
const exitCode = signal === 'SIGINT' ? 130 : 143;
- console.error(
- `[dag-runner] received ${signal}; finalizing canvas before exit`
- );
+ console.error(`[proof] received ${signal}; finalizing canvas before exit`);
void failAndExit(
exitCode,
'INTERRUPTED',
@@ -612,9 +621,9 @@ async function main(): Promise {
await writer.flush();
await persistState(`runner source changed after ${boundary}`);
console.log(
- `[dag-runner] runner source changed after ${boundary}; persisted state and exiting ${EXIT_RUNNER_RESTART}`
+ `[proof] runner source changed after ${boundary}; persisted state and exiting ${EXIT_RUNNER_RESTART}`
);
- console.log(`[dag-runner] changed runner files: ${changed.join(', ')}`);
+ console.log(`[proof] changed runner files: ${changed.join(', ')}`);
process.exit(EXIT_RUNNER_RESTART);
}
@@ -633,7 +642,7 @@ async function main(): Promise {
const flushMsg =
flushErr instanceof Error ? flushErr.message : String(flushErr);
console.error(
- `[dag-runner] failed to flush canvas during shutdown: ${flushMsg}`
+ `[proof] failed to flush canvas during shutdown: ${flushMsg}`
);
} finally {
finalized = true;
@@ -733,7 +742,7 @@ async function main(): Promise {
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(
- `[dag-runner] findings sidecar write failed for ${task.id}: ${msg}`
+ `[proof] findings sidecar write failed for ${task.id}: ${msg}`
);
}
}
@@ -749,14 +758,14 @@ async function main(): Promise {
});
if (runnableRank.length === 0) {
console.log(
- `[dag-runner] rank ${rankIdx + 1}/${ranks.length}: ${rank
+ `[proof] rank ${rankIdx + 1}/${ranks.length}: ${rank
.map((t) => t.id)
.join(', ')} (already complete; skipping)`
);
continue;
}
console.log(
- `[dag-runner] rank ${rankIdx + 1}/${ranks.length}: ${runnableRank
+ `[proof] rank ${rankIdx + 1}/${ranks.length}: ${runnableRank
.map((t) => t.id)
.join(', ')}`
);
@@ -836,19 +845,17 @@ async function main(): Promise {
const succeeded = state.tasks.length - errors.length - budgetHits.length;
console.log(
- `[dag-runner] done — ${succeeded}/${
+ `[proof] done — ${succeeded}/${
state.tasks.length
} succeeded in ${formatMs(state.finishedAt - state.startedAt)}`
);
if (errors.length > 0) {
- console.log(`[dag-runner] errors: ${errors.map((e) => e.id).join(', ')}`);
+ console.log(`[proof] errors: ${errors.map((e) => e.id).join(', ')}`);
process.exitCode = 1;
}
if (budgetHits.length > 0) {
console.log(
- `[dag-runner] budget-exceeded: ${budgetHits
- .map((b) => b.id)
- .join(', ')}`
+ `[proof] budget-exceeded: ${budgetHits.map((b) => b.id).join(', ')}`
);
// Distinct from the generic ERROR exit (1) so wrapper scripts can
// branch on budget. We only upgrade `0`; a prior ERROR-driven `1`
@@ -859,7 +866,7 @@ async function main(): Promise {
}
if (fullOutputAbsoluteDir) {
console.log(
- `[dag-runner] full transcripts + index (_index.md) → ${fullOutputAbsoluteDir}`
+ `[proof] full transcripts + index (_index.md) → ${fullOutputAbsoluteDir}`
);
}
} catch (err) {
@@ -874,7 +881,7 @@ async function main(): Promise {
writer.schedule(structuredCloneState(state));
await writer.flush();
finalized = true;
- console.error(`[dag-runner] ${err.message}`);
+ console.error(`[proof] ${err.message}`);
process.exit(EXIT_BUDGET_EXCEEDED);
}
const msg = err instanceof Error ? err.message : String(err);
@@ -1243,9 +1250,7 @@ async function bestEffortCancel(
} catch (cancelErr) {
const msg =
cancelErr instanceof Error ? cancelErr.message : String(cancelErr);
- console.error(
- `[dag-runner] failed to cancel timed-out task ${taskId}: ${msg}`
- );
+ console.error(`[proof] failed to cancel timed-out task ${taskId}: ${msg}`);
}
}
@@ -1388,7 +1393,7 @@ async function runConvergenceLoop(
if (!convergeTs) {
// Defensive — main() already validates this, but the loop must not crash.
console.error(
- `[dag-runner] --converge-on "${convergeOn}" not found in state; skipping convergence loop`
+ `[proof] --converge-on "${convergeOn}" not found in state; skipping convergence loop`
);
return;
}
@@ -1419,7 +1424,7 @@ async function runConvergenceLoop(
);
if (!findings.hasIssues) {
console.log(
- `[dag-runner] converge-on ${convergeOn}: clean — no Blockers / High-severity findings after ${
+ `[proof] converge-on ${convergeOn}: clean — no Blockers / High-severity findings after ${
iter - 1
} re-iteration(s)`
);
@@ -1448,13 +1453,13 @@ async function runConvergenceLoop(
convergeTs.errorMessage = `Convergence iteration ${iter} would exceed budget.maxIterations=${budget.maxIterations}`;
writer.schedule(structuredCloneState(state));
console.log(
- `[dag-runner] converge-on ${convergeOn}: BUDGET-EXCEEDED — iteration ${iter} would exceed budget.maxIterations=${budget.maxIterations}`
+ `[proof] converge-on ${convergeOn}: BUDGET-EXCEEDED — iteration ${iter} would exceed budget.maxIterations=${budget.maxIterations}`
);
return;
}
console.log(
- `[dag-runner] converge iteration ${iter}/${maxIterations}: ${findings.blockerLines.length} blocker(s), ${findings.highSeverityLines.length} high-severity finding(s) — re-running ${reExecIds.size} task(s)`
+ `[proof] converge iteration ${iter}/${maxIterations}: ${findings.blockerLines.length} blocker(s), ${findings.highSeverityLines.length} high-severity finding(s) — re-running ${reExecIds.size} task(s)`
);
const convergenceContext = buildConvergenceContext(
@@ -1528,16 +1533,16 @@ async function runConvergenceLoop(
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(
- `[dag-runner] findings sidecar re-write failed for ${convergeOn} after BUDGET-EXCEEDED: ${msg}`
+ `[proof] findings sidecar re-write failed for ${convergeOn} after BUDGET-EXCEEDED: ${msg}`
);
}
}
console.log(
- `[dag-runner] converge-on ${convergeOn}: BUDGET-EXCEEDED — exhausted --max-iterations=${maxIterations} with ${finalFindings.blockerLines.length} blocker(s), ${finalFindings.highSeverityLines.length} high-severity finding(s)`
+ `[proof] converge-on ${convergeOn}: BUDGET-EXCEEDED — exhausted --max-iterations=${maxIterations} with ${finalFindings.blockerLines.length} blocker(s), ${finalFindings.highSeverityLines.length} high-severity finding(s)`
);
} else {
console.log(
- `[dag-runner] converge-on ${convergeOn}: clean after ${maxIterations} re-iteration(s)`
+ `[proof] converge-on ${convergeOn}: clean after ${maxIterations} re-iteration(s)`
);
}
}
@@ -1557,9 +1562,7 @@ function skipTask(
ts.durationMs = 0;
ts.errorMessage = `Skipped: upstream task(s) ${failedDeps.join(', ')} failed`;
console.log(
- `[dag-runner] skipping ${task.id} — upstream ${failedDeps.join(
- ', '
- )} failed`
+ `[proof] skipping ${task.id} — upstream ${failedDeps.join(', ')} failed`
);
writer.schedule(structuredCloneState(state));
if (!fullOutputAbsoluteDir) return Promise.resolve();
@@ -1749,9 +1752,7 @@ function structuredCloneState(state: RunState): RunState {
main().catch((err) => {
console.error(
- `[dag-runner] fatal: ${
- err instanceof Error ? err.stack ?? err.message : err
- }`
+ `[proof] fatal: ${err instanceof Error ? err.stack ?? err.message : err}`
);
process.exit(1);
});
diff --git a/.cursor/skills/dag-task-runner/scripts/run_dag_supervisor.ts b/.cursor/skills/dag-task-runner/scripts/run_dag_supervisor.ts
index 5313b72e..851523a3 100644
--- a/.cursor/skills/dag-task-runner/scripts/run_dag_supervisor.ts
+++ b/.cursor/skills/dag-task-runner/scripts/run_dag_supervisor.ts
@@ -6,9 +6,18 @@
* exits with EXIT_RUNNER_RESTART (75) after persisting state whenever runner
* runtime files change. This supervisor relaunches the runner with
* `--resume-state` so the next process executes the newly edited source.
+ *
+ * The supervisor automatically picks the right launcher based on its own
+ * file extension:
+ *
+ * - `.js` (compiled / packaged) — spawn `process.execPath` against the
+ * sibling `run_dag.js` so consumers do not need `tsx` on PATH.
+ * - `.ts` (dev / `tsx src/run_dag_supervisor.ts`) — spawn the locally
+ * installed `tsx` binary against the sibling `run_dag.ts`.
*/
import { spawn } from 'node:child_process';
+import { existsSync } from 'node:fs';
import { mkdir } from 'node:fs/promises';
import { dirname, isAbsolute, join, resolve } from 'node:path';
import process from 'node:process';
@@ -16,10 +25,28 @@ import { fileURLToPath } from 'node:url';
import { EXIT_RUNNER_RESTART } from './self_hosting.js';
-const SCRIPTS_DIR = dirname(fileURLToPath(import.meta.url));
-const TSX_BIN = join(SCRIPTS_DIR, 'node_modules', '.bin', 'tsx');
-const RUNNER = join(SCRIPTS_DIR, 'run_dag.ts');
-const DEFAULT_STATE_PATH = '.dag-runner/run-state.json';
+const SUPERVISOR_PATH = fileURLToPath(import.meta.url);
+const SCRIPTS_DIR = dirname(SUPERVISOR_PATH);
+const IS_TS_SOURCE = SUPERVISOR_PATH.endsWith('.ts');
+const RUNNER = join(SCRIPTS_DIR, IS_TS_SOURCE ? 'run_dag.ts' : 'run_dag.js');
+
+/**
+ * Resolve `tsx` lazily — only required when running the supervisor directly
+ * from `src/*.ts` (dev mode). The compiled `.js` build path uses
+ * `process.execPath` instead and does not need `tsx` on disk.
+ */
+function resolveTsxBin(): string {
+ const candidates = [
+ join(SCRIPTS_DIR, '..', 'node_modules', '.bin', 'tsx'),
+ join(SCRIPTS_DIR, '..', '..', '..', 'node_modules', '.bin', 'tsx'),
+ ];
+ for (const candidate of candidates) {
+ if (existsSync(candidate)) return candidate;
+ }
+ return 'tsx';
+}
+
+const DEFAULT_STATE_PATH = '.proof/run-state.json';
const DEFAULT_MAX_RESTARTS = 20;
interface SupervisorArgs {
@@ -97,7 +124,8 @@ function resolveAgainstCwd(path: string, cwd: string): string {
async function runOnce(argv: readonly string[]): Promise<number> {
return new Promise((resolveCode) => {
- const child = spawn(TSX_BIN, [RUNNER, ...argv], {
+ const command = IS_TS_SOURCE ? resolveTsxBin() : process.execPath;
+ const child = spawn(command, [RUNNER, ...argv], {
cwd: process.cwd(),
env: process.env,
stdio: 'inherit',
@@ -111,7 +139,7 @@ async function runOnce(argv: readonly string[]): Promise {
});
child.on('error', (err) => {
console.error(
- `[dag-runner-supervisor] failed to launch runner: ${err.message}`
+ `[proof-supervisor] failed to launch runner: ${err.message}`
);
resolveCode(1);
});
@@ -131,7 +159,7 @@ async function main(): Promise {
if (restart > 0) {
argv = setFlag(argv, '--resume-state', absoluteStatePath);
console.log(
- `[dag-runner-supervisor] restart ${restart}/${parsed.maxRestarts} from ${absoluteStatePath}`
+ `[proof-supervisor] restart ${restart}/${parsed.maxRestarts} from ${absoluteStatePath}`
);
}
@@ -142,14 +170,14 @@ async function main(): Promise {
}
console.error(
- `[dag-runner-supervisor] exceeded --max-runner-restarts=${parsed.maxRestarts}`
+ `[proof-supervisor] exceeded --max-runner-restarts=${parsed.maxRestarts}`
);
process.exit(1);
}
main().catch((err) => {
console.error(
- `[dag-runner-supervisor] fatal: ${
+ `[proof-supervisor] fatal: ${
err instanceof Error ? err.stack ?? err.message : err
}`
);
diff --git a/.cursor/skills/dag-task-runner/scripts/self_hosting.ts b/.cursor/skills/dag-task-runner/scripts/self_hosting.ts
index d790863f..c5c5f0b2 100644
--- a/.cursor/skills/dag-task-runner/scripts/self_hosting.ts
+++ b/.cursor/skills/dag-task-runner/scripts/self_hosting.ts
@@ -70,8 +70,13 @@ export async function snapshotRunnerRuntimeFiles(
const snapshot: RunnerFileSnapshot = new Map();
for (const rel of RUNNER_RUNTIME_FILES) {
const path = join(scriptsDir, rel);
- const s = await stat(path);
- snapshot.set(path, { path, size: s.size, mtimeMs: s.mtimeMs });
+ try {
+ const s = await stat(path);
+ snapshot.set(path, { path, size: s.size, mtimeMs: s.mtimeMs });
+ } catch {
+ // Missing source file (e.g. installed package with `dist`-only layout).
+ // Skip silently — the change detector simply will not flag this file.
+ }
}
return snapshot;
}
diff --git a/.cursor/skills/proof/SKILL.md b/.cursor/skills/proof/SKILL.md
new file mode 100644
index 00000000..36fb1962
--- /dev/null
+++ b/.cursor/skills/proof/SKILL.md
@@ -0,0 +1,238 @@
+---
+name: proof
+description: Decompose a user's task into a DAG of subtasks and execute them with Cursor SDK local subagents in topological order, rendering live streaming status to a canvas. Each task has a complexity (HIGH/MED/LOW) that maps to a model. Use when the user asks to fan out work, decompose a task into a DAG, run subagents in parallel, or break a large task into a dependency graph.
+---
+
+# Proof
+
+Decomposes a user-described task into a JSON DAG, then runs each node as a Cursor SDK local subagent (with parents' outputs stitched into the child's prompt). Live DAG state — including each running subagent's streaming output — is rendered into a `.canvas.tsx` that the runner rewrites on every status transition; the IDE hot-recompiles so the user sees subagents move through `PENDING -> RUNNING -> FINISHED/ERROR` in real time.
+
+The runtime ships as the workspace package `@flatbread/proof` (`packages/proof`). It exposes two CLIs — `proof` (runner) and `proof-supervisor` (self-hosting wrapper) — plus a public library API for tooling that wants to author or inspect DAGs programmatically.
+
+## When to use
+
+Trigger when the user says any of:
+
+- "decompose this task", "break this into a DAG", "fan out subagents"
+- "run this as a graph of subtasks"
+- a multi-step request where some steps clearly depend on others and others can run in parallel
+
+Skip when the task is a single-shot edit, a quick question, or already linear enough that one agent turn would handle it.
+
+## Workflow
+
+### Step 1 — Generate a DAG JSON
+
+You (the parent agent) author the DAG inline using your understanding of the user's task. Schema:
+
+```json
+{
+ "title": "",
+ "models": {
+ "HIGH": "gpt-5.3-codex",
+ "MED": "composer-2",
+ "LOW": "auto-low"
+ },
+ "tasks": [
+ {
+ "id": "",
+ "depends_on": ["", "..."],
+ "complexity": "HIGH | MED | LOW",
+ "subtask_prompt": ""
+ }
+ ]
+}
+```
+
+Rules:
+
+- Every `depends_on` entry must reference another task's `id`.
+- No cycles. The runner rejects cyclic DAGs at parse time.
+- `complexity` controls the model the subagent uses (see table below). Pick `HIGH` for novel/complex reasoning, `MED` for typical implementation, `LOW` for mechanical/lookup tasks.
+- Optional top-level `models` can override the default complexity → model map for this DAG.
+- `subtask_prompt` should read like a standalone request — the runner automatically prepends a short summary of upstream task outputs, so you do not need to repeat them.
+- Do **not** put two tasks that write to the same file in the same rank (siblings within a rank run concurrently and would race).
+
+#### Maximize parallelism — this is the whole point of the runner
+
+The runner executes tasks within a rank **concurrently** via `Promise.all`. A linear `A → B → C → D` DAG wastes that capability. Before finalizing the DAG, actively decompose the problem to surface independent work:
+
+1. **Default to no dependencies.** Add a `depends_on` entry **only** when the child task literally cannot start without the parent's output. "Logically follows" is not a dependency.
+2. **Split read-only research and discovery into a wide first rank.** Codebase grepping, doc reading, dependency scans, schema lookups, test inventory — these almost always share rank 1 with no edges between them.
+3. **Fan out post-implementation work.** Tests, docs, changelog entries, type updates, lint fixes typically all depend on the same implementation task and on nothing else — put them in one rank, not a chain.
+4. **Use diamonds, not lines.** If two tasks both feed into a third, model that explicitly: rank 1 has the two parents, rank 2 is the merge.
+5. **Same-rank file-write safety.** The one hard constraint: don't put two tasks in the same rank if they would write the same file. Either serialize them with a `depends_on`, or merge them into one task.
+
+Quality bar: when you sketch the rank structure (rank 1 → rank 2 → …), at least one rank should contain more than one task in any non-trivial problem. If your DAG is a single chain of 1-task ranks, you almost certainly missed parallelism — go back and look again.
+
+The example shipped with the runner (`examples/example_dag.json`) demonstrates the pattern: rank 1 fans out to two read-only research tasks, rank 2 merges them into a design, rank 3 implements, and rank 4 fans out again to tests + docs.
+
+Write the JSON to a temp file **and immediately generate the initial canvas** so the user can open it while subagents spin up. Run all of the following in a single shell block:
+
+```bash
+# 0. Pick a canvas path
+CANVAS_PATH="$HOME/.cursor/projects/<project-slug>/canvases/dag-<slug>.canvas.tsx"
+
+# 1. Write the DAG JSON
+cat > /tmp/dag-<slug>.json <<'JSON'
+{ "title": "...", "tasks": [ ... ] }
+JSON
+
+# 2. Build the @flatbread/proof package once per workspace install
+# (skipped if dist/ is already present; safe to re-run).
+[ -f "$(git rev-parse --show-toplevel)/packages/proof/dist/run_dag.js" ] || \
+ pnpm -F @flatbread/proof build
+
+# 3. Generate the initial all-PENDING canvas (no CURSOR_API_KEY needed)
+pnpm exec proof \
+ --init-only \
+  --dag /tmp/dag-<slug>.json \
+ --canvas-path "$CANVAS_PATH"
+
+# 4. Best-effort auto-open of the canvas file; ignore failure in headless/non-macOS environments
+open "$CANVAS_PATH" >/dev/null 2>&1 || true
+```
+
+The canvas path is:
+
+```
+~/.cursor/projects/<project-slug>/canvases/dag-<slug>.canvas.tsx
+```
+
+`<project-slug>` is derived from the cwd's absolute path with `/` and other special chars replaced by `-`. To compute it, take `pwd`, strip the leading `/`, and replace each remaining `/` with `-`. Example: cwd `/Users/me/Code/myapp` → slug `Users-me-Code-myapp`. Use the same `<slug>` you used for the DAG JSON filename so they're easy to correlate.
+
+### Step 2 — Surface the canvas link in chat
+
+Now that the file exists on disk, post a Markdown hyperlink with the exact text `Open Canvas` and a `file://` URL, plus the absolute path for fallback:
+
+> I created a live canvas: [Open Canvas](file:///Users/<you>/.cursor/projects/<project-slug>/canvases/dag-<slug>.canvas.tsx)
+> Fallback path: `/Users/<you>/.cursor/projects/<project-slug>/canvases/dag-<slug>.canvas.tsx`
+
+Always use the link text `Open Canvas`. Use the absolute path in both the `file://` URL and fallback path, never `~/`. Do this **before** Step 3 so the user can open the canvas while subagents are still spinning up. The Step 1 shell block already attempts to auto-open the canvas with `open`; if that fails, continue and rely on the chat link.
+
+### Step 3 — Run the DAG
+
+Ensure `CURSOR_API_KEY` is set (the runner fails fast if missing), then launch:
+
+```bash
+[ -n "$CURSOR_API_KEY" ] || { [ -f .env ] && set -a && source .env && set +a; }
+
+pnpm exec proof \
+  --dag /tmp/dag-<slug>.json \
+ --canvas-path "$CANVAS_PATH"
+```
+
+If the DAG is expected to edit the runner itself (`packages/proof/src/**`), launch through the supervisor instead so source edits take effect at a process boundary:
+
+```bash
+pnpm exec proof-supervisor \
+  --dag /tmp/dag-<slug>.json \
+  --canvas-path "$CANVAS_PATH" \
+  --state-path "$HOME/.cursor/projects/<project-slug>/dag-state/<slug>.json"
+```
+
+The supervisor passes `--restart-on-runner-change` to the runner. When runner runtime files change after a rank or convergence iteration, the child runner persists state, marks the canvas `RESTARTING RUNNER`, exits `75`, and the supervisor relaunches with `--resume-state` so pending tasks continue under the new source. After editing `packages/proof/src/**`, run `pnpm -F @flatbread/proof build` so the relaunch picks up the new code.
+
+Same `--canvas-path` as Step 1. The runner:
+
+1. Validates the DAG and reuses the existing canvas file.
+2. For each rank (Kahn topo-sort), launches ready tasks concurrently as local Cursor SDK agents and rewrites the canvas as each one transitions, streaming assistant text into each task card live.
+3. Automatically skips tasks whose upstream dependencies failed (marks them `ERROR` with a "Skipped: upstream task(s) … failed" message).
+4. Captures each subagent's final assistant text, status, token usage, and duration.
+5. Writes a final canvas with summary stats.
+6. On SIGINT/SIGTERM/SIGHUP, cancels all in-flight subagents before finalizing the canvas.
+
+#### CLI knobs
+
+| Flag | Default | Purpose |
+| ------------------------------- | ------------------ | ------------------------------------------------------------------------- |
+| `--models-file <path>`          | —                  | JSON file containing a partial complexity → model override map.           |
+| `--state-path <path>`           | —                  | Persist resumable state after rank boundaries.                            |
+| `--resume-state <path>`         | —                  | Resume from a persisted state file.                                       |
+| `--restart-on-runner-change`    | `false`            | Exit `75` after runner runtime files change so a supervisor can relaunch. |
+| `--task-timeout-ms <ms>`        | `1200000` (20 min) | Marks a task `ERROR` if it runs too long.                                 |
+| `--stream-publish-ms <ms>`      | `500`              | Throttles live canvas streaming writes.                                   |
+| `--stream-idle-timeout-ms <ms>` | `300000` (5 min)   | Marks a task `ERROR` if no stream events arrive.                          |
+| `--debounce <ms>`               | `200`              | Canvas write debounce interval.                                           |
+
+### Step 4 — Summarize
+
+After the runner exits, briefly summarize what completed/failed and re-link the canvas with the exact text `[Open Canvas](file:///Users/<you>/.cursor/projects/<project-slug>/canvases/dag-<slug>.canvas.tsx)` so the user can scroll back to it. Include the absolute fallback path only if useful.
+
+## Complexity → model
+
+| Complexity | Model |
+| ---------- | ----------------- |
+| HIGH | `claude-opus-4-7` |
+| MED | `composer-2` |
+| LOW | `gpt-5.4-nano` |
+
+Override any subset inline with top-level DAG `models`, or pass a reusable profile with `--models-file <path>`. Precedence is defaults < DAG `models` < `--models-file`. The Cursor model catalog can vary by account.
+
+### Discovering valid model ids
+
+Many Cursor CLI catalog models encode reasoning effort and Max Mode as **slug suffixes** (e.g. `claude-opus-4-7-thinking-max`, `gpt-5.5-extra-high`, `gpt-5.3-codex-xhigh`), but the Cursor SDK may accept only base slugs. Do not compose SDK model ids from CLI suffixes by hand. For SDK-bound code, prefer `Cursor.models.list()` or the SDK's `ConfigurationError` catalog over `cursor-agent --list-models`.
+
+Ways to enumerate model ids:
+
+```bash
+# CLI catalog — useful for CLI runs, not authoritative for @cursor/sdk
+cursor-agent --list-models
+
+# SDK-flavored alternative — also prints any per-model `parameters` and preset `variants`
+pnpm -F @flatbread/proof models:list # all ids
+pnpm -F @flatbread/proof models:list <model-id>   # detail for one model
+pnpm -F @flatbread/proof models:list --json
+```
+
+## Auth
+
+The runner reads `CURSOR_API_KEY` from the environment. Set it however you usually manage secrets:
+
+```bash
+export CURSOR_API_KEY=crsr_...
+```
+
+If the current workspace has a `.env` containing it, source that first:
+
+```bash
+set -a && source .env && set +a
+```
+
+## CLI options
+
+| Flag | Default | Notes |
+| ---------------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------- |
+| `--dag` | required | Path to the DAG JSON file. |
+| `--canvas-path` | composed from below | Full absolute path to the canvas file. Preferred — used by the parent-managed flow. |
+| `--canvas` | — | Canvas filename stem (no `.canvas.tsx`). Used only if `--canvas-path` is omitted. |
+| `--canvases-dir` | derived from cwd | Override the canvases output directory. Used only with `--canvas`. |
+| `--cwd` | `process.cwd()` | Working dir each subagent operates in. |
+| `--models-file` | — | JSON file containing a partial complexity → model override map. |
+| `--debounce` | `200` (ms) | Canvas write debounce interval. |
+| `--init-only` | `false` | Write the initial all-`PENDING` canvas and exit. No `CURSOR_API_KEY` required. |
+| `--state-path` | — | Persist resumable runner state. Defaults to `.proof/run-state.json` when `--restart-on-runner-change` is set. |
+| `--resume-state` | — | Load a persisted `RunState` and skip already terminal tasks. |
+| `--restart-on-runner-change` | `false` | Detect runner runtime file changes after safe boundaries and exit `75` for supervisor restart. |
+| `--max-runner-restarts` | `20` | Supervisor-only cap for relaunches from `proof-supervisor`. |
+| `--task-timeout-ms` | `1200000` (20 min) | Marks a task `ERROR` if it exceeds this duration. |
+| `--stream-publish-ms` | `500` (ms) | Throttles live canvas streaming writes to avoid excessive cloning. |
+| `--stream-idle-timeout-ms` | `300000` (5 min) | Marks a task `ERROR` if no stream events arrive within this window. |
+
+## Caveats
+
+- Local runtime only — every subagent runs against `--cwd` (defaults to wherever you invoke the runner).
+- Sibling tasks in the same rank run in parallel; do not let them write the same files.
+- Inline MCP servers and sub-sub-agents are not configured by this runner.
+- A failed task automatically skips all downstream dependents (they are marked `ERROR` with a "Skipped: upstream task(s) … failed" message). This prevents wasted API calls on tasks whose inputs are missing.
+- Per-task streamed text is capped at `STREAM_CAP = 4000` chars to keep the canvas file modest. Upstream context passed to child tasks is capped at 2000 chars per parent.
+- Timed-out tasks are marked `ERROR` instead of staying indefinitely in `RUNNING`.
+- SIGINT/SIGTERM/SIGHUP gracefully cancel all in-flight subagents and finalize the canvas before exiting.
+- Unexpected unhandled rejections from SDK internals are suppressed to prevent runner crashes; uncaught exceptions are logged and trigger a clean shutdown.
+
+## Reference
+
+- Package: `@flatbread/proof` at `packages/proof`
+- DAG schema example: `examples/example_dag.json`
+- Library exports: `import { parseDAG, computeRanks, ... } from '@flatbread/proof'`
+- Cursor SDK docs: https://cursor.com/docs/api/sdk/typescript
diff --git a/.cursor/skills/proof/examples/dag-flatbread-flow-pmf-audit.json b/.cursor/skills/proof/examples/dag-flatbread-flow-pmf-audit.json
new file mode 100644
index 00000000..7e609e53
--- /dev/null
+++ b/.cursor/skills/proof/examples/dag-flatbread-flow-pmf-audit.json
@@ -0,0 +1,52 @@
+{
+ "title": "Flatbread Flow PMF Audit (no sub-sub-agents)",
+ "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
+ "models": {
+ "HIGH": "claude-opus-4-7",
+ "MED": "gpt-5.5",
+ "LOW": "gpt-5.4-mini"
+ },
+ "tasks": [
+ {
+ "id": "map-current-flow",
+ "depends_on": [],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files (frontmatter `readonly: true` is advisory in DAG runs).\n\nMap the current end-to-end flow: how developers define content models, sources, transformers, generated APIs/types, querying, examples, and runtime usage. Distinguish where GraphQL is structurally required vs. one of several interfaces. Read repo docs and source as needed. For `## Current contract` capture: data source config shape, root query naming, ID/ref semantics, filter capabilities, generated TypeScript shape, CLI behavior, and obvious developer-path friction. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "relational-content-needs",
+ "depends_on": [],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nIndependently audit what a developer who wants Git-native relational content would need from Flatbread. Stay in Flatbread's vocabulary (`Content`, `BaseContentNode`, `Source`, `Transformer`, `Override` per `packages/core/src/types.ts`) — do NOT import database vocabulary like tables/foreign keys/joins/constraints/indexes/import-export. Compare needs against what the repo provides today: content collections, refs between collections, query/filter ergonomics, type safety, validation, the local edit/query loop, codegen, and example integration. Map needs to the schema's headings: `## Current contract` is what exists today, `## Proposed contract` is what would close the highest-leverage gaps, `## Migration impact` is what users would have to change, `## Validation plan` is how to prove each gap closure works."
+ },
+ {
+ "id": "docs-onboarding-audit",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nAudit the repository docs, examples, tests, package scripts, and README/onboarding path for a first-time developer. Focus on whether the relational content promise is obvious, whether the first success path is short, and where the developer is forced into GraphQL-specific concepts before they get value. `## Current contract` is the documented promise + steps; `## Proposed contract` is what the docs should promise instead; `## Migration impact` is the docs/example surface area to touch."
+ },
+ {
+ "id": "market-positioning-audit",
+ "depends_on": [],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nUsing the repo as the primary evidence plus general product reasoning, audit Flatbread's likely product-market fit for developers who want Git-native relational content for TypeScript apps. Consider adjacent alternatives: Contentlayer, Velite, Keystatic, MDX-based content layers, Sanity/Contentful-style headless CMSes, Astro Content Collections, and (only when honest) embedded databases like SQLite. `## Current contract` is the implicit positioning today; `## Proposed contract` is the sharpest defensible positioning; `## Migration impact` is what the README/landing copy would need to say."
+ },
+ {
+ "id": "synthesize-pmf-gaps",
+ "depends_on": [
+ "map-current-flow",
+ "relational-content-needs",
+ "docs-onboarding-audit",
+ "market-positioning-audit"
+ ],
+ "complexity": "HIGH",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner` operating as the rank-2 merge node. Follow its output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`.\n\nSynthesize upstream audit findings into a prioritized PMF gap report. Call out GraphQL coupling honestly only when upstream evidence supports it. For each gap include: severity (P0/P1/P2), evidence (file refs from upstream), product implication, and the contract change it implies.\n\nIMPORTANT for survival under the 2000-char downstream stitch: keep `## Current contract` to a single 1-2 line summary so the gap table at the top of `## Proposed contract` lands within the first 2000 chars for `recommend-roadmap`."
+ },
+ {
+ "id": "recommend-roadmap",
+ "depends_on": ["synthesize-pmf-gaps"],
+ "complexity": "HIGH",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner` producing a roadmap recommendation. Follow its output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`.\n\nBased on the synthesized PMF gaps, recommend a concise product direction and roadmap. Output a sharper positioning statement, 3-5 product primitives to add or clarify (each with file/package anchor), near-term experiments, and what not to build yet. `## Migration impact` should map each recommendation to the affected packages so a follow-up `flatbread-major-migration` DAG (template at `.cursor/skills/proof/examples/flatbread/dag-schema-migration.json`) can be authored from this output without re-deriving scope."
+ }
+ ]
+}
diff --git a/.cursor/skills/proof/examples/example_dag.json b/.cursor/skills/proof/examples/example_dag.json
new file mode 100644
index 00000000..6c0e43bd
--- /dev/null
+++ b/.cursor/skills/proof/examples/example_dag.json
@@ -0,0 +1,41 @@
+{
+ "title": "Build a tiny CLI todo app",
+ "tasks": [
+ {
+ "id": "research-stack",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "Sketch the smallest reasonable design for a single-file Node.js CLI todo app that stores items in a local JSON file. List the supported commands (add, list, done, rm), the JSON schema, and the file layout. Output as markdown bullets only — do not write any code yet."
+ },
+ {
+ "id": "research-cli-conventions",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "Summarize the conventions a small Node CLI should follow: shebang line, exit codes, stdout vs stderr usage, --help output shape, and how to parse argv without a dependency. Output as markdown bullets only — do not write code."
+ },
+ {
+ "id": "design",
+ "depends_on": ["research-stack", "research-cli-conventions"],
+ "complexity": "MED",
+ "subtask_prompt": "Combine the upstream research into a one-page implementation plan for the todo CLI. Specify file paths, function signatures, error handling, and the JSON storage shape. Output a markdown design doc — still no code."
+ },
+ {
+ "id": "implement",
+ "depends_on": ["design"],
+ "complexity": "MED",
+ "subtask_prompt": "Implement the design as `todo.mjs` in the current working directory. It must be a single file with no dependencies, support the four commands from the design, and persist to `./todos.json`. After writing the file, run it once with `node todo.mjs --help` and include the output in your reply."
+ },
+ {
+ "id": "tests",
+ "depends_on": ["implement"],
+ "complexity": "LOW",
+ "subtask_prompt": "Add a `test_todo.mjs` script in the cwd that exercises add → list → done → rm against `todo.mjs` using a temp JSON file. Use only the Node `node:test` and `node:assert` modules. Run it with `node --test test_todo.mjs` and include the output in your reply."
+ },
+ {
+ "id": "docs",
+ "depends_on": ["implement"],
+ "complexity": "LOW",
+ "subtask_prompt": "Write a short `README.md` in the cwd describing what `todo.mjs` does, the supported commands with examples, and where data is stored. Do not modify `todo.mjs`."
+ }
+ ]
+}
diff --git a/.cursor/skills/proof/examples/flatbread/dag-codegen-change.json b/.cursor/skills/proof/examples/flatbread/dag-codegen-change.json
new file mode 100644
index 00000000..5448195d
--- /dev/null
+++ b/.cursor/skills/proof/examples/flatbread/dag-codegen-change.json
@@ -0,0 +1,74 @@
+{
+ "title": "Flatbread codegen-only change (no sub-sub-agents)",
+ "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
+ "models": {
+ "HIGH": "claude-opus-4-7",
+ "MED": "gpt-5.5",
+ "LOW": "gpt-5.4-mini"
+ },
+ "tasks": [
+ {
+ "id": "diag-core-types",
+ "depends_on": [],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files (frontmatter `readonly: true` is advisory in DAG runs).\n\nDiagnose every `@flatbread/core` type that `packages/codegen` consumes (e.g. `CodegenOptions`, `CodegenResult`, `CodegenStrategy`, `Content`, `BaseContentNode`, `Override`, `Source`, `Transformer`). Note which generated artifact in `examples/nextjs/generated/**` is downstream of each. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-codegen-input",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose how `packages/codegen/**` currently consumes inputs from `packages/core` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture introspection vs. type sourcing, document discovery, and generated artifact paths landing in `examples/nextjs/generated/**`. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-generated-output",
+ "depends_on": [],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose the current generated output shape consumed by `examples/nextjs`. Capture every generated file path, every TS export name the example imports, and every GraphQL document the example references. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "contract-synth",
+ "depends_on": [
+ "diag-core-types",
+ "diag-codegen-input",
+ "diag-generated-output"
+ ],
+ "complexity": "HIGH",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner` operating as the rank-2 merge node. Follow its output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`.\n\nProduce the codegen before/after contract for the proposed change: <FILL IN THE PROPOSED CHANGE>. `## Proposed contract` must lead with an executor-actionable diff: changed file paths grouped by directory, changed TS export names, changed GraphQL document shape. `## Human checkpoints` must call out the DevEx Validation gate before release."
+ },
+ {
+ "id": "wait-contract-approval",
+ "depends_on": ["contract-synth"],
+ "kind": "pause"
+ },
+ {
+ "id": "impl-codegen",
+ "depends_on": ["wait-contract-approval"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`. Group multi-file references under brace expansion.\n\nImplement the contract from the upstream synthesis exactly. Do not expand scope. Touch only `packages/codegen/**` and the generation pipeline. Run `pnpm --filter @flatbread/codegen test` and lint edited files."
+ },
+ {
+ "id": "impl-example-regen",
+ "depends_on": ["impl-codegen"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`.\n\nRegenerate `examples/nextjs` GraphQL artifacts via `pnpm --filter nextjs exec flatbread codegen` (the `--filter` is required because `flatbread.config.js` only exists at `examples/nextjs/flatbread.config.js`; `loadConfig` does not search up). Do NOT use `pnpm codegen`, which is `--watch` per `examples/nextjs/package.json:7` and would hang the DAG node. Do not hand-edit generated files. List every generated file that changed (group under brace expansion) plus any example source file that needed an import or query update."
+ },
+ {
+ "id": "verify-codegen-tests",
+ "depends_on": ["impl-codegen"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nRun `pnpm --filter @flatbread/codegen test`. Report failures, snapshot diffs, and any contract drift between the synthesized contract and what landed. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches the parent can append directly."
+ },
+ {
+ "id": "verify-example-build",
+ "depends_on": ["impl-example-regen"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nRun `pnpm --filter nextjs build` (binds port `5057` via `flatbread start -- next build` per `examples/nextjs/package.json:8`). Stop the build before exit. Report TS errors, missing imports, or query/document mismatches caused by the regenerated artifacts. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches."
+ },
+ {
+ "id": "browser-verify",
+ "depends_on": ["verify-example-build"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-browser-verifier`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Commands run`, `## Routes checked`, `## Observed behavior`, `## Mismatches`, `## Screenshots`, `## Residual risk`.\n\nRun `pnpm browser:doctor` first to fail fast if the browser CLI is unavailable. Start the example dev server in the background: `pnpm --filter nextjs dev` (binds port `5057` HTTP, `5058` HTTPS per `packages/flatbread/src/cli/index.ts:128-135`); the upstream `verify-example-build` task already finished and freed the port. Wait for the server to come up before driving `pnpm exec agent-browser`. Verify documented queries and the rendered example pages still match the README. Tear the dev server down before completing the task. If the browser CLI is unavailable, your `## Residual risk` MUST lead with `BROWSER UNAVAILABLE`."
+ }
+ ]
+}
diff --git a/.cursor/skills/proof/examples/flatbread/dag-docs-sync.json b/.cursor/skills/proof/examples/flatbread/dag-docs-sync.json
new file mode 100644
index 00000000..0dc276c0
--- /dev/null
+++ b/.cursor/skills/proof/examples/flatbread/dag-docs-sync.json
@@ -0,0 +1,51 @@
+{
+ "title": "Flatbread docs / README sync (no sub-sub-agents)",
+ "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
+ "models": {
+ "HIGH": "claude-opus-4-7",
+ "MED": "gpt-5.5",
+ "LOW": "gpt-5.4-mini"
+ },
+ "tasks": [
+ {
+ "id": "diag-readme-claims",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files (frontmatter `readonly: true` is advisory in DAG runs).\n\nList every claim, command, code snippet, and example query in the root `README.md` and each `packages/*/README.md`. For each, mark whether the current implementation still matches. Reference files as `path/to/file.md:line`."
+ },
+ {
+ "id": "diag-example-paths",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nWalk `examples/nextjs` end-to-end and capture the actual first-success path a developer sees: setup commands, codegen invocation (note `package.json:7` is `--watch`; the docs should point users at the appropriate command), dev command, port (`5057` HTTP, `5058` HTTPS sibling), sample query, sample edit. Compare against what the docs claim."
+ },
+ {
+ "id": "diag-positioning",
+ "depends_on": [],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nAudit positioning language in `README.md`, package READMEs, and any landing copy. Flag any phrasing that overclaims database-replacement (tables, foreign keys, joins, constraints, indexes, import/export) or treats GraphQL as the entire product identity."
+ },
+ {
+ "id": "docs-plan",
+ "depends_on": [
+ "diag-readme-claims",
+ "diag-example-paths",
+ "diag-positioning"
+ ],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner` operating as the rank-2 merge node. Follow its output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`.\n\nProduce a concrete docs-edit plan. `## Proposed contract` must lead with a flat list of `path/to/file.md:line — change` (group adjacent edits) so the executor can apply edits without re-reading the diagnoses."
+ },
+ {
+ "id": "impl-docs",
+ "depends_on": ["docs-plan"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`. Group multi-file references under brace expansion.\n\nApply the docs plan exactly. Touch only `*.md` files. Do not modify code. Run the project's lint/format on changed files."
+ },
+ {
+ "id": "review-docs",
+ "depends_on": ["impl-docs"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nReview the docs diff for: stale commands, drifting code snippets, broken cross-links, and any positioning that overclaims database semantics. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches the parent can append directly."
+ }
+ ]
+}
diff --git a/.cursor/skills/proof/examples/flatbread/dag-schema-migration.json b/.cursor/skills/proof/examples/flatbread/dag-schema-migration.json
new file mode 100644
index 00000000..97a8e0e9
--- /dev/null
+++ b/.cursor/skills/proof/examples/flatbread/dag-schema-migration.json
@@ -0,0 +1,153 @@
+{
+ "title": "Flatbread schema-breaking migration (no sub-sub-agents; pause at human checkpoint after contract-synth)",
+ "framing": "Treat Flatbread as Git-native relational content for TypeScript apps, backed by flat files. GraphQL is one interface, not the whole product identity.",
+ "models": {
+ "HIGH": "claude-opus-4-7",
+ "MED": "gpt-5.5",
+ "LOW": "gpt-5.4-mini"
+ },
+ "tasks": [
+ {
+ "id": "diag-schema",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files (frontmatter `readonly: true` is advisory in DAG runs).\n\nDiagnose `packages/core/src/generators/schema.ts` for the proposed change: <FILL IN THE PROPOSED CHANGE>. For `## Current contract` capture root query naming, ID/ref semantics, filter capabilities, and any user-visible GraphQL surface this file generates. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-resolvers",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/core/src/resolvers/arguments.ts` for the proposed change: <FILL IN THE PROPOSED CHANGE>. For `## Current contract` capture filter shape, supported operators, and any internal contracts other resolvers depend on. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-types",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/core/src/types.ts` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture which exported types cross package boundaries to `@flatbread/codegen`, the CLI, transformers, and examples. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-codegen",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/codegen/**` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture how generated TypeScript and GraphQL documents are produced, which inputs from `@flatbread/core` they depend on, and how the generated artifacts land in `examples/nextjs`. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-cli",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/flatbread/src/cli/index.ts` and the GraphQL server wiring for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture the Flatbread start command behavior, codegen invocation, `/graphql` endpoint, ports `5057` (HTTP) and `5058` (HTTPS sibling at `packages/flatbread/src/cli/index.ts:128-135`). Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-examples",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `examples/nextjs` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture which generated GraphQL documents and types the example consumes, which queries would need to change, and which README/docs snippets would drift. Note: `examples/nextjs/package.json:7` defines `pnpm codegen` as `flatbread codegen --watch` (hangs in DAG runs); list non-`--watch` invocations instead."
+ },
+ {
+ "id": "diag-docs",
+ "depends_on": [],
+ "complexity": "LOW",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `README.md` and each `packages/*/README.md` for the proposed change: <FILL IN THE PROPOSED CHANGE>. List every snippet, command, or claim that would no longer be true. Reference files as `path/to/file.md:line`."
+ },
+ {
+ "id": "diag-transformers",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/transformer-markdown/**` and `packages/transformer-yaml/**` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture how each implements `Transformer` (interface at `packages/core/src/types.ts:73-82`), especially `preknownSchemaFragments` (`packages/core/src/types.ts:79`), and which extensions they own. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-source-plugins",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/source-filesystem/**` for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture how it implements `Source` (interface at `packages/core/src/types.ts:95-101`), especially `fetch` and `fetchByType`, and which file-discovery assumptions would shift. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "diag-config",
+ "depends_on": [],
+ "complexity": "MED",
+      "subtask_prompt": "You are acting as `flatbread-architecture-planner`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`. Do not edit files.\n\nDiagnose `packages/config/**` (especially `packages/config/src/validate.ts`) for the proposed change: <FILL IN THE PROPOSED CHANGE>. Capture the validated `FlatbreadConfig` shape, every required field, and how validation diagnostics surface to the CLI. Reference files as `path/to/file.ts:line`."
+ },
+ {
+ "id": "contract-synth",
+ "depends_on": [
+ "diag-schema",
+ "diag-resolvers",
+ "diag-types",
+ "diag-codegen",
+ "diag-cli",
+ "diag-examples",
+ "diag-docs",
+ "diag-transformers",
+ "diag-source-plugins",
+ "diag-config"
+ ],
+ "complexity": "HIGH",
+ "subtask_prompt": "You are acting as `flatbread-architecture-planner` operating as the rank-2 contract synthesis node. Follow its output schema. Output must lead with these `##` headings verbatim: `## Current contract`, `## Proposed contract`, `## Migration impact`, `## Validation plan`, `## Human checkpoints`.\n\nMerge upstream diagnoses into a single before/after contract for IDs, refs, filters, root query names, generated TypeScript, config shape, transformer/source contracts, and CLI behavior. `## Proposed contract` must lead with the literal markdown table `| field | before | after | breaking? | files_to_change |` as the first content under the heading, followed by executor-actionable details (changed file paths grouped by directory, changed export/type names, changed CLI flags) before any prose. `## Human checkpoints` must explicitly call out Schema Contract approval before any executor runs.\n\nIMPORTANT for survival under the 2000-char downstream stitch: keep `## Current contract` to a single 1-2 line summary so the executor-actionable diff at the top of `## Proposed contract` lands within the first 2000 chars for every downstream `impl-*` task."
+ },
+ {
+ "id": "wait-contract-approval",
+ "depends_on": ["contract-synth"],
+ "kind": "pause"
+ },
+ {
+ "id": "impl-core",
+ "depends_on": ["wait-contract-approval"],
+ "complexity": "HIGH",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`. Group multi-file references under brace expansion (e.g. `packages/core/src/{generators/schema.ts,resolvers/arguments.ts,types.ts}`).\n\nImplement the contract from `contract-synth` exactly. Do not expand scope. Touch only `packages/core/src/**`. Serialize edits in this order inside this single task: schema.ts → arguments.ts → types.ts. Run `pnpm --filter @flatbread/core build` to confirm the package compiles (note: `packages/core/package.json` has no `test` script as of this writing) and lint edited files. Record the build outcome under `## Checks run`."
+ },
+ {
+ "id": "impl-docs",
+ "depends_on": ["wait-contract-approval"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`.\n\nUpdate `README.md`, every `packages/*/README.md`, and any migration notes the contract requires. Touch only `*.md` files — no code edits. Group changes under brace expansion in `## Files changed`."
+ },
+ {
+ "id": "impl-codegen",
+ "depends_on": ["impl-core"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`.\n\nImplement the codegen-side of the contract. Touch only `packages/codegen/**`. Run `pnpm --filter @flatbread/codegen test` and lint edited files."
+ },
+ {
+ "id": "impl-cli",
+ "depends_on": ["impl-core"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`.\n\nImplement the CLI-side of the contract. Touch only `packages/flatbread/src/**`. Do not run `flatbread start` (port 5057 is reserved for the rank-7 `verify-cli` task). Lint edited files."
+ },
+ {
+ "id": "impl-examples",
+ "depends_on": ["impl-codegen"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-migration-executor`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Files changed`, `## Contract implemented`, `## Checks run`, `## Checks skipped`, `## Residual risk`, `## Release gate state`.\n\nRegenerate `examples/nextjs` GraphQL artifacts via `pnpm --filter nextjs exec flatbread codegen` (the `--filter` is required because `flatbread.config.js` only exists at `examples/nextjs/flatbread.config.js`; `loadConfig` does not search up). Do NOT use `pnpm codegen`, which is `--watch` per `examples/nextjs/package.json:7` and would hang the DAG node. Update any example source file whose imports or queries broke. Group generated paths under brace expansion in `## Files changed`."
+ },
+ {
+ "id": "verify-schema-snap",
+ "depends_on": ["impl-core"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nDiff the generated GraphQL schema against the synthesized contract. Flag any drift. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches the parent can append directly."
+ },
+ {
+ "id": "verify-codegen",
+ "depends_on": ["impl-codegen"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nRun `pnpm --filter @flatbread/codegen test` and report failures. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches."
+ },
+ {
+ "id": "verify-readme",
+ "depends_on": ["impl-docs"],
+ "complexity": "LOW",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nDiff every README example and command against actual runtime behavior implied by the synthesized contract. Flag stale snippets, broken links, and any positioning drift. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches."
+ },
+ {
+ "id": "verify-cli",
+ "depends_on": ["impl-cli", "verify-codegen"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-adversarial-reviewer`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Blockers`, `## High-severity findings`, `## Medium-severity findings`, `## Low-severity findings`, `## Residual risk`, `## Recommended next DAG tasks`.\n\nSmoke-test `pnpm --filter nextjs dev` and the `/graphql` endpoint on port `5057` (HTTP) and `5058` (HTTPS). This task is the sole port-5057 occupant of its rank — no other task may bind that port concurrently. Stop the server before exit so `browser-verify` can take the port. If anything fails, populate `## Recommended next DAG tasks` with `id` + one-line subtask_prompt sketches."
+ },
+ {
+ "id": "browser-verify",
+ "depends_on": ["impl-examples", "verify-cli"],
+ "complexity": "MED",
+ "subtask_prompt": "You are acting as `flatbread-browser-verifier`. Follow its responsibilities and output schema. Output must lead with these `##` headings verbatim: `## Commands run`, `## Routes checked`, `## Observed behavior`, `## Mismatches`, `## Screenshots`, `## Residual risk`.\n\nRun `pnpm browser:doctor` first to fail fast if the browser CLI is unavailable. Start the example dev server in the background: `pnpm --filter nextjs dev` (binds port `5057` HTTP, `5058` HTTPS per `packages/flatbread/src/cli/index.ts:128-135`); the upstream `verify-cli` task already stopped its server before exit, so the port is free. Wait for the server to come up before driving `pnpm exec agent-browser`. Verify documented queries and rendered example pages still match READMEs. Tear the dev server down before completing the task. If the browser CLI is unavailable, your `## Residual risk` MUST lead with `BROWSER UNAVAILABLE` so the parent re-queues. This is the terminal release-gate node."
+ }
+ ]
+}
diff --git a/examples/nextjs/lib/graphql.ts b/examples/nextjs/lib/graphql.ts
index b340d266..a33538ac 100644
--- a/examples/nextjs/lib/graphql.ts
+++ b/examples/nextjs/lib/graphql.ts
@@ -22,17 +22,21 @@ export async function graphqlFetch(
  variables?: Record<string, unknown>,
endpoint: string = 'http://localhost:5057/graphql'
): Promise<unknown> {
+ const controller = new AbortController();
+ const timeout = setTimeout(() => controller.abort(), 15000);
+
const response = await fetch(endpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Accept: 'application/json',
},
+ signal: controller.signal,
body: JSON.stringify({
query,
variables,
}),
- });
+ }).finally(() => clearTimeout(timeout));
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
diff --git a/package.json b/package.json
index 3ffb14c8..e8c869cb 100644
--- a/package.json
+++ b/package.json
@@ -42,6 +42,7 @@
"@flatbread/codegen": "workspace:*",
"@flatbread/config": "workspace:*",
"@flatbread/core": "workspace:*",
+ "@flatbread/proof": "workspace:*",
"@flatbread/resolver-svimg": "workspace:*",
"@flatbread/source-filesystem": "workspace:*",
"@flatbread/transformer-markdown": "workspace:*",
diff --git a/packages/proof/bin/proof-supervisor.js b/packages/proof/bin/proof-supervisor.js
new file mode 100755
index 00000000..468147a8
--- /dev/null
+++ b/packages/proof/bin/proof-supervisor.js
@@ -0,0 +1,22 @@
+#!/usr/bin/env node
+import { resolve } from 'path';
+import { existsSync } from 'fs';
+
+if (process.env.FLATBREAD_CI) {
+ const cliPath = resolve(
+ process.cwd(),
+ 'node_modules',
+ '@flatbread',
+ 'proof',
+ 'dist',
+ 'run_dag_supervisor.js'
+ );
+
+ if (existsSync(cliPath)) {
+ import('../dist/run_dag_supervisor.js');
+ } else {
+ console.log('@flatbread/proof supervisor CLI is not available');
+ }
+} else {
+ import('../dist/run_dag_supervisor.js');
+}
diff --git a/packages/proof/bin/proof.js b/packages/proof/bin/proof.js
new file mode 100755
index 00000000..b350074a
--- /dev/null
+++ b/packages/proof/bin/proof.js
@@ -0,0 +1,22 @@
+#!/usr/bin/env node
+import { resolve } from 'path';
+import { existsSync } from 'fs';
+
+if (process.env.FLATBREAD_CI) {
+ const cliPath = resolve(
+ process.cwd(),
+ 'node_modules',
+ '@flatbread',
+ 'proof',
+ 'dist',
+ 'run_dag.js'
+ );
+
+ if (existsSync(cliPath)) {
+ import('../dist/run_dag.js');
+ } else {
+ console.log('@flatbread/proof CLI is not available');
+ }
+} else {
+ import('../dist/run_dag.js');
+}
diff --git a/packages/proof/package.json b/packages/proof/package.json
new file mode 100644
index 00000000..2a8ce6d0
--- /dev/null
+++ b/packages/proof/package.json
@@ -0,0 +1,51 @@
+{
+ "name": "@flatbread/proof",
+ "version": "0.1.0-alpha.0",
+ "description": "Decompose a task into a DAG of subagents and prove they did the work — live canvas, oracles, pause gates, and convergence loops.",
+ "type": "module",
+ "scripts": {
+ "build": "tsup",
+ "dev": "tsup --watch src",
+ "typecheck": "tsc -p tsconfig.json --noEmit",
+ "models:list": "tsx src/list_models.ts"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/FlatbreadLabs/flatbread.git",
+ "directory": "packages/proof"
+ },
+ "homepage": "https://github.com/FlatbreadLabs/flatbread/tree/main/packages/proof#readme",
+ "author": "Tony Ketcham ",
+ "license": "MIT",
+ "bugs": {
+ "url": "https://github.com/FlatbreadLabs/flatbread/issues"
+ },
+ "exports": {
+ ".": "./dist/index.js"
+ },
+ "main": "dist/index.js",
+ "module": "dist/index.js",
+ "types": "dist/index.d.ts",
+ "bin": {
+ "proof": "bin/proof.js",
+ "proof-supervisor": "bin/proof-supervisor.js"
+ },
+ "files": [
+ "bin",
+ "dist",
+ "src",
+ "*.d.ts"
+ ],
+ "engines": {
+ "node": ">=18"
+ },
+ "dependencies": {
+ "@cursor/sdk": "^1.0.9"
+ },
+ "devDependencies": {
+ "@types/node": "^22.10.0",
+ "tsup": "^8.3.0",
+ "tsx": "^4.19.0",
+ "typescript": "^5.7.0"
+ }
+}
diff --git a/packages/proof/src/canvas_writer.ts b/packages/proof/src/canvas_writer.ts
new file mode 100644
index 00000000..76c152ba
--- /dev/null
+++ b/packages/proof/src/canvas_writer.ts
@@ -0,0 +1,920 @@
+/**
+ * Renders the runner's in-memory state into a self-contained `.canvas.tsx`
+ * file. The IDE hot-recompiles on file change, so calling write() repeatedly
+ * gives the user a live view of the DAG run.
+ *
+ * The canvas is fully static React + cursor/canvas — all state is inlined as
+ * a `const STATE = {...}` literal. Only that literal changes between writes;
+ * the rendered template is identical.
+ */
+
+import { writeFile, mkdir } from 'node:fs/promises';
+import { dirname } from 'node:path';
+import type { Complexity, DAG, TaskKind } from './dag.js';
+
+export type TaskStatus =
+ | 'PENDING'
+ | 'RUNNING'
+ | 'FINISHED'
+ | 'ERROR'
+ | 'AWAITING_APPROVAL'
+ | 'BUDGET-EXCEEDED';
+
+export interface TaskState {
+ id: string;
+ depends_on: string[];
+ complexity: Complexity;
+ subtask_prompt: string;
+ status: TaskStatus;
+ model: string;
+ /** `'task'` (default), `'pause'`, or `'oracle'`. Undefined is normalized to `'task'`. */
+ kind?: TaskKind;
+ /**
+ * Shell command for `kind: 'oracle'` tasks. Surfaced in the canvas so the
+ * gate's pass/fail criterion is visible without reading the result body.
+ * Undefined for every other kind.
+ */
+ command?: string;
+ /** Regex source the oracle's output is matched against (defaults to `.*`). */
+ expect?: string;
+ startedAt?: number;
+ finishedAt?: number;
+ resultText?: string;
+ errorMessage?: string;
+ inputTokens?: number;
+ outputTokens?: number;
+ durationMs?: number;
+ /**
+ * Convergence-loop re-execution counter. 0/undefined = original run; bumped
+ * by 1 each time `--converge-on` re-runs this task to address upstream
+ * reviewer findings.
+ */
+ iteration?: number;
+ /**
+ * Absolute path to the sentinel file the runner created for a `kind: 'pause'`
+ * task. Set when status === `AWAITING_APPROVAL`; persisted afterwards so the
+ * canvas can show "approved by removing ".
+ */
+ checkpointPath?: string;
+}
+
+export interface RunState {
+ title: string;
+ startedAt: number;
+ finishedAt?: number;
+ /**
+ * Aggregate outcome of the entire run.
+ *
+ * - `SUCCESS` — every task finished cleanly.
+ * - `FAILED` — at least one task ended in `ERROR`.
+ * - `INTERRUPTED` — the runner caught a fatal signal (SIGINT/SIGTERM/SIGHUP).
+ * - `BUDGET_EXCEEDED` — a budget ceiling was crossed: either the
+ * `--converge-on` loop exhausted `--max-iterations` with the convergence
+ * task still reporting blockers, OR `dag.budget.maxTokensTotal` was
+ * exceeded. Both paths exit with `EXIT_BUDGET_EXCEEDED` (4) so
+ * wrappers can branch on budget overflows without parsing logs. Hyphen
+ * form (`BUDGET-EXCEEDED`) is reserved for the per-task `TaskStatus`;
+ * the run-level field uses underscores to match the rest of this enum.
+ * - `RESTARTING_RUNNER` — runner runtime files changed mid-run; the
+ * supervisor should relaunch the runner from persisted state so the next
+ * process executes the newly edited source.
+ */
+ runOutcome?:
+ | 'SUCCESS'
+ | 'FAILED'
+ | 'INTERRUPTED'
+ | 'BUDGET_EXCEEDED'
+ | 'RESTARTING_RUNNER';
+ runMessage?: string;
+ tasks: TaskState[];
+}
+
+export function initialRunState(
+ dag: DAG,
+ modelFor: (c: Complexity) => string
+): RunState {
+ return {
+ title: dag.title,
+ startedAt: Date.now(),
+ tasks: dag.tasks.map((t) => ({
+ id: t.id,
+ depends_on: t.depends_on,
+ complexity: t.complexity,
+ subtask_prompt: t.subtask_prompt,
+ status: 'PENDING',
+ model: modelFor(t.complexity),
+ // Normalize undefined kind → 'task' so downstream consumers (canvas
+ // template, runner dispatcher) never have to ?? again.
+ kind: t.kind ?? 'task',
+ // Surface oracle-only fields so the canvas can render the gate's
+ // command / expectation without reading the streamed result body.
+ ...(t.kind === 'oracle' ? { command: t.command, expect: t.expect } : {}),
+ })),
+ };
+}
+
+/**
+ * Debounced writer. Multiple write() calls inside the debounce window collapse
+ * into one filesystem write — the latest state always wins.
+ */
+export class CanvasWriter {
+ private pending: RunState | null = null;
+ private timer: NodeJS.Timeout | null = null;
+  private inFlight: Promise<void> = Promise.resolve();
+ private writeSeq = 0;
+ private lastFailedWriteSeq = 0;
+ private lastWriteError: unknown = null;
+
+ constructor(
+ private readonly canvasPath: string,
+ private readonly debounceMs: number = 200
+ ) {}
+
+ schedule(state: RunState): void {
+ this.pending = state;
+ if (this.timer) return;
+ this.timer = setTimeout(() => {
+ this.timer = null;
+ const snapshot = this.pending;
+ this.pending = null;
+ if (snapshot) {
+ this.enqueueWrite(snapshot);
+ }
+ }, this.debounceMs);
+ }
+
+ /** Force-flush any pending write and await disk completion. */
+  async flush(): Promise<void> {
+ if (this.timer) {
+ clearTimeout(this.timer);
+ this.timer = null;
+ }
+ const snapshot = this.pending;
+ this.pending = null;
+ const targetWriteSeq = snapshot
+ ? this.enqueueWrite(snapshot)
+ : this.writeSeq;
+ await this.inFlight;
+ if (targetWriteSeq > 0 && this.lastFailedWriteSeq === targetWriteSeq) {
+ throw this.lastWriteError;
+ }
+ }
+
+ private enqueueWrite(state: RunState): number {
+ const seq = ++this.writeSeq;
+ this.inFlight = this.inFlight.then(async () => {
+ try {
+ await this.writeNow(state);
+ if (this.lastFailedWriteSeq < seq) {
+ this.lastWriteError = null;
+ }
+ } catch (err) {
+ this.lastFailedWriteSeq = seq;
+ this.lastWriteError = err;
+ }
+ });
+ return seq;
+ }
+
+  private async writeNow(state: RunState): Promise<void> {
+ const source = renderCanvasSource(state);
+ await mkdir(dirname(this.canvasPath), { recursive: true });
+ await writeFile(this.canvasPath, source, 'utf8');
+ }
+}
+
+function renderCanvasSource(state: RunState): string {
+ const stateLiteral = JSON.stringify(state, null, 2);
+ return `${HEADER}\n\nconst STATE: RunState = ${stateLiteral};\n\n${BODY}\n`;
+}
+
+const HEADER = `/* AUTO-GENERATED by @flatbread/proof. Do not edit by hand — the runner overwrites this file. */
+import {
+ Card,
+ CardBody,
+ CardHeader,
+ Divider,
+ H1,
+ H2,
+ Pill,
+ Stack,
+ Stat,
+ Text,
+ computeDAGLayout,
+ useHostTheme,
+} from 'cursor/canvas';
+import { useEffect, useMemo, useState } from 'react';
+
+type TaskStatus =
+ | 'PENDING'
+ | 'RUNNING'
+ | 'FINISHED'
+ | 'ERROR'
+ | 'AWAITING_APPROVAL'
+ | 'BUDGET-EXCEEDED';
+type Complexity = 'HIGH' | 'MED' | 'LOW';
+type TaskKind = 'task' | 'pause' | 'oracle';
+
+interface TaskState {
+ id: string;
+ depends_on: string[];
+ complexity: Complexity;
+ subtask_prompt: string;
+ status: TaskStatus;
+ model: string;
+ kind?: TaskKind;
+ command?: string;
+ expect?: string;
+ startedAt?: number;
+ finishedAt?: number;
+ resultText?: string;
+ errorMessage?: string;
+ inputTokens?: number;
+ outputTokens?: number;
+ durationMs?: number;
+ iteration?: number;
+ checkpointPath?: string;
+}
+
+interface RunState {
+ title: string;
+ startedAt: number;
+ finishedAt?: number;
+ runOutcome?:
+ | 'SUCCESS'
+ | 'FAILED'
+ | 'INTERRUPTED'
+ | 'BUDGET_EXCEEDED'
+ | 'RESTARTING_RUNNER';
+ runMessage?: string;
+ tasks: TaskState[];
+}`;
+
+const BODY = String.raw`const NODE_H = 64;
+const SCROLL_STORAGE_KEY = '@flatbread/proof:scroll-y';
+const COMPLETED_DOT_COLOR = '#22c55e';
+const AWAITING_DOT_COLOR = '#f59e0b';
+const BUDGET_DOT_COLOR = '#ef4444';
+const COMPACT_BREAKPOINT_PX = 720;
+
+function effectiveKind(t: TaskState): TaskKind {
+ return t.kind ?? 'task';
+}
+
+function pillToneFor(status: TaskStatus): 'neutral' | 'info' | 'success' | 'warning' {
+ switch (status) {
+ case 'PENDING':
+ return 'neutral';
+ case 'RUNNING':
+ return 'info';
+ case 'FINISHED':
+ return 'success';
+ case 'ERROR':
+ return 'warning';
+ case 'AWAITING_APPROVAL':
+ return 'warning';
+ case 'BUDGET-EXCEEDED':
+ return 'warning';
+ }
+}
+
+function complexityTone(c: Complexity): 'neutral' | 'info' | 'warning' {
+ switch (c) {
+ case 'HIGH':
+ return 'warning';
+ case 'MED':
+ return 'info';
+ case 'LOW':
+ return 'neutral';
+ }
+}
+
+function formatDuration(ms?: number): string {
+ if (ms === undefined) return '—';
+ if (ms < 1000) return ms + 'ms';
+ const s = ms / 1000;
+ if (s < 60) return s.toFixed(1) + 's';
+ const m = Math.floor(s / 60);
+ const rem = Math.round(s - m * 60);
+ return m + 'm ' + rem + 's';
+}
+
+function elapsed(state: RunState): number {
+ const end = state.finishedAt ?? Date.now();
+ return end - state.startedAt;
+}
+
+function totalTokens(state: RunState): { input: number; output: number } {
+ let input = 0;
+ let output = 0;
+ for (const t of state.tasks) {
+ input += t.inputTokens ?? 0;
+ output += t.outputTokens ?? 0;
+ }
+ return { input, output };
+}
+
+function taskElementId(taskId: string): string {
+ return 'task-card-' + taskId;
+}
+
+function useViewportWidth(): number {
+ const [width, setWidth] = useState(1024);
+
+ useEffect(() => {
+ if (typeof window === 'undefined') return;
+ const update = (): void => setWidth(window.innerWidth);
+ update();
+ window.addEventListener('resize', update);
+ return () => window.removeEventListener('resize', update);
+ }, []);
+
+ return width;
+}
+
+function getScrollY(): number {
+ if (typeof window === 'undefined') return 0;
+ return Math.max(window.scrollY ?? 0, 0);
+}
+
+function saveScrollY(): void {
+ if (typeof window === 'undefined') return;
+ try {
+ window.sessionStorage.setItem(SCROLL_STORAGE_KEY, String(getScrollY()));
+ } catch {
+ // ignore storage failures
+ }
+}
+
+function restoreScrollY(): void {
+ if (typeof window === 'undefined') return;
+ let target = 0;
+ try {
+ const raw = window.sessionStorage.getItem(SCROLL_STORAGE_KEY);
+ if (!raw) return;
+ const parsed = Number(raw);
+ if (!Number.isFinite(parsed) || parsed <= 0) return;
+ target = Math.floor(parsed);
+ } catch {
+ return;
+ }
+
+ // Retry because hot-reload can run before content height has settled.
+ let attempts = 0;
+ const maxAttempts = 8;
+ const tick = (): void => {
+ attempts += 1;
+ const scrollHeight = Math.max(
+ document.documentElement?.scrollHeight ?? 0,
+ document.body?.scrollHeight ?? 0,
+ );
+ const maxY = Math.max(scrollHeight - window.innerHeight, 0);
+ if (maxY <= 0) {
+ if (attempts < maxAttempts) window.requestAnimationFrame(tick);
+ return;
+ }
+ const desiredY = Math.min(target, maxY);
+ window.scrollTo({ top: desiredY, behavior: 'auto' });
+ if (attempts < maxAttempts && Math.abs(getScrollY() - desiredY) > 2) {
+ window.requestAnimationFrame(tick);
+ }
+ };
+ window.requestAnimationFrame(tick);
+}
+
+function DAGGraph({
+ state,
+ onNodeClick,
+}: {
+ state: RunState;
+ onNodeClick?: (taskId: string) => void;
+}): JSX.Element {
+ const theme = useHostTheme();
+ const viewportWidth = useViewportWidth();
+ const isCompact = viewportWidth < COMPACT_BREAKPOINT_PX;
+ const nodeWidth = isCompact ? 168 : 200;
+ const nodeGap = isCompact ? 24 : 40;
+ const rankGap = isCompact ? 60 : 72;
+ const layoutPadding = isCompact ? 12 : 24;
+ const titleLimit = Math.max(12, Math.floor((nodeWidth - 44) / 7));
+ const layout = computeDAGLayout({
+ nodes: state.tasks.map((t) => ({ id: t.id })),
+ edges: state.tasks.flatMap((t) =>
+ t.depends_on.map((d) => ({ from: d, to: t.id })),
+ ),
+ direction: 'vertical',
+ nodeWidth,
+ nodeHeight: NODE_H,
+ rankGap,
+ nodeGap,
+ padding: layoutPadding,
+ });
+
+ const byId = new Map(state.tasks.map((t) => [t.id, t]));
+
+ function nodeFill(status: TaskStatus): string {
+ switch (status) {
+ case 'PENDING':
+ return theme.fill.tertiary;
+ case 'RUNNING':
+ return theme.fill.secondary;
+ case 'FINISHED':
+ return theme.fill.secondary;
+ case 'ERROR':
+ return theme.fill.secondary;
+ case 'AWAITING_APPROVAL':
+ return theme.fill.secondary;
+ case 'BUDGET-EXCEEDED':
+ return theme.fill.secondary;
+ }
+ }
+
+ function nodeStroke(status: TaskStatus): string {
+ switch (status) {
+ case 'PENDING':
+ return theme.stroke.tertiary;
+ case 'RUNNING':
+ return theme.accent.primary;
+ case 'FINISHED':
+ return COMPLETED_DOT_COLOR;
+ case 'ERROR':
+ return theme.stroke.primary;
+ case 'AWAITING_APPROVAL':
+ return AWAITING_DOT_COLOR;
+ case 'BUDGET-EXCEEDED':
+ return BUDGET_DOT_COLOR;
+ }
+ }
+
+ function statusGlyph(status: TaskStatus): string {
+ switch (status) {
+ case 'PENDING':
+ return '○';
+ case 'RUNNING':
+ return '◐';
+ case 'FINISHED':
+ return '●';
+ case 'ERROR':
+ return '×';
+ case 'AWAITING_APPROVAL':
+ return '⏸';
+ case 'BUDGET-EXCEEDED':
+ return '⊘';
+ }
+ }
+
+ function statusGlyphColor(status: TaskStatus): string {
+ switch (status) {
+ case 'PENDING':
+ return theme.text.tertiary;
+ case 'RUNNING':
+ return theme.accent.primary;
+ case 'FINISHED':
+ return COMPLETED_DOT_COLOR;
+ case 'ERROR':
+ return theme.text.primary;
+ case 'AWAITING_APPROVAL':
+ return AWAITING_DOT_COLOR;
+ case 'BUDGET-EXCEEDED':
+ return BUDGET_DOT_COLOR;
+ }
+ }
+
+ return (
+