diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 383ac35b4c..332a6101fd 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -214,7 +214,7 @@ "@qvac/rag": "^0.6.2", "@qvac/registry-client": "^0.6.0", "@qvac/transcription-parakeet": "^0.7.1", - "@qvac/transcription-whispercpp": "^0.8.0", + "@qvac/transcription-whispercpp": "^0.9.0", "@qvac/translation-nmtcpp": "^5.0.1", "@qvac/tts-ggml": "^0.2.0", "@qvac/vla-ggml": "^0.3.2", diff --git a/packages/sdk/schemas/transcription.ts b/packages/sdk/schemas/transcription.ts index b54480f945..9ace1640dc 100644 --- a/packages/sdk/schemas/transcription.ts +++ b/packages/sdk/schemas/transcription.ts @@ -37,6 +37,14 @@ export const transcribeStatsSchema = z.object({ encoderTime: z.number().optional(), decoderTime: z.number().optional(), melSpecTime: z.number().optional(), + // Backend selection captured once at model load (whisper engine). + // `backendDevice` is `0` for CPU / `1` for GPU; `backendId` codes the GPU + // family (see the addon's BackendId enum). GPU memory fields are `-1` when + // the backend exposes no accounting, and absent entirely for the parakeet engine.
+ backendDevice: z.number().optional(), + backendId: z.number().optional(), + gpuMemTotalMb: z.number().optional(), + gpuMemFreeMb: z.number().optional(), }); export const transcribeSegmentSchema = z.object({ diff --git a/packages/sdk/server/bare/ops/transcribe.ts b/packages/sdk/server/bare/ops/transcribe.ts index 2f7efa68cc..070ee620dc 100644 --- a/packages/sdk/server/bare/ops/transcribe.ts +++ b/packages/sdk/server/bare/ops/transcribe.ts @@ -242,6 +242,10 @@ export async function* transcribe( ...(response.stats?.encoderMs !== undefined && { encoderTime: response.stats.encoderMs }), ...(response.stats?.decoderMs !== undefined && { decoderTime: response.stats.decoderMs }), ...(response.stats?.melSpecMs !== undefined && { melSpecTime: response.stats.melSpecMs }), + ...(response.stats?.backendDevice !== undefined && { backendDevice: response.stats.backendDevice }), + ...(response.stats?.backendId !== undefined && { backendId: response.stats.backendId }), + ...(response.stats?.gpuMemTotalMb !== undefined && { gpuMemTotalMb: response.stats.gpuMemTotalMb }), + ...(response.stats?.gpuMemFreeMb !== undefined && { gpuMemFreeMb: response.stats.gpuMemFreeMb }), }; return buildStreamResult(modelExecutionMs, stats); diff --git a/packages/sdk/server/bare/types/addon-responses.ts b/packages/sdk/server/bare/types/addon-responses.ts index 3f850fa48e..789775c232 100644 --- a/packages/sdk/server/bare/types/addon-responses.ts +++ b/packages/sdk/server/bare/types/addon-responses.ts @@ -60,6 +60,10 @@ export interface TranscribeStats { encoderMs?: number; decoderMs?: number; melSpecMs?: number; + backendDevice?: number; + backendId?: number; + gpuMemTotalMb?: number; + gpuMemFreeMb?: number; } export interface TranscribeAddonSegment { diff --git a/packages/sdk/test/unit/transcription-stats-schema.test.ts b/packages/sdk/test/unit/transcription-stats-schema.test.ts new file mode 100644 index 0000000000..bb40841914 --- /dev/null +++ 
b/packages/sdk/test/unit/transcription-stats-schema.test.ts
@@ -0,0 +1,53 @@
+import test from "brittle";
+import { transcribeStatsSchema } from "@/schemas/transcription";
+
+// Unit tests for the backend/GPU fields of `transcribeStatsSchema`.
+
+// All four fields come back unchanged when they are supplied together.
+test("transcribeStatsSchema: round-trips whisper backend/GPU stats", (t) => {
+  const result = transcribeStatsSchema.parse({
+    realTimeFactor: 0.25,
+    tokensPerSecond: 120,
+    backendDevice: 1,
+    backendId: 3,
+    gpuMemTotalMb: 8192,
+    gpuMemFreeMb: 4096,
+  });
+  t.is(result.backendDevice, 1);
+  t.is(result.backendId, 3);
+  t.is(result.gpuMemTotalMb, 8192);
+  t.is(result.gpuMemFreeMb, 4096);
+});
+
+// `0` is falsy but is a real code (CPU, per the schema comment), so it must be kept.
+test("transcribeStatsSchema: preserves backendDevice 0", (t) => {
+  const result = transcribeStatsSchema.parse({ backendDevice: 0 });
+  t.is(result.backendDevice, 0);
+});
+
+// Compare against `undefined` explicitly: `t.absent` only asserts falsiness and
+// would also pass for a reported value of `0`.
+test("transcribeStatsSchema: backend/GPU fields are optional (parakeet path)", (t) => {
+  const result = transcribeStatsSchema.parse({ realTimeFactor: 1.5 });
+  t.is(result.backendDevice, undefined);
+  t.is(result.backendId, undefined);
+  t.is(result.gpuMemTotalMb, undefined);
+  t.is(result.gpuMemFreeMb, undefined);
+});
+
+test("transcribeStatsSchema: accepts the -1 no-accounting sentinel for gpu memory", (t) => {
+  const result = transcribeStatsSchema.parse({
+    backendDevice: 1,
+    backendId: 2,
+    gpuMemTotalMb: -1,
+    gpuMemFreeMb: -1,
+  });
+  t.is(result.gpuMemTotalMb, -1);
+  t.is(result.gpuMemFreeMb, -1);
+});
+
+// A string where a numeric code is expected must make `parse` throw.
+test("transcribeStatsSchema: rejects non-numeric backend fields", (t) => {
+  t.exception(() =>
+    transcribeStatsSchema.parse({ backendId: "vulkan" }),
+  );
+});