diff --git a/src/lib/chat/index.ts b/src/lib/chat/index.ts index e6d9af2e..b616bd92 100644 --- a/src/lib/chat/index.ts +++ b/src/lib/chat/index.ts @@ -23,7 +23,7 @@ export interface ChatStrategy { chat( payload: ChatRequest, abortSignal: AbortSignal, - onChunk: (content: string) => void + onChunk: (part: { content?: string; thinking?: string }) => void ): Promise<void>; getModels(): Promise<Model[]>; diff --git a/src/lib/chat/ollama.ts b/src/lib/chat/ollama.ts index 40153895..27455b86 100644 --- a/src/lib/chat/ollama.ts +++ b/src/lib/chat/ollama.ts @@ -13,6 +13,12 @@ import type { Model } from '$lib/settings'; import type { ChatStrategy } from './index'; +// Ollama streaming chat can return message.thinking and message.content (thinking-capable models) +interface StreamMessage { + content?: string; + thinking?: string; +} + export interface OllamaOptions { numa: boolean; num_ctx: number; @@ -55,12 +61,14 @@ export class OllamaStrategy implements ChatStrategy { async chat( payload: ChatRequest, abortSignal: AbortSignal, - onChunk: (content: string) => void + onChunk: (part: { content?: string; thinking?: string }) => void ): Promise<void> { + // Enable thinking stream for reasoning models (qwen3, deepseek-r1, etc.) 
+ const body = { ...payload, think: true }; const response = await fetch(`${this.server.baseUrl}/api/chat`, { method: 'POST', headers: { 'Content-Type': 'text/event-stream' }, - body: JSON.stringify(payload), + body: JSON.stringify(body), signal: abortSignal }); @@ -83,8 +91,16 @@ export class OllamaStrategy implements ChatStrategy { const chatResponses = value.split('\n').filter((line) => line); for (const chatResponse of chatResponses) { - const { message } = JSON.parse(chatResponse) as ChatResponse; - onChunk(message.content); + const parsed = JSON.parse(chatResponse) as ChatResponse & { message?: StreamMessage }; + const message = parsed.message; + if (!message) continue; + // Stream thinking and content separately; API sends them in separate chunks + if (message.thinking != null) { + onChunk({ thinking: message.thinking }); + } + if (message.content != null) { + onChunk({ content: message.content }); + } } } } diff --git a/src/lib/chat/openai.ts b/src/lib/chat/openai.ts index f4d7cda4..ff617c7c 100644 --- a/src/lib/chat/openai.ts +++ b/src/lib/chat/openai.ts @@ -23,7 +23,7 @@ export class OpenAIStrategy implements ChatStrategy { async chat( payload: ChatRequest, abortSignal: AbortSignal, - onChunk: (content: string) => void + onChunk: (part: { content?: string; thinking?: string }) => void ): Promise<void> { const formattedMessages = payload.messages.map( (message: Message): ChatCompletionMessageParam => { @@ -68,7 +68,8 @@ for await (const chunk of response) { if (abortSignal.aborted) break; - onChunk(chunk.choices[0].delta.content || ''); + const text = chunk.choices[0].delta.content || ''; + if (text) onChunk({ content: text }); } } diff --git a/src/routes/sessions/[id]/+page.svelte b/src/routes/sessions/[id]/+page.svelte index edeaca84..f09ca940 100644 --- a/src/routes/sessions/[id]/+page.svelte +++ b/src/routes/sessions/[id]/+page.svelte @@ -209,10 +209,15 @@ } ); - await strategy.chat(chatRequest, 
editor.abortController.signal, async (chunk) => { - // Process the chunk using the FSM-based processor - reasoningProcessor.processChunk(chunk); - await scrollToBottom(); + await strategy.chat(chatRequest, editor.abortController.signal, async (part) => { + if (part.thinking != null) { + editor.reasoning += part.thinking; + await scrollToBottom(); + } + if (part.content != null) { + reasoningProcessor.processChunk(part.content); + await scrollToBottom(); + } }); // Finalize processing of any remaining content