Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib/chat/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export interface ChatStrategy {
chat(
payload: ChatRequest,
abortSignal: AbortSignal,
onChunk: (content: string) => void
onChunk: (part: { content?: string; thinking?: string }) => void
): Promise<void>;

getModels(): Promise<Model[]>;
Expand Down
24 changes: 20 additions & 4 deletions src/lib/chat/ollama.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ import type { Model } from '$lib/settings';

import type { ChatStrategy } from './index';

// Ollama streaming chat can return message.thinking and message.content (thinking-capable models)
// Shape of the `message` field in one streamed NDJSON chunk from POST /api/chat.
// Both fields are optional deltas: a chunk may carry either, both, or neither.
interface StreamMessage {
// Incremental text of the assistant's final answer, when present in this chunk
content?: string;
// Incremental text of the model's reasoning stream (sent separately from content
// by thinking-capable models such as qwen3 / deepseek-r1 — see chat() below)
thinking?: string;
}

export interface OllamaOptions {
numa: boolean;
num_ctx: number;
Expand Down Expand Up @@ -55,12 +61,14 @@ export class OllamaStrategy implements ChatStrategy {
async chat(
payload: ChatRequest,
abortSignal: AbortSignal,
onChunk: (content: string) => void
onChunk: (part: { content?: string; thinking?: string }) => void
): Promise<void> {
// Enable thinking stream for reasoning models (qwen3, deepseek-r1, etc.)
const body = { ...payload, think: true };
const response = await fetch(`${this.server.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'text/event-stream' },
body: JSON.stringify(payload),
body: JSON.stringify(body),
signal: abortSignal
});

Expand All @@ -83,8 +91,16 @@ export class OllamaStrategy implements ChatStrategy {
const chatResponses = value.split('\n').filter((line) => line);

for (const chatResponse of chatResponses) {
const { message } = JSON.parse(chatResponse) as ChatResponse;
onChunk(message.content);
const parsed = JSON.parse(chatResponse) as ChatResponse & { message?: StreamMessage };
const message = parsed.message;
if (!message) continue;
// Stream thinking and content separately; API sends them in separate chunks
if (message.thinking != null) {
onChunk({ thinking: message.thinking });
}
if (message.content != null) {
onChunk({ content: message.content });
}
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/lib/chat/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export class OpenAIStrategy implements ChatStrategy {
async chat(
payload: ChatRequest,
abortSignal: AbortSignal,
onChunk: (content: string) => void
onChunk: (part: { content?: string; thinking?: string }) => void
): Promise<void> {
const formattedMessages = payload.messages.map(
(message: Message): ChatCompletionMessageParam => {
Expand Down Expand Up @@ -68,7 +68,8 @@ export class OpenAIStrategy implements ChatStrategy {

for await (const chunk of response) {
if (abortSignal.aborted) break;
onChunk(chunk.choices[0].delta.content || '');
const text = chunk.choices[0].delta.content || '';
if (text) onChunk({ content: text });
}
}

Expand Down
13 changes: 9 additions & 4 deletions src/routes/sessions/[id]/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,15 @@
}
);

await strategy.chat(chatRequest, editor.abortController.signal, async (chunk) => {
// Process the chunk using the FSM-based processor
reasoningProcessor.processChunk(chunk);
await scrollToBottom();
await strategy.chat(chatRequest, editor.abortController.signal, async (part) => {
if (part.thinking != null) {
editor.reasoning += part.thinking;
await scrollToBottom();
}
if (part.content != null) {
reasoningProcessor.processChunk(part.content);
await scrollToBottom();
}
});

// Finalize processing of any remaining content
Expand Down