mirror of https://github.com/TriliumNext/Notes.git
synced 2025-07-30 03:32:26 +08:00

streaming works for ollama :)

This commit is contained in:
parent 451e5ea31f
commit 681e8bb1ce
@@ -344,7 +344,8 @@ export class ChatPipeline {
                 // If streaming was enabled, send an update to the user
                 if (isStreaming && streamCallback) {
                     streamingPaused = true;
-                    await streamCallback('', true); // Signal pause in streaming
+                    // IMPORTANT: Don't send done:true here, as it causes the client to stop processing messages
+                    // Instead, send a marker message that indicates tools will be executed
                     await streamCallback('\n\n[Executing tools...]\n\n', false);
                 }

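Context for the hunk above: stream consumers conventionally treat done=true as end-of-stream and discard everything that arrives afterwards, which is why the old `await streamCallback('', true)` pause signal cut the client off before any tool results could be streamed. A minimal sketch of that contract (the StreamCallback type and consumer below are illustrative assumptions, not the project's actual definitions):

// Hypothetical sketch: why done=true must be sent only once, at the real end.
type StreamCallback = (text: string, done: boolean) => Promise<void>;

function makeConsumer(onText: (t: string) => void): StreamCallback {
    let finished = false;
    return async (text, done) => {
        if (finished) return; // anything sent after done=true is silently dropped
        onText(text);
        if (done) finished = true;
    };
}
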
@@ -566,8 +567,15 @@ export class ChatPipeline {

                 // If streaming was paused for tool execution, resume it now with the final response
                 if (isStreaming && streamCallback && streamingPaused) {
+                    // First log for debugging
+                    log.info(`Resuming streaming with final response: ${currentResponse.text.length} chars`);
+
                     // Resume streaming with the final response text
+                    // This is where we send the definitive done:true signal with the complete content
                     await streamCallback(currentResponse.text, true);
+
+                    // Log confirmation
+                    log.info(`Sent final response with done=true signal`);
                 }
             } else if (toolsEnabled) {
                 log.info(`========== NO TOOL CALLS DETECTED ==========`);
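Read together with the first hunk, the intended lifecycle is: content chunks stream with done=false, the "[Executing tools...]" marker goes out while streaming is paused, and the complete final text is sent exactly once with done=true. A simplified sketch of that ordering (streamWithTools and runTools are stand-ins invented for illustration, not the pipeline's real internals):

// Illustrative only: the pause/resume ordering the two ChatPipeline hunks implement.
async function streamWithTools(
    chunks: AsyncIterable<string>,
    runTools: () => Promise<string>, // assumed stand-in for tool execution
    cb: (text: string, done: boolean) => Promise<void>
): Promise<void> {
    for await (const chunk of chunks) {
        await cb(chunk, false); // normal content chunks
    }
    await cb('\n\n[Executing tools...]\n\n', false); // pause marker, NOT done
    const finalText = await runTools();
    await cb(finalText, true); // the one and only done=true signal
}
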
@@ -476,29 +476,14 @@ export class OllamaService extends BaseAIService {
                     // Call the callback with the current chunk content
                     if (opts.streamCallback) {
                         try {
-                            // For the final chunk, make sure to send the complete text with done=true
-                            if (chunk.done) {
-                                log.info(`Sending final callback with done=true and complete content (${completeText.length} chars)`);
+                            // Don't send done:true when tool calls are present to avoid premature completion
+                            const shouldMarkAsDone = !!chunk.done && !responseToolCalls.length;
                             await opts.streamCallback(
-                                completeText, // Send the full accumulated content for the final chunk
-                                true,
-                                { ...chunk, message: { ...chunk.message, content: completeText } }
-                            );
-                            } else if (chunk.message?.content) {
-                                // For content chunks, send them as they come
-                                await opts.streamCallback(
-                                    chunk.message.content,
-                                    !!chunk.done,
+                                chunk.message?.content || '',
+                                shouldMarkAsDone,
                                 chunk
                             );
-                            } else if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) {
-                                // For tool call chunks, send an empty content string but include the tool calls
-                                await opts.streamCallback(
-                                    '',
-                                    !!chunk.done,
-                                    chunk
-                                );
-                            }

                             if (chunkCount === 1) {
                                 log.info(`Successfully called streamCallback with first chunk`);
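The behavioral core of this hunk is that chunk.done no longer terminates the stream when tool calls have been accumulated; completion is deferred until the tools have run and the pipeline resumes. Extracted in isolation (the OllamaChunk shape is an assumption mirroring only the fields the diff touches):

// Assumed chunk shape, named after the fields used in the diff above.
interface OllamaChunk {
    done?: boolean;
    message?: { content?: string; tool_calls?: unknown[] };
}

// done only counts as done when no tool calls are pending; otherwise the
// pipeline pauses, executes the tools, and sends done=true later itself.
function shouldMarkAsDone(chunk: OllamaChunk, responseToolCalls: unknown[]): boolean {
    return !!chunk.done && responseToolCalls.length === 0;
}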