Streaming works for Ollama :)

This commit is contained in:
perf3ct 2025-04-11 20:26:47 +00:00
parent 451e5ea31f
commit 681e8bb1ce
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
2 changed files with 26 additions and 33 deletions

View File

@ -344,7 +344,8 @@ export class ChatPipeline {
// If streaming was enabled, send an update to the user
if (isStreaming && streamCallback) {
streamingPaused = true;
await streamCallback('', true); // Signal pause in streaming
// IMPORTANT: Don't send done:true here, as it causes the client to stop processing messages
// Instead, send a marker message that indicates tools will be executed
await streamCallback('\n\n[Executing tools...]\n\n', false);
}
@ -566,8 +567,15 @@ export class ChatPipeline {
// If streaming was paused for tool execution, resume it now with the final response
if (isStreaming && streamCallback && streamingPaused) {
// First log for debugging
log.info(`Resuming streaming with final response: ${currentResponse.text.length} chars`);
// Resume streaming with the final response text
// This is where we send the definitive done:true signal with the complete content
await streamCallback(currentResponse.text, true);
// Log confirmation
log.info(`Sent final response with done=true signal`);
}
} else if (toolsEnabled) {
log.info(`========== NO TOOL CALLS DETECTED ==========`);

View File

@ -476,29 +476,14 @@ export class OllamaService extends BaseAIService {
// Call the callback with the current chunk content
if (opts.streamCallback) {
try {
// For the final chunk, make sure to send the complete text with done=true
if (chunk.done) {
log.info(`Sending final callback with done=true and complete content (${completeText.length} chars)`);
// Don't send done:true when tool calls are present to avoid premature completion
const shouldMarkAsDone = !!chunk.done && !responseToolCalls.length;
await opts.streamCallback(
completeText, // Send the full accumulated content for the final chunk
true,
{ ...chunk, message: { ...chunk.message, content: completeText } }
);
} else if (chunk.message?.content) {
// For content chunks, send them as they come
await opts.streamCallback(
chunk.message.content,
!!chunk.done,
chunk.message?.content || '',
shouldMarkAsDone,
chunk
);
} else if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) {
// For tool call chunks, send an empty content string but include the tool calls
await opts.streamCallback(
'',
!!chunk.done,
chunk
);
}
if (chunkCount === 1) {
log.info(`Successfully called streamCallback with first chunk`);