getting closer to streaming?
even closer? closer streaming... this is darn close

parent b68ff88840
commit 451e5ea31f
@@ -127,6 +127,49 @@ async function handleMessage(event: MessageEvent<any>) {
         appContext.triggerEvent("apiLogMessages", { noteId: message.noteId, messages: message.messages });
     } else if (message.type === "toast") {
         toastService.showMessage(message.message);
+    } else if (message.type === "llm-stream") {
+        // ENHANCED LOGGING FOR DEBUGGING
+        console.log(`[WS-CLIENT] >>> RECEIVED LLM STREAM MESSAGE <<<`);
+        console.log(`[WS-CLIENT] Message details: sessionId=${message.sessionId}, hasContent=${!!message.content}, contentLength=${message.content ? message.content.length : 0}, hasThinking=${!!message.thinking}, hasToolExecution=${!!message.toolExecution}, isDone=${!!message.done}`);
+
+        if (message.content) {
+            console.log(`[WS-CLIENT] CONTENT PREVIEW: "${message.content.substring(0, 50)}..."`);
+        }
+
+        // Create the event with detailed logging
+        console.log(`[WS-CLIENT] Creating CustomEvent 'llm-stream-message'`);
+        const llmStreamEvent = new CustomEvent('llm-stream-message', { detail: message });
+
+        // Dispatch to multiple targets to ensure delivery
+        try {
+            console.log(`[WS-CLIENT] Dispatching event to window`);
+            window.dispatchEvent(llmStreamEvent);
+            console.log(`[WS-CLIENT] Event dispatched to window`);
+
+            // Also try document for completeness
+            console.log(`[WS-CLIENT] Dispatching event to document`);
+            document.dispatchEvent(new CustomEvent('llm-stream-message', { detail: message }));
+            console.log(`[WS-CLIENT] Event dispatched to document`);
+        } catch (err) {
+            console.error(`[WS-CLIENT] Error dispatching event:`, err);
+        }
+
+        // Debug current listeners (though we can't directly check for specific event listeners)
+        console.log(`[WS-CLIENT] Active event listeners should receive this message now`);
+
+        // Detailed logging based on message type
+        if (message.content) {
+            console.log(`[WS-CLIENT] Content message: ${message.content.length} chars`);
+        } else if (message.thinking) {
+            console.log(`[WS-CLIENT] Thinking update: "${message.thinking}"`);
+        } else if (message.toolExecution) {
+            console.log(`[WS-CLIENT] Tool execution: action=${message.toolExecution.action}, tool=${message.toolExecution.tool || 'unknown'}`);
+            if (message.toolExecution.result) {
+                console.log(`[WS-CLIENT] Tool result preview: "${String(message.toolExecution.result).substring(0, 50)}..."`);
+            }
+        } else if (message.done) {
+            console.log(`[WS-CLIENT] Completion signal received`);
+        }
     } else if (message.type === "execute-script") {
         // TODO: Remove after porting the file
         // @ts-ignore
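The block added above fans each `llm-stream` WebSocket message out as a `llm-stream-message` CustomEvent on both `window` and `document`. For orientation, a minimal subscriber sketch against that event, assuming only the fields the logging above references (`sessionId`, `content`, `done`); the function name and callback are illustrative, not part of this commit:

```typescript
// Hypothetical consumer of the CustomEvent dispatched by the handler above.
function subscribeToLlmStream(sessionId: string, onChunk: (text: string) => void): void {
    const handler = (event: Event) => {
        const message = (event as CustomEvent).detail;
        // The dispatch is global, so filter out broadcasts for other sessions.
        if (!message || message.sessionId !== sessionId) {
            return;
        }
        if (message.content) {
            onChunk(message.content);
        }
        // Unsubscribe once the stream signals completion.
        if (message.done) {
            window.removeEventListener('llm-stream-message', handler);
        }
    };
    window.addEventListener('llm-stream-message', handler);
}
```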
@@ -7,6 +7,7 @@ import { t } from "../services/i18n.js";
 import libraryLoader from "../services/library_loader.js";
 import { applySyntaxHighlight } from "../services/syntax_highlight.js";
 import options from "../services/options.js";
+import ws from "../services/ws.js";
 import { marked } from "marked";

 // Import the LLM Chat CSS
@@ -105,6 +106,8 @@ export default class LlmChatPanel extends BasicWidget {
     private validationWarning!: HTMLElement;
     private sessionId: string | null = null;
     private currentNoteId: string | null = null;
+    private _messageHandlerId: number | null = null;
+    private _messageHandler: any = null;

     // Callbacks for data persistence
     private onSaveData: ((data: any) => Promise<void>) | null = null;
@@ -178,6 +181,15 @@ export default class LlmChatPanel extends BasicWidget {
         return this.$widget;
     }

+    cleanup() {
+        console.log(`LlmChatPanel cleanup called, removing any active WebSocket subscriptions`);
+
+        // No need to manually clean up the event listeners, as they will be garbage collected
+        // when the component is destroyed. We only need to clean up references.
+        this._messageHandler = null;
+        this._messageHandlerId = null;
+    }
+
     /**
      * Set the callbacks for data persistence
      */
@@ -375,16 +387,15 @@ export default class LlmChatPanel extends BasicWidget {
             // Create the message parameters
             const messageParams = {
                 content,
-                contextNoteId: this.currentNoteId,
                 useAdvancedContext,
                 showThinking
             };

-            // First try to use streaming (preferred method)
+            // Try websocket streaming (preferred method)
             try {
                 await this.setupStreamingResponse(messageParams);
             } catch (streamingError) {
-                console.warn("Streaming request failed, falling back to direct response:", streamingError);
+                console.warn("WebSocket streaming failed, falling back to direct response:", streamingError);

                 // If streaming fails, fall back to direct response
                 const handled = await this.handleDirectResponse(messageParams);
@@ -424,12 +435,14 @@ export default class LlmChatPanel extends BasicWidget {
      */
     private async handleDirectResponse(messageParams: any): Promise<boolean> {
         try {
-            // Add format parameter to maintain consistency with the streaming GET request
+            // Create a copy of the params without any streaming flags
             const postParams = {
                 ...messageParams,
-                format: 'stream'  // Match the format parameter used in the GET streaming request
+                stream: false  // Explicitly set to false to ensure we get a direct response
             };

+            console.log(`Sending direct POST request for session ${this.sessionId}`);
+
             // Send the message via POST request with the updated params
             const postResponse = await server.post<any>(`llm/sessions/${this.sessionId}/messages`, postParams);

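After this change the two paths differ in both endpoint and flag: the fallback POSTs to `llm/sessions/{id}/messages` with `stream: false` and reads the answer from the HTTP response, while the streaming path (rewritten in the next hunk) POSTs to `llm/sessions/{id}/messages/stream` with `stream: true` and receives the answer over the WebSocket. Illustrative request bodies, with placeholder values:

```typescript
// Direct fallback: the reply comes back in the POST response itself.
const directBody = {
    content: "Hello",            // placeholder user message
    useAdvancedContext: false,
    showThinking: false,
    stream: false                // explicit, so the server never streams this one
};

// Streaming: the POST only kicks things off; chunks arrive as 'llm-stream'
// WebSocket messages re-dispatched as 'llm-stream-message' CustomEvents.
const streamingBody = {
    content: "Hello",
    useAdvancedContext: false,
    showThinking: false,
    stream: true
};
```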
@@ -474,184 +487,318 @@ export default class LlmChatPanel extends BasicWidget {
     }

     /**
-     * Set up streaming response from the server
+     * Set up streaming response via WebSocket
      */
     private async setupStreamingResponse(messageParams: any): Promise<void> {
+        const content = messageParams.content || '';
         const useAdvancedContext = messageParams.useAdvancedContext;
         const showThinking = messageParams.showThinking;

-        // Set up streaming via EventSource - explicitly add stream=true parameter to ensure consistency
-        const streamUrl = `./api/llm/sessions/${this.sessionId}/messages?format=stream&stream=true&useAdvancedContext=${useAdvancedContext}&showThinking=${showThinking}`;
-
         return new Promise((resolve, reject) => {
-            const source = new EventSource(streamUrl);
             let assistantResponse = '';
             let receivedAnyContent = false;
             let timeoutId: number | null = null;
+            let initialTimeoutId: number | null = null;
+            let receivedAnyMessage = false;
+            let eventListener: ((event: Event) => void) | null = null;

-            // Set up timeout for streaming response
-            timeoutId = this.setupStreamingTimeout(source);
+            // Create a unique identifier for this response process
+            const responseId = `llm-stream-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
+            console.log(`[${responseId}] Setting up WebSocket streaming for session ${this.sessionId}`);

-            // Handle streaming response
-            source.onmessage = (event) => {
-                try {
-                    if (event.data === '[DONE]') {
-                        // Stream completed successfully
-                        this.handleStreamingComplete(source, timeoutId, receivedAnyContent, assistantResponse);
-                        resolve();
-                        return;
-                    }
-
-                    const data = JSON.parse(event.data);
-                    console.log("Received streaming data:", data);
-
-                    // Handle both content and error cases
-                    if (data.content) {
-                        receivedAnyContent = true;
-                        assistantResponse += data.content;
-
-                        // Update the UI with the accumulated response
-                        this.updateStreamingUI(assistantResponse);
-                    } else if (data.toolExecution) {
-                        // Handle tool execution info
-                        this.showToolExecutionInfo(data.toolExecution);
-                        // When tool execution info is received, also show the loading indicator
-                        // in case it's not already visible
-                        this.loadingIndicator.style.display = 'flex';
-                    } else if (data.error) {
-                        // Handle error message
-                        this.hideLoadingIndicator();
-                        this.addMessageToChat('assistant', `Error: ${data.error}`);
-
-                        if (timeoutId !== null) {
-                            window.clearTimeout(timeoutId);
-                        }
-
-                        source.close();
-                        reject(new Error(data.error));
-                        return;
-                    }
-
-                    // Scroll to the bottom
-                    this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
-                } catch (e) {
-                    console.error('Error parsing SSE message:', e, 'Raw data:', event.data);
-                    reject(e);
-                }
-            };
-
-            // Handle streaming errors
-            source.onerror = (err) => {
-                console.error("EventSource error:", err);
-                source.close();
-                this.hideLoadingIndicator();
-
-                // Clear the timeout if there was an error
-                if (timeoutId !== null) {
-                    window.clearTimeout(timeoutId);
-                }
-
-                // Only reject if we haven't received any content yet
-                if (!receivedAnyContent) {
-                    reject(new Error('Error connecting to the LLM streaming service'));
-                } else {
-                    // If we've already received some content, consider it a successful but incomplete response
-                    this.handleStreamingComplete(source, timeoutId, receivedAnyContent, assistantResponse);
+            // Create a message handler for CustomEvents
+            eventListener = (event: Event) => {
+                const customEvent = event as CustomEvent;
+                const message = customEvent.detail;
+
+                // Only process messages for our session
+                if (!message || message.sessionId !== this.sessionId) {
+                    return;
+                }
+
+                console.log(`[${responseId}] LLM Stream message received via CustomEvent: session=${this.sessionId}, content=${!!message.content}, contentLength=${message.content?.length || 0}, thinking=${!!message.thinking}, toolExecution=${!!message.toolExecution}, done=${!!message.done}`);
+
+                // Mark first message received
+                if (!receivedAnyMessage) {
+                    receivedAnyMessage = true;
+                    console.log(`[${responseId}] First message received for session ${this.sessionId}`);
+
+                    // Clear the initial timeout since we've received a message
+                    if (initialTimeoutId !== null) {
+                        window.clearTimeout(initialTimeoutId);
+                        initialTimeoutId = null;
+                    }
+                }
+
+                // Handle content updates
+                if (message.content) {
+                    receivedAnyContent = true;
+                    assistantResponse += message.content;
+
+                    // Update the UI immediately
+                    this.updateStreamingUI(assistantResponse);
+
+                    // Reset timeout since we got content
+                    if (timeoutId !== null) {
+                        window.clearTimeout(timeoutId);
+                    }
+
+                    // Set new timeout
+                    timeoutId = window.setTimeout(() => {
+                        console.warn(`[${responseId}] Stream timeout for session ${this.sessionId}`);
+
+                        // Save what we have
+                        if (assistantResponse) {
+                            console.log(`[${responseId}] Saving partial response due to timeout (${assistantResponse.length} chars)`);
+                            this.messages.push({
+                                role: 'assistant',
+                                content: assistantResponse,
+                                timestamp: new Date()
+                            });
+                            this.saveCurrentData().catch(err => {
+                                console.error(`[${responseId}] Failed to save partial response:`, err);
+                            });
+                        }
+
+                        // Clean up
+                        this.cleanupEventListener(eventListener);
+                        this.hideLoadingIndicator();
+                        reject(new Error('Stream timeout'));
+                    }, 30000);
+                }
+
+                // Handle tool execution updates
+                if (message.toolExecution) {
+                    console.log(`[${responseId}] Received tool execution update: action=${message.toolExecution.action || 'unknown'}`);
+                    this.showToolExecutionInfo(message.toolExecution);
+                    this.loadingIndicator.style.display = 'flex';
+                }
+
+                // Handle thinking state updates
+                if (message.thinking) {
+                    console.log(`[${responseId}] Received thinking update: ${message.thinking.substring(0, 50)}...`);
+                    this.showThinkingState(message.thinking);
+                    this.loadingIndicator.style.display = 'flex';
+                }
+
+                // Handle completion
+                if (message.done) {
+                    console.log(`[${responseId}] Stream completed for session ${this.sessionId}, has content: ${!!message.content}, content length: ${message.content?.length || 0}, current response: ${assistantResponse.length} chars`);
+
+                    // Dump message content to console for debugging
+                    if (message.content) {
+                        console.log(`[${responseId}] CONTENT IN DONE MESSAGE (first 200 chars): "${message.content.substring(0, 200)}..."`);
+                    }
+
+                    // Clear timeout if set
+                    if (timeoutId !== null) {
+                        window.clearTimeout(timeoutId);
+                        timeoutId = null;
+                    }
+
+                    // Check if we have content in the done message
+                    // This is particularly important for Ollama which often sends the entire response in one message
+                    if (message.content) {
+                        console.log(`[${responseId}] Processing content in done message: ${message.content.length} chars`);
+                        receivedAnyContent = true;
+
+                        // Replace current response if we didn't have content before or if it's empty
+                        if (assistantResponse.length === 0) {
+                            console.log(`[${responseId}] Using content from done message as full response`);
+                            assistantResponse = message.content;
+                        }
+                        // Otherwise append it if it's different
+                        else if (message.content !== assistantResponse) {
+                            console.log(`[${responseId}] Appending content from done message to existing response`);
+                            assistantResponse += message.content;
+                        }
+                        else {
+                            console.log(`[${responseId}] Content in done message is identical to existing response, not appending`);
+                        }
+
+                        this.updateStreamingUI(assistantResponse);
+                    }
+
+                    // Save the final response
+                    if (assistantResponse) {
+                        console.log(`[${responseId}] Saving final response of ${assistantResponse.length} chars`);
+                        this.messages.push({
+                            role: 'assistant',
+                            content: assistantResponse,
+                            timestamp: new Date()
+                        });
+
+                        this.saveCurrentData().catch(err => {
+                            console.error(`[${responseId}] Failed to save final response:`, err);
+                        });
+                    } else {
+                        // If we didn't receive any content at all, show a generic message
+                        console.log(`[${responseId}] No content received for session ${this.sessionId}`);
+                        const defaultMessage = 'I processed your request, but I don\'t have any specific information to share at the moment.';
+                        this.processAssistantResponse(defaultMessage);
+                    }
+
+                    // Clean up and resolve
+                    this.cleanupEventListener(eventListener);
+                    this.hideLoadingIndicator();
                     resolve();
                 }
             };
+
+            // Register event listener for the custom event
+            try {
+                window.addEventListener('llm-stream-message', eventListener);
+                console.log(`[${responseId}] Event listener added for llm-stream-message events`);
+            } catch (err) {
+                console.error(`[${responseId}] Error setting up event listener:`, err);
+                reject(err);
+                return;
+            }
+
+            // Set initial timeout for receiving any message
+            initialTimeoutId = window.setTimeout(() => {
+                console.warn(`[${responseId}] No messages received for initial period in session ${this.sessionId}`);
+                if (!receivedAnyMessage) {
+                    console.error(`[${responseId}] WebSocket connection not established for session ${this.sessionId}`);
+
+                    if (timeoutId !== null) {
+                        window.clearTimeout(timeoutId);
+                    }
+
+                    // Clean up
+                    this.cleanupEventListener(eventListener);
+                    this.hideLoadingIndicator();
+
+                    // Show error message to user
+                    const errorMessage = 'Connection error: Unable to establish WebSocket streaming.';
+                    this.processAssistantResponse(errorMessage);
+                    reject(new Error('WebSocket connection not established'));
+                }
+            }, 10000);
+
+            // Send the streaming request to start the process
+            console.log(`[${responseId}] Sending HTTP POST request to initiate streaming: /llm/sessions/${this.sessionId}/messages/stream`);
+            server.post(`llm/sessions/${this.sessionId}/messages/stream`, {
+                content,
+                useAdvancedContext,
+                showThinking,
+                stream: true // Explicitly indicate this is a streaming request
+            }).catch(err => {
+                console.error(`[${responseId}] HTTP error sending streaming request for session ${this.sessionId}:`, err);
+
+                // Clean up timeouts
+                if (initialTimeoutId !== null) {
+                    window.clearTimeout(initialTimeoutId);
+                    initialTimeoutId = null;
+                }
+
+                if (timeoutId !== null) {
+                    window.clearTimeout(timeoutId);
+                    timeoutId = null;
+                }
+
+                // Clean up event listener
+                this.cleanupEventListener(eventListener);
+
+                reject(err);
+            });
         });
     }

     /**
-     * Set up timeout for streaming response
-     * @returns Timeout ID for the created timeout
+     * Clean up an event listener
      */
-    private setupStreamingTimeout(source: EventSource): number {
-        // Set a timeout to handle case where streaming doesn't work properly
-        return window.setTimeout(() => {
-            // If we haven't received any content after a reasonable timeout (10 seconds),
-            // add a fallback message and close the stream
-            this.hideLoadingIndicator();
-            const errorMessage = 'I\'m having trouble generating a response right now. Please try again later.';
-            this.processAssistantResponse(errorMessage);
-            source.close();
-        }, 10000);
+    private cleanupEventListener(listener: ((event: Event) => void) | null): void {
+        if (listener) {
+            try {
+                window.removeEventListener('llm-stream-message', listener);
+                console.log(`Successfully removed event listener`);
+            } catch (err) {
+                console.error(`Error removing event listener:`, err);
+            }
+        }
     }

     /**
      * Update the UI with streaming content as it arrives
      */
     private updateStreamingUI(assistantResponse: string) {
+        const logId = `ui-update-${Date.now()}`;
+        console.log(`[${logId}] Updating UI with response text: ${assistantResponse.length} chars`);
+
+        if (!this.noteContextChatMessages) {
+            console.error(`[${logId}] noteContextChatMessages element not available`);
+            return;
+        }
+
+        // Check if we already have an assistant message element to update
         const assistantElement = this.noteContextChatMessages.querySelector('.assistant-message:last-child .message-content');
+
         if (assistantElement) {
-            assistantElement.innerHTML = this.formatMarkdown(assistantResponse);
-            // Apply syntax highlighting to any code blocks in the updated content
-            applySyntaxHighlight($(assistantElement as HTMLElement));
+            console.log(`[${logId}] Found existing assistant message element, updating content`);
+            try {
+                // Format markdown and update the element
+                const formattedContent = this.formatMarkdown(assistantResponse);
+
+                // Ensure content is properly formatted
+                if (!formattedContent || formattedContent.trim() === '') {
+                    console.warn(`[${logId}] Formatted content is empty, using original content`);
+                    assistantElement.textContent = assistantResponse;
+                } else {
+                    assistantElement.innerHTML = formattedContent;
+                }
+
+                // Apply syntax highlighting to any code blocks in the updated content
+                applySyntaxHighlight($(assistantElement as HTMLElement));
+
+                console.log(`[${logId}] Successfully updated existing element with ${formattedContent.length} chars of HTML`);
+            } catch (err) {
+                console.error(`[${logId}] Error updating existing element:`, err);
+                // Fallback to text content if HTML update fails
+                try {
+                    assistantElement.textContent = assistantResponse;
+                    console.log(`[${logId}] Fallback to text content successful`);
+                } catch (fallbackErr) {
+                    console.error(`[${logId}] Even fallback update failed:`, fallbackErr);
+                }
+            }
         } else {
-            this.addMessageToChat('assistant', assistantResponse);
-        }
-    }
-
-    /**
-     * Handle completion of streaming response
-     */
-    private handleStreamingComplete(
-        source: EventSource,
-        timeoutId: number | null,
-        receivedAnyContent: boolean,
-        assistantResponse: string
-    ) {
-        // Stream completed
-        source.close();
-        this.hideLoadingIndicator();
-
-        // Clear the timeout since we're done
-        if (timeoutId !== null) {
-            window.clearTimeout(timeoutId);
-        }
-
-        // If we didn't receive any content but the stream completed normally,
-        // display a message to the user
-        if (!receivedAnyContent) {
-            const defaultMessage = 'I processed your request, but I don\'t have any specific information to share at the moment.';
-            this.processAssistantResponse(defaultMessage);
-        } else if (assistantResponse) {
-            // Save the completed streaming response to the message array
-            this.messages.push({
-                role: 'assistant',
-                content: assistantResponse,
-                timestamp: new Date()
-            });
-
-            // Save to note
-            this.saveCurrentData().catch(err => {
-                console.error("Failed to save assistant response to note:", err);
-            });
-        }
-    }
-
-    /**
-     * Handle errors during streaming response
-     */
-    private handleStreamingError(
-        source: EventSource,
-        timeoutId: number | null,
-        receivedAnyContent: boolean
-    ) {
-        source.close();
-        this.hideLoadingIndicator();
-
-        // Clear the timeout if there was an error
-        if (timeoutId !== null) {
-            window.clearTimeout(timeoutId);
-        }
-
-        // Only show error message if we haven't received any content yet
-        if (!receivedAnyContent) {
-            // Instead of automatically showing the error message in the chat,
-            // throw an error so the parent function can handle the fallback
-            throw new Error('Error connecting to the LLM streaming service');
+            console.log(`[${logId}] No existing assistant message element found, creating new one`);
+            try {
+                this.addMessageToChat('assistant', assistantResponse);
+                console.log(`[${logId}] Successfully added new assistant message`);
+            } catch (err) {
+                console.error(`[${logId}] Error adding new message:`, err);
+
+                // Last resort emergency approach - create element directly
+                try {
+                    console.log(`[${logId}] Attempting emergency DOM update`);
+                    const emergencyElement = document.createElement('div');
+                    emergencyElement.className = 'chat-message assistant-message mb-3 d-flex';
+                    emergencyElement.innerHTML = `
+                        <div class="message-avatar d-flex align-items-center justify-content-center me-2 assistant-avatar">
+                            <i class="bx bx-bot"></i>
+                        </div>
+                        <div class="message-content p-3 rounded flex-grow-1 assistant-content">
+                            ${assistantResponse}
+                        </div>
+                    `;
+                    this.noteContextChatMessages.appendChild(emergencyElement);
+                    console.log(`[${logId}] Emergency DOM update successful`);
+                } catch (emergencyErr) {
+                    console.error(`[${logId}] Emergency DOM update failed:`, emergencyErr);
+                }
+            }
+        }
+
+        // Always try to scroll to the latest content
+        try {
+            if (this.chatContainer) {
+                this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
+                console.log(`[${logId}] Scrolled to latest content`);
+            }
+        } catch (scrollErr) {
+            console.error(`[${logId}] Error scrolling to latest content:`, scrollErr);
         }
     }
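The rewritten handler guards the stream with two timers: a 10-second one that fires if no message at all arrives (connection never established) and a rolling 30-second one re-armed on every content chunk (stall mid-stream). The same pattern in isolation, as a sketch with illustrative names:

```typescript
// Two-timer watchdog mirroring the logic above: `initialTimer` catches a
// connection that never delivers anything, `inactivityTimer` catches stalls.
function makeStreamWatchdog(onTimeout: (reason: string) => void) {
    let inactivityTimer: number | null = null;
    let initialTimer: number | null = window.setTimeout(
        () => onTimeout('no first message within 10s'), 10_000);

    return {
        onMessage(): void {
            // The first message cancels the initial timer...
            if (initialTimer !== null) {
                window.clearTimeout(initialTimer);
                initialTimer = null;
            }
            // ...and every message re-arms the rolling inactivity timer.
            if (inactivityTimer !== null) {
                window.clearTimeout(inactivityTimer);
            }
            inactivityTimer = window.setTimeout(
                () => onTimeout('no chunk within 30s'), 30_000);
        },
        stop(): void {
            if (initialTimer !== null) window.clearTimeout(initialTimer);
            if (inactivityTimer !== null) window.clearTimeout(inactivityTimer);
        }
    };
}
```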
@@ -755,32 +902,111 @@ export default class LlmChatPanel extends BasicWidget {
     }

     private showLoadingIndicator() {
-        this.loadingIndicator.style.display = 'flex';
-        // Reset the tool execution area when starting a new request, but keep it visible
-        // We'll make it visible when we get our first tool execution event
-        this.toolExecutionInfo.style.display = 'none';
-        this.toolExecutionSteps.innerHTML = '';
+        const logId = `ui-${Date.now()}`;
+        console.log(`[${logId}] Showing loading indicator and preparing tool execution display`);
+
+        // Ensure elements exist before trying to modify them
+        if (!this.loadingIndicator || !this.toolExecutionInfo || !this.toolExecutionSteps) {
+            console.error(`[${logId}] UI elements not properly initialized`);
+            return;
+        }
+
+        // Force display of loading indicator
+        try {
+            this.loadingIndicator.style.display = 'flex';
+
+            // Make sure tool execution info area is always visible even before we get the first event
+            // This helps avoid the UI getting stuck in "Processing..." state
+            this.toolExecutionInfo.style.display = 'block';
+
+            // Clear previous tool steps but add a placeholder
+            this.toolExecutionSteps.innerHTML = `
+                <div class="tool-step my-1">
+                    <div class="d-flex align-items-center">
+                        <i class="bx bx-loader-alt bx-spin text-primary me-1"></i>
+                        <span>Initializing...</span>
+                    </div>
+                </div>
+            `;
+
+            // Force a UI update by accessing element properties
+            const forceUpdate = this.loadingIndicator.offsetHeight;
+
+            // Verify display states
+            console.log(`[${logId}] Loading indicator display state: ${this.loadingIndicator.style.display}`);
+            console.log(`[${logId}] Tool execution info display state: ${this.toolExecutionInfo.style.display}`);
+
+            console.log(`[${logId}] Loading indicator and tool execution area initialized`);
+        } catch (err) {
+            console.error(`[${logId}] Error showing loading indicator:`, err);
+        }
     }

     private hideLoadingIndicator() {
-        this.loadingIndicator.style.display = 'none';
-        this.toolExecutionInfo.style.display = 'none';
+        const logId = `ui-${Date.now()}`;
+        console.log(`[${logId}] Hiding loading indicator and tool execution area`);
+
+        // Ensure elements exist before trying to modify them
+        if (!this.loadingIndicator || !this.toolExecutionInfo) {
+            console.error(`[${logId}] UI elements not properly initialized`);
+            return;
+        }
+
+        // Properly reset DOM elements
+        try {
+            // First hide the tool execution info area
+            this.toolExecutionInfo.style.display = 'none';
+
+            // Force a UI update by accessing element properties
+            const forceUpdate1 = this.toolExecutionInfo.offsetHeight;
+
+            // Then hide the loading indicator
+            this.loadingIndicator.style.display = 'none';
+
+            // Force another UI update
+            const forceUpdate2 = this.loadingIndicator.offsetHeight;
+
+            // Verify display states immediately
+            console.log(`[${logId}] Loading indicator display state: ${this.loadingIndicator.style.display}`);
+            console.log(`[${logId}] Tool execution info display state: ${this.toolExecutionInfo.style.display}`);
+
+            // Add a delay to double-check that UI updates are complete
+            setTimeout(() => {
+                console.log(`[${logId}] Verification after hide timeout: loading indicator display=${this.loadingIndicator.style.display}, tool execution info display=${this.toolExecutionInfo.style.display}`);
+
+                // Force display none again in case something changed it
+                if (this.loadingIndicator.style.display !== 'none') {
+                    console.log(`[${logId}] Loading indicator still visible after timeout, forcing hidden`);
+                    this.loadingIndicator.style.display = 'none';
+                }
+
+                if (this.toolExecutionInfo.style.display !== 'none') {
+                    console.log(`[${logId}] Tool execution info still visible after timeout, forcing hidden`);
+                    this.toolExecutionInfo.style.display = 'none';
+                }
+            }, 100);
+        } catch (err) {
+            console.error(`[${logId}] Error hiding loading indicator:`, err);
+        }
     }
-    
+
     /**
      * Show tool execution information in the UI
      */
     private showToolExecutionInfo(toolExecutionData: any) {
+        console.log(`Showing tool execution info: ${JSON.stringify(toolExecutionData)}`);
+
         // Make sure tool execution info section is visible
         this.toolExecutionInfo.style.display = 'block';
-        
+        this.loadingIndicator.style.display = 'flex'; // Ensure loading indicator is shown during tool execution
+
         // Create a new step element to show the tool being executed
         const stepElement = document.createElement('div');
         stepElement.className = 'tool-step my-1';
-        
+
         // Basic styling for the step
         let stepHtml = '';
-        
+
         if (toolExecutionData.action === 'start') {
             // Tool execution starting
             stepHtml = `
@@ -814,20 +1040,26 @@ export default class LlmChatPanel extends BasicWidget {
                 </div>
             `;
         }
-        
-        stepElement.innerHTML = stepHtml;
-        this.toolExecutionSteps.appendChild(stepElement);
-        
-        // Scroll to bottom of tool execution steps
-        this.toolExecutionSteps.scrollTop = this.toolExecutionSteps.scrollHeight;
+
+        if (stepHtml) {
+            stepElement.innerHTML = stepHtml;
+            this.toolExecutionSteps.appendChild(stepElement);
+
+            // Scroll to bottom of tool execution steps
+            this.toolExecutionSteps.scrollTop = this.toolExecutionSteps.scrollHeight;
+
+            console.log(`Added new tool execution step to UI`);
+        } else {
+            console.log(`No HTML generated for tool execution data:`, toolExecutionData);
+        }
     }
-    
+
     /**
      * Format tool arguments for display
      */
     private formatToolArgs(args: any): string {
         if (!args || typeof args !== 'object') return '';
-        
+
         return Object.entries(args)
             .map(([key, value]) => {
                 // Format the value based on its type
@@ -843,25 +1075,25 @@ export default class LlmChatPanel extends BasicWidget {
                 } else {
                     displayValue = String(value);
                 }
-                
+
                 return `<span class="text-primary">${this.escapeHtml(key)}</span>: ${this.escapeHtml(displayValue)}`;
             })
             .join(', ');
     }
-    
+
     /**
      * Format tool results for display
      */
     private formatToolResult(result: any): string {
         if (result === undefined || result === null) return '';
-        
+
         // Try to format as JSON if it's an object
         if (typeof result === 'object') {
             try {
                 // Get a preview of structured data
                 const entries = Object.entries(result);
                 if (entries.length === 0) return 'Empty result';
-                
+
                 // Just show first 2 key-value pairs if there are many
                 const preview = entries.slice(0, 2).map(([key, val]) => {
                     let valPreview;
@@ -876,22 +1108,22 @@ export default class LlmChatPanel extends BasicWidget {
                     }
                     return `${key}: ${valPreview}`;
                 }).join(', ');
-                
+
                 return entries.length > 2 ? `${preview}, ... (${entries.length} properties)` : preview;
             } catch (e) {
                 return String(result).substring(0, 100) + (String(result).length > 100 ? '...' : '');
             }
         }
-        
+
         // For string results
         if (typeof result === 'string') {
             return result.length > 100 ? result.substring(0, 97) + '...' : result;
         }
-        
+
         // Default formatting
         return String(result).substring(0, 100) + (String(result).length > 100 ? '...' : '');
     }
-    
+
     /**
      * Simple HTML escaping for safer content display
      */
@@ -899,7 +1131,7 @@ export default class LlmChatPanel extends BasicWidget {
         if (typeof text !== 'string') {
             text = String(text || '');
         }
-        
+
         return text
             .replace(/&/g, '&amp;')
             .replace(/</g, '&lt;')
@@ -968,6 +1200,26 @@ export default class LlmChatPanel extends BasicWidget {
         return processedContent;
     }

+    /**
+     * Show thinking state in the UI
+     */
+    private showThinkingState(thinkingData: string) {
+        // Update the UI to show thinking indicator
+        const thinking = typeof thinkingData === 'string' ? thinkingData : 'Thinking...';
+        const toolExecutionStep = document.createElement('div');
+        toolExecutionStep.className = 'tool-step my-1';
+        toolExecutionStep.innerHTML = `
+            <div class="d-flex align-items-center">
+                <i class="bx bx-bulb text-warning me-1"></i>
+                <span>${this.escapeHtml(thinking)}</span>
+            </div>
+        `;
+
+        this.toolExecutionInfo.style.display = 'block';
+        this.toolExecutionSteps.appendChild(toolExecutionStep);
+        this.toolExecutionSteps.scrollTop = this.toolExecutionSteps.scrollHeight;
+    }
+
     /**
      * Validate embedding providers configuration
      * Check if there are issues with the embedding providers that might affect LLM functionality
@@ -1067,4 +1319,4 @@ export default class LlmChatPanel extends BasicWidget {
             this.validationWarning.style.display = 'none';
         }
     }
 }
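The `forceUpdate` reads of `offsetHeight` in `showLoadingIndicator`/`hideLoadingIndicator` above exist to force a synchronous layout flush, so the display change is committed before the next log or timeout runs. The same trick as a standalone helper sketch (the helper itself is not part of this commit):

```typescript
// Reading a layout property such as offsetHeight forces the browser to flush
// pending style changes synchronously (a "forced reflow").
function setDisplayAndFlush(el: HTMLElement, display: string): void {
    el.style.display = display;
    void el.offsetHeight; // the read itself triggers the reflow; value unused
}
```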
@@ -791,6 +791,163 @@ async function indexNote(req: Request, res: Response) {
     }
 }
 
+/**
+ * @swagger
+ * /api/llm/sessions/{sessionId}/messages/stream:
+ *   post:
+ *     summary: Start a streaming response session via WebSockets
+ *     operationId: llm-stream-message
+ *     parameters:
+ *       - name: sessionId
+ *         in: path
+ *         required: true
+ *         schema:
+ *           type: string
+ *     requestBody:
+ *       required: true
+ *       content:
+ *         application/json:
+ *           schema:
+ *             type: object
+ *             properties:
+ *               content:
+ *                 type: string
+ *                 description: The user message to send to the LLM
+ *               useAdvancedContext:
+ *                 type: boolean
+ *                 description: Whether to use advanced context extraction
+ *               showThinking:
+ *                 type: boolean
+ *                 description: Whether to show thinking process in the response
+ *     responses:
+ *       '200':
+ *         description: Streaming started successfully
+ *       '404':
+ *         description: Session not found
+ *       '500':
+ *         description: Error processing request
+ *     security:
+ *       - session: []
+ *     tags: ["llm"]
+ */
+async function streamMessage(req: Request, res: Response) {
+    log.info("=== Starting streamMessage ===");
+    try {
+        const sessionId = req.params.sessionId;
+        const { content, useAdvancedContext, showThinking } = req.body;
+
+        if (!content || typeof content !== 'string' || content.trim().length === 0) {
+            throw new Error('Content cannot be empty');
+        }
+
+        // Check if session exists
+        const session = restChatService.getSessions().get(sessionId);
+        if (!session) {
+            throw new Error('Session not found');
+        }
+
+        // Update last active timestamp
+        session.lastActive = new Date();
+
+        // Add user message to the session
+        session.messages.push({
+            role: 'user',
+            content,
+            timestamp: new Date()
+        });
+
+        // Create request parameters for the pipeline
+        const requestParams = {
+            sessionId,
+            content,
+            useAdvancedContext: useAdvancedContext === true,
+            showThinking: showThinking === true,
+            stream: true // Always stream for this endpoint
+        };
+
+        // Create a fake request/response pair to pass to the handler
+        const fakeReq = {
+            ...req,
+            method: 'GET', // Set to GET to indicate streaming
+            query: {
+                stream: 'true', // Set stream param - don't use format: 'stream' to avoid confusion
+                useAdvancedContext: String(useAdvancedContext === true),
+                showThinking: String(showThinking === true)
+            },
+            params: {
+                sessionId
+            },
+            // Make sure the original content is available to the handler
+            body: {
+                content,
+                useAdvancedContext: useAdvancedContext === true,
+                showThinking: showThinking === true
+            }
+        } as unknown as Request;
+
+        // Log to verify correct parameters
+        log.info(`WebSocket stream settings - useAdvancedContext=${useAdvancedContext === true}, in query=${fakeReq.query.useAdvancedContext}, in body=${fakeReq.body.useAdvancedContext}`);
+        // Extra safety to ensure the parameters are passed correctly
+        if (useAdvancedContext === true) {
+            log.info(`Enhanced context IS enabled for this request`);
+        } else {
+            log.info(`Enhanced context is NOT enabled for this request`);
+        }
+
+        // Process the request in the background
+        Promise.resolve().then(async () => {
+            try {
+                await restChatService.handleSendMessage(fakeReq, res);
+            } catch (error) {
+                log.error(`Background message processing error: ${error}`);
+
+                // Import the WebSocket service
+                const wsService = (await import('../../services/ws.js')).default;
+
+                // Define LLMStreamMessage interface
+                interface LLMStreamMessage {
+                    type: 'llm-stream';
+                    sessionId: string;
+                    content?: string;
+                    thinking?: string;
+                    toolExecution?: any;
+                    done?: boolean;
+                    error?: string;
+                    raw?: unknown;
+                }
+
+                // Send error to client via WebSocket
+                wsService.sendMessageToAllClients({
+                    type: 'llm-stream',
+                    sessionId,
+                    error: `Error processing message: ${error}`,
+                    done: true
+                } as LLMStreamMessage);
+            }
+        });
+
+        // Import the WebSocket service
+        const wsService = (await import('../../services/ws.js')).default;
+
+        // Let the client know streaming has started via WebSocket (helps client confirm connection is working)
+        wsService.sendMessageToAllClients({
+            type: 'llm-stream',
+            sessionId,
+            thinking: 'Initializing streaming LLM response...'
+        });
+
+        // Let the client know streaming has started via HTTP response
+        return {
+            success: true,
+            message: 'Streaming started',
+            sessionId
+        };
+    } catch (error: any) {
+        log.error(`Error starting message stream: ${error.message}`);
+        throw error;
+    }
+}
+
 export default {
     // Chat session management
     createSession,
@@ -799,6 +956,7 @@ export default {
     listSessions,
     deleteSession,
     sendMessage,
+    streamMessage, // Add new streaming endpoint
 
     // Knowledge base index management
     getIndexStats,
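Taken together with the client-side handler, the flow is: POST to the stream endpoint, receive a JSON acknowledgement, then collect tokens from the WebSocket. A minimal client sketch, assuming the client re-dispatches incoming 'llm-stream' WebSocket messages as 'llm-stream-message' DOM events as this commit's client code does; the session id and prompt are placeholders:

// Sketch: kick off a streamed reply for an existing chat session.
async function requestStream(sessionId: string): Promise<void> {
    const res = await fetch(`/api/llm/sessions/${sessionId}/messages/stream`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            content: 'Summarize this note', // placeholder prompt
            useAdvancedContext: true,
            showThinking: false
        })
    });
    if (!res.ok) {
        throw new Error(`Stream request failed with status ${res.status}`);
    }
    const ack = await res.json();
    console.log(ack.message, ack.sessionId); // "Streaming started", <sessionId>
}

// Sketch: accumulate streamed tokens as they arrive via the DOM event bridge.
let streamedText = '';
window.addEventListener('llm-stream-message', (event) => {
    const msg = (event as CustomEvent).detail;
    if (msg.error) console.error(`Stream error: ${msg.error}`);
    if (msg.content) streamedText += msg.content;
    if (msg.done) console.log(`Stream complete: ${streamedText.length} chars`);
});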
@@ -400,6 +400,7 @@ function register(app: express.Application) {
     apiRoute(DEL, "/api/llm/sessions/:sessionId", llmRoute.deleteSession);
     apiRoute(PST, "/api/llm/sessions/:sessionId/messages", llmRoute.sendMessage);
     apiRoute(GET, "/api/llm/sessions/:sessionId/messages", llmRoute.sendMessage);
+    apiRoute(PST, "/api/llm/sessions/:sessionId/messages/stream", llmRoute.streamMessage);
 
     // LLM index management endpoints - reorganized for REST principles
     apiRoute(GET, "/api/llm/indexes/stats", llmRoute.getIndexStats);

@@ -1,12 +1,16 @@
 import type { ToolCall } from './tools/tool_interfaces.js';
 import type { ModelMetadata } from './providers/provider_options.js';
 
+/**
+ * Interface for chat messages between client and LLM models
+ */
 export interface Message {
     role: 'user' | 'assistant' | 'system' | 'tool';
     content: string;
     name?: string;
     tool_call_id?: string;
     tool_calls?: ToolCall[] | any[];
+    sessionId?: string; // Optional session ID for WebSocket communication
 }
 
 /**
@@ -32,6 +36,12 @@ export interface StreamChunk {
         completionTokens?: number;
         totalTokens?: number;
     };
+
+    /**
+     * Raw provider-specific data from the original response chunk
+     * This can include thinking state, tool execution info, etc.
+     */
+    raw?: any;
 }
 
 /**
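Downstream consumers can branch on the new raw field without touching the common fields. A sketch of such a callback; the shape probed inside raw is the Ollama chunk format shown later in this commit, and is an assumption for other providers:

import type { StreamChunk } from './ai_interface.js';

// Hypothetical sink for streamed text (stands in for a UI append).
function appendToUi(text: string): void {
    console.log(text);
}

// Sketch: a stream callback that reads provider-specific detail from chunk.raw.
async function handleChunk(chunk: StreamChunk): Promise<void> {
    if (chunk.text) {
        appendToUi(chunk.text);
    }

    // raw is deliberately loose; narrow it before touching provider fields.
    const raw = chunk.raw as { message?: { tool_calls?: unknown[] } } | undefined;
    if (raw?.message?.tool_calls?.length) {
        console.log(`chunk carries ${raw.message.tool_calls.length} tool call(s)`);
    }

    if (chunk.done) {
        console.log(`stream finished, total tokens: ${chunk.usage?.totalTokens ?? 'unknown'}`);
    }
}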
@@ -127,6 +127,10 @@ export class ChatPipeline {
             // Determine if we should use tools or semantic context
             const useTools = modelSelection.options.enableTools === true;
             const useEnhancedContext = input.options?.useAdvancedContext === true;
+
+            // Log details about the advanced context parameter
+            log.info(`Enhanced context option check: input.options=${JSON.stringify(input.options || {})}`);
+            log.info(`Enhanced context decision: useEnhancedContext=${useEnhancedContext}, hasQuery=${!!input.query}`);
 
             // Early return if we don't have a query or enhanced context is disabled
             if (!input.query || !useEnhancedContext) {
@@ -431,8 +435,8 @@ export class ChatPipeline {
                                     ...modelSelection.options,
                                     // Ensure tool support is still enabled for follow-up requests
                                     enableTools: true,
-                                    // Disable streaming during tool execution follow-ups
-                                    stream: false,
+                                    // Preserve original streaming setting for tool execution follow-ups
+                                    stream: modelSelection.options.stream,
                                     // Add tool execution status for Ollama provider
                                     ...(currentResponse.provider === 'Ollama' ? { toolExecutionStatus } : {})
                                 }
@@ -498,6 +502,8 @@ export class ChatPipeline {
                             messages: currentMessages,
                             options: {
                                 ...modelSelection.options,
+                                // Preserve streaming for error follow-up
+                                stream: modelSelection.options.stream,
                                 // For Ollama, include tool execution status
                                 ...(currentResponse.provider === 'Ollama' ? { toolExecutionStatus } : {})
                             }
@@ -547,6 +553,8 @@ export class ChatPipeline {
                         options: {
                             ...modelSelection.options,
                             enableTools: false, // Disable tools for the final response
+                            // Preserve streaming setting for max iterations response
+                            stream: modelSelection.options.stream,
                             // For Ollama, include tool execution status
                             ...(currentResponse.provider === 'Ollama' ? { toolExecutionStatus } : {})
                         }
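All three hunks above fix the same hazard: an object literal built with a spread can silently override a field it also names. A toy sketch of the before/after behaviour, with baseOptions standing in for modelSelection.options:

// Sketch of the options rebuild in the pipeline follow-up calls.
const baseOptions = { model: 'llama3', stream: true, enableTools: true };

// Before this commit: follow-up requests pinned stream to false.
const before = { ...baseOptions, enableTools: true, stream: false };

// After: the caller's streaming choice survives the rebuild.
const after = { ...baseOptions, enableTools: true, stream: baseOptions.stream };

console.log(before.stream, after.stream); // false true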
@@ -1,12 +1,12 @@
 import { BasePipelineStage } from '../pipeline_stage.js';
 import type { LLMCompletionInput } from '../interfaces.js';
-import type { ChatCompletionOptions, ChatResponse } from '../../ai_interface.js';
+import type { ChatCompletionOptions, ChatResponse, StreamChunk } from '../../ai_interface.js';
 import aiServiceManager from '../../ai_service_manager.js';
 import toolRegistry from '../../tools/tool_registry.js';
 import log from '../../../log.js';
 
 /**
- * Pipeline stage for LLM completion
+ * Pipeline stage for LLM completion with enhanced streaming support
  */
 export class LLMCompletionStage extends BasePipelineStage<LLMCompletionInput, { response: ChatResponse }> {
     constructor() {
@@ -15,88 +15,124 @@ export class LLMCompletionStage extends BasePipelineStage<LLMCompletionInput, {
 
     /**
      * Generate LLM completion using the AI service
+     *
+     * This enhanced version supports better streaming by forwarding raw provider data
+     * and ensuring consistent handling of stream options.
      */
     protected async process(input: LLMCompletionInput): Promise<{ response: ChatResponse }> {
         const { messages, options, provider } = input;
 
-        // Log input options, particularly focusing on the stream option
+        // Log input options
         log.info(`[LLMCompletionStage] Input options: ${JSON.stringify({
             model: options.model,
             provider,
             stream: options.stream,
             enableTools: options.enableTools
         })}`);
-        log.info(`[LLMCompletionStage] Stream option in input: ${options.stream}, type: ${typeof options.stream}`);
 
         // Create a deep copy of options to avoid modifying the original
         const updatedOptions: ChatCompletionOptions = JSON.parse(JSON.stringify(options));
 
-        // IMPORTANT: Handle stream option carefully:
-        // 1. If it's undefined, leave it undefined (provider will use defaults)
-        // 2. If explicitly set to true/false, ensure it's a proper boolean
+        // Handle stream option explicitly
         if (options.stream !== undefined) {
             updatedOptions.stream = options.stream === true;
-            log.info(`[LLMCompletionStage] Stream explicitly provided in options, set to: ${updatedOptions.stream}`);
-        } else {
-            // If undefined, leave it undefined so provider can use its default behavior
-            log.info(`[LLMCompletionStage] Stream option not explicitly set, leaving as undefined`);
+            log.info(`[LLMCompletionStage] Stream explicitly set to: ${updatedOptions.stream}`);
         }
 
-        // If this is a direct (non-stream) call to Ollama but has the stream flag,
-        // ensure we set additional metadata to maintain proper state
-        if (updatedOptions.stream && !provider && updatedOptions.providerMetadata?.provider === 'ollama') {
-            log.info(`[LLMCompletionStage] This is an Ollama request with stream=true, ensuring provider config is consistent`);
+        // Add capture of raw provider data for streaming
+        if (updatedOptions.stream) {
+            // Add a function to capture raw provider data in stream chunks
+            const originalStreamCallback = updatedOptions.streamCallback;
+            updatedOptions.streamCallback = async (text, done, rawProviderData) => {
+                // Create an enhanced chunk with the raw provider data
+                const enhancedChunk = {
+                    text,
+                    done,
+                    // Include raw provider data if available
+                    raw: rawProviderData
+                };
+
+                // Call the original callback if provided
+                if (originalStreamCallback) {
+                    return originalStreamCallback(text, done, enhancedChunk);
+                }
+            };
         }
 
-        log.info(`[LLMCompletionStage] Copied options: ${JSON.stringify({
-            model: updatedOptions.model,
-            stream: updatedOptions.stream,
-            enableTools: updatedOptions.enableTools
-        })}`);
-
         // Check if tools should be enabled
         if (updatedOptions.enableTools !== false) {
-            // Get all available tools from the registry
             const toolDefinitions = toolRegistry.getAllToolDefinitions();
-
             if (toolDefinitions.length > 0) {
-                // Enable tools and add them to the options
                 updatedOptions.enableTools = true;
                 updatedOptions.tools = toolDefinitions;
                 log.info(`Adding ${toolDefinitions.length} tools to LLM request`);
             }
         }
 
-        // Determine which provider to use - prioritize in this order:
-        // 1. Explicit provider parameter (legacy approach)
-        // 2. Provider from metadata
-        // 3. Auto-selection
+        // Determine which provider to use
         let selectedProvider = provider;
-
-        // If no explicit provider is specified, check for provider metadata
         if (!selectedProvider && updatedOptions.providerMetadata?.provider) {
             selectedProvider = updatedOptions.providerMetadata.provider;
             log.info(`Using provider ${selectedProvider} from metadata for model ${updatedOptions.model}`);
         }
 
         log.info(`Generating LLM completion, provider: ${selectedProvider || 'auto'}, model: ${updatedOptions?.model || 'default'}`);
-        log.info(`[LLMCompletionStage] Options before service call: ${JSON.stringify({
-            model: updatedOptions.model,
-            stream: updatedOptions.stream,
-            enableTools: updatedOptions.enableTools
-        })}`);
 
-        // If provider is specified (either explicit or from metadata), use that specific provider
+        // Use specific provider if available
         if (selectedProvider && aiServiceManager.isProviderAvailable(selectedProvider)) {
             const service = aiServiceManager.getService(selectedProvider);
-            log.info(`[LLMCompletionStage] Using specific service for ${selectedProvider}, stream option: ${updatedOptions.stream}`);
+            log.info(`[LLMCompletionStage] Using specific service for ${selectedProvider}`);
+
+            // Generate completion and wrap with enhanced stream handling
             const response = await service.generateChatCompletion(messages, updatedOptions);
+
+            // If streaming is enabled, enhance the stream method
+            if (response.stream && typeof response.stream === 'function' && updatedOptions.stream) {
+                const originalStream = response.stream;
+
+                // Replace the stream method with an enhanced version that captures and forwards raw data
+                response.stream = async (callback) => {
+                    return originalStream(async (chunk) => {
+                        // Forward the chunk with any additional provider-specific data
+                        // Create an enhanced chunk with provider info
+                        const enhancedChunk: StreamChunk = {
+                            ...chunk,
+                            // If the provider didn't include raw data, add minimal info
+                            raw: chunk.raw || {
+                                provider: selectedProvider,
+                                model: response.model
+                            }
+                        };
+                        return callback(enhancedChunk);
+                    });
+                };
+            }
+
             return { response };
         }
 
-        // Otherwise use the service manager to select an available provider
-        log.info(`[LLMCompletionStage] Using auto-selected service, stream option: ${updatedOptions.stream}`);
+        // Use auto-selection if no specific provider
+        log.info(`[LLMCompletionStage] Using auto-selected service`);
         const response = await aiServiceManager.generateChatCompletion(messages, updatedOptions);
+
+        // Add similar stream enhancement for auto-selected provider
+        if (response.stream && typeof response.stream === 'function' && updatedOptions.stream) {
+            const originalStream = response.stream;
+            response.stream = async (callback) => {
+                return originalStream(async (chunk) => {
+                    // Create an enhanced chunk with provider info
+                    const enhancedChunk: StreamChunk = {
+                        ...chunk,
+                        raw: chunk.raw || {
+                            provider: response.provider,
+                            model: response.model
+                        }
+                    };
+                    return callback(enhancedChunk);
+                });
+            };
+        }
+
         return { response };
     }
 }
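The stream wrapping above is applied twice with the same shape; factored out, it is a small decorator over the response's stream function. A sketch with the types reduced to essentials (withRawMetadata is an illustrative name, not part of the commit):

interface Chunk { text: string; done: boolean; raw?: unknown; }
type StreamFn = (cb: (chunk: Chunk) => Promise<void> | void) => Promise<string>;

// Sketch: decorate a stream function so every chunk carries fallback metadata.
function withRawMetadata(stream: StreamFn, meta: { provider: string; model: string }): StreamFn {
    return async (callback) =>
        stream(async (chunk) => {
            // Keep provider-supplied raw data; otherwise attach minimal identifying info.
            await callback({ ...chunk, raw: chunk.raw ?? meta });
        });
}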
@@ -112,6 +112,8 @@ export class AnthropicService extends BaseAIService {
 
     /**
      * Handle streaming response from Anthropic
+     *
+     * Simplified implementation that leverages the Anthropic SDK's streaming capabilities
      */
     private async handleStreamingResponse(
         client: any,
@@ -119,26 +121,29 @@ export class AnthropicService extends BaseAIService {
         opts: ChatCompletionOptions,
         providerOptions: AnthropicOptions
     ): Promise<ChatResponse> {
-        let completeText = '';
-
-        // Create a function that will return a Promise that resolves with the final text
+        // Create a stream handler function that processes the SDK's stream
         const streamHandler = async (callback: (chunk: StreamChunk) => Promise<void> | void): Promise<string> => {
+            let completeText = '';
+
             try {
+                // Request a streaming response from Anthropic
                 const streamResponse = await client.messages.create({
                     ...params,
                     stream: true
                 });
 
+                // Process each chunk in the stream
                 for await (const chunk of streamResponse) {
+                    // Only process text content deltas
                     if (chunk.type === 'content_block_delta' && chunk.delta?.type === 'text_delta') {
                         const text = chunk.delta.text || '';
                         completeText += text;
 
-                        // Call the callback with the chunk
+                        // Send the chunk to the caller
                         await callback({
                             text,
                             done: false,
-                            usage: {} // Usage stats not available in chunks
+                            raw: chunk // Include the raw chunk for advanced processing
                         });
                     }
                 }
@@ -146,11 +151,7 @@ export class AnthropicService extends BaseAIService {
                 // Signal completion
                 await callback({
                     text: '',
-                    done: true,
-                    usage: {
-                        // We don't have token usage information in streaming mode from the chunks
-                        totalTokens: completeText.length / 4 // Rough estimate
-                    }
+                    done: true
                 });
 
                 return completeText;
@@ -160,25 +161,12 @@ export class AnthropicService extends BaseAIService {
             }
         };
 
-        // If a stream callback was provided in the options, set up immediate streaming
-        if (opts.streamCallback) {
-            // Start streaming in the background
-            void streamHandler(async (chunk) => {
-                if (opts.streamCallback) {
-                    await opts.streamCallback(chunk.text, chunk.done);
-                }
-            });
-        }
-
+        // Return a response object with the stream handler
         return {
-            text: completeText, // This will be empty initially until streaming completes
+            text: '', // Initial text is empty, will be populated during streaming
             model: providerOptions.model,
             provider: this.getName(),
-            stream: streamHandler,
-            usage: {
-                // We don't have token counts initially with streaming
-                totalTokens: 0
-            }
+            stream: streamHandler
         };
     }
 
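Because the handler is returned rather than started eagerly, nothing is consumed until the caller invokes stream. A usage sketch; imports and service construction are elided, and the output handling is illustrative:

// Sketch: consume the lazily-started Anthropic stream and return the full text.
async function printStreamed(service: AnthropicService, messages: Message[]): Promise<string> {
    const response = await service.generateChatCompletion(messages, { stream: true });
    if (!response.stream) {
        return response.text; // non-streaming fallback
    }
    return response.stream(async (chunk) => {
        if (chunk.text) process.stdout.write(chunk.text);
        // chunk.raw holds the original SDK event when deeper inspection is needed.
    });
}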
@@ -1,6 +1,6 @@
 import options from '../../options.js';
 import { BaseAIService } from '../base_ai_service.js';
-import type { Message, ChatCompletionOptions, ChatResponse } from '../ai_interface.js';
+import type { Message, ChatCompletionOptions, ChatResponse, StreamChunk } from '../ai_interface.js';
 import { OllamaMessageFormatter } from '../formatters/ollama_formatter.js';
 import log from '../../log.js';
 import type { ToolCall } from '../tools/tool_interfaces.js';
@@ -37,7 +37,37 @@ export class OllamaService extends BaseAIService {
             if (!baseUrl) {
                 throw new Error('Ollama base URL is not configured');
             }
-            this.client = new Ollama({ host: baseUrl });
+
+            log.info(`Creating new Ollama client with base URL: ${baseUrl}`);
+
+            // Create client with debug options
+            try {
+                this.client = new Ollama({
+                    host: baseUrl,
+                    fetch: (url, init) => {
+                        log.info(`Ollama API request to: ${url}`);
+                        log.info(`Ollama API request method: ${init?.method || 'GET'}`);
+                        log.info(`Ollama API request headers: ${JSON.stringify(init?.headers || {})}`);
+
+                        // Call the actual fetch
+                        return fetch(url, init).then(response => {
+                            log.info(`Ollama API response status: ${response.status}`);
+                            if (!response.ok) {
+                                log.error(`Ollama API error response: ${response.statusText}`);
+                            }
+                            return response;
+                        }).catch(error => {
+                            log.error(`Ollama API fetch error: ${error.message}`);
+                            throw error;
+                        });
+                    }
+                });
+
+                log.info(`Ollama client successfully created`);
+            } catch (error) {
+                log.error(`Error creating Ollama client: ${error}`);
+                throw error;
+            }
         }
         return this.client;
     }
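The instrumented client above leans on the Ollama SDK accepting a custom fetch implementation, so the logging can be factored into a reusable wrapper. A sketch (loggingFetch is an illustrative helper, not part of the commit):

// Sketch: a reusable fetch wrapper that logs each request/response pair.
function loggingFetch(label: string): typeof fetch {
    return async (url, init) => {
        log.info(`${label} request: ${init?.method || 'GET'} ${url}`);
        try {
            const response = await fetch(url, init);
            log.info(`${label} response status: ${response.status}`);
            return response;
        } catch (error: any) {
            log.error(`${label} fetch error: ${error.message}`);
            throw error;
        }
    };
}

// Usage would mirror the constructor call above:
// this.client = new Ollama({ host: baseUrl, fetch: loggingFetch('Ollama API') });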
| @ -88,11 +118,6 @@ export class OllamaService extends BaseAIService { | |||||||
|                 log.info(`Sending to Ollama with formatted messages: ${messagesToSend.length}`); |                 log.info(`Sending to Ollama with formatted messages: ${messagesToSend.length}`); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Log request details
 |  | ||||||
|             log.info(`========== OLLAMA API REQUEST ==========`); |  | ||||||
|             log.info(`Model: ${providerOptions.model}, Messages: ${messagesToSend.length}`); |  | ||||||
|             log.info(`Stream: ${opts.streamCallback ? true : false}`); |  | ||||||
|              |  | ||||||
|             // Get tools if enabled
 |             // Get tools if enabled
 | ||||||
|             let tools = []; |             let tools = []; | ||||||
|             if (providerOptions.enableTools !== false) { |             if (providerOptions.enableTools !== false) { | ||||||
| @ -119,48 +144,18 @@ export class OllamaService extends BaseAIService { | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Check message structure and log detailed information about each message
 |  | ||||||
|             messagesToSend.forEach((msg: any, index: number) => { |  | ||||||
|                 const keys = Object.keys(msg); |  | ||||||
|                 log.info(`Message ${index}, Role: ${msg.role}, Keys: ${keys.join(', ')}`); |  | ||||||
| 
 |  | ||||||
|                 // Log message content preview
 |  | ||||||
|                 if (msg.content && typeof msg.content === 'string') { |  | ||||||
|                     const contentPreview = msg.content.length > 200 |  | ||||||
|                         ? `${msg.content.substring(0, 200)}...` |  | ||||||
|                         : msg.content; |  | ||||||
|                     log.info(`Message ${index} content: ${contentPreview}`); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 // Log tool-related details
 |  | ||||||
|                 if (keys.includes('tool_calls')) { |  | ||||||
|                     log.info(`Message ${index} has ${msg.tool_calls.length} tool calls`); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (keys.includes('tool_call_id')) { |  | ||||||
|                     log.info(`Message ${index} is a tool response for tool call ID: ${msg.tool_call_id}`); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 if (keys.includes('name') && msg.role === 'tool') { |  | ||||||
|                     log.info(`Message ${index} is from tool: ${msg.name}`); |  | ||||||
|                 } |  | ||||||
|             }); |  | ||||||
| 
 |  | ||||||
|             // Get client instance
 |  | ||||||
|             const client = this.getClient(); |  | ||||||
|              |  | ||||||
|             // Convert our message format to Ollama's format
 |             // Convert our message format to Ollama's format
 | ||||||
|             const convertedMessages = messagesToSend.map(msg => { |             const convertedMessages = messagesToSend.map(msg => { | ||||||
|                 const converted: any = { |                 const converted: any = { | ||||||
|                     role: msg.role, |                     role: msg.role, | ||||||
|                     content: msg.content |                     content: msg.content | ||||||
|                 }; |                 }; | ||||||
|                  | 
 | ||||||
|                 if (msg.tool_calls) { |                 if (msg.tool_calls) { | ||||||
|                     converted.tool_calls = msg.tool_calls.map(tc => { |                     converted.tool_calls = msg.tool_calls.map(tc => { | ||||||
|                         // For Ollama, arguments must be an object, not a string
 |                         // For Ollama, arguments must be an object, not a string
 | ||||||
|                         let processedArgs = tc.function.arguments; |                         let processedArgs = tc.function.arguments; | ||||||
|                          | 
 | ||||||
|                         // If arguments is a string, try to parse it as JSON
 |                         // If arguments is a string, try to parse it as JSON
 | ||||||
|                         if (typeof processedArgs === 'string') { |                         if (typeof processedArgs === 'string') { | ||||||
|                             try { |                             try { | ||||||
| @ -171,7 +166,7 @@ export class OllamaService extends BaseAIService { | |||||||
|                                 processedArgs = { raw: processedArgs }; |                                 processedArgs = { raw: processedArgs }; | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                          | 
 | ||||||
|                         return { |                         return { | ||||||
|                             id: tc.id, |                             id: tc.id, | ||||||
|                             function: { |                             function: { | ||||||
| @ -181,18 +176,18 @@ export class OllamaService extends BaseAIService { | |||||||
|                         }; |                         }; | ||||||
|                     }); |                     }); | ||||||
|                 } |                 } | ||||||
|                  | 
 | ||||||
|                 if (msg.tool_call_id) { |                 if (msg.tool_call_id) { | ||||||
|                     converted.tool_call_id = msg.tool_call_id; |                     converted.tool_call_id = msg.tool_call_id; | ||||||
|                 } |                 } | ||||||
|                  | 
 | ||||||
|                 if (msg.name) { |                 if (msg.name) { | ||||||
|                     converted.name = msg.name; |                     converted.name = msg.name; | ||||||
|                 } |                 } | ||||||
|                  | 
 | ||||||
|                 return converted; |                 return converted; | ||||||
|             }); |             }); | ||||||
|              | 
 | ||||||
|             // Prepare base request options
 |             // Prepare base request options
 | ||||||
|             const baseRequestOptions = { |             const baseRequestOptions = { | ||||||
|                 model: providerOptions.model, |                 model: providerOptions.model, | ||||||
| @ -202,85 +197,29 @@ export class OllamaService extends BaseAIService { | |||||||
|                 tools: tools.length > 0 ? tools : undefined |                 tools: tools.length > 0 ? tools : undefined | ||||||
|             }; |             }; | ||||||
| 
 | 
 | ||||||
|  |             // Get client instance
 | ||||||
|  |             const client = this.getClient(); | ||||||
|  | 
 | ||||||
|             // Handle streaming
 |             // Handle streaming
 | ||||||
|             if (opts.streamCallback) { |             if (opts.stream || opts.streamCallback) { | ||||||
|                 let responseText = ''; |                 return this.handleStreamingResponse(client, baseRequestOptions, opts, providerOptions); | ||||||
|                 let responseToolCalls: any[] = []; |  | ||||||
|                  |  | ||||||
|                 log.info(`Using streaming mode with Ollama client`); |  | ||||||
|                  |  | ||||||
|                 let streamResponse: OllamaChatResponse | null = null; |  | ||||||
|                  |  | ||||||
|                 // Create streaming request
 |  | ||||||
|                 const streamingRequest = { |  | ||||||
|                     ...baseRequestOptions, |  | ||||||
|                     stream: true as const // Use const assertion to fix the type
 |  | ||||||
|                 }; |  | ||||||
|                  |  | ||||||
|                 // Get the async iterator
 |  | ||||||
|                 const streamIterator = await client.chat(streamingRequest); |  | ||||||
|                  |  | ||||||
|                 // Process each chunk
 |  | ||||||
|                 for await (const chunk of streamIterator) { |  | ||||||
|                     // Save the last chunk for final stats
 |  | ||||||
|                     streamResponse = chunk; |  | ||||||
|                      |  | ||||||
|                     // Accumulate text
 |  | ||||||
|                     if (chunk.message?.content) { |  | ||||||
|                         responseText += chunk.message.content; |  | ||||||
|                     } |  | ||||||
|                      |  | ||||||
|                     // Check for tool calls
 |  | ||||||
|                     if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) { |  | ||||||
|                         responseToolCalls = [...chunk.message.tool_calls]; |  | ||||||
|                     } |  | ||||||
|                      |  | ||||||
|                     // Call the callback with the current chunk content
 |  | ||||||
|                     if (opts.streamCallback) { |  | ||||||
|                         // Original callback expects text content, isDone flag, and optional original chunk
 |  | ||||||
|                         opts.streamCallback( |  | ||||||
|                             chunk.message?.content || '',  |  | ||||||
|                             !!chunk.done,  |  | ||||||
|                             chunk |  | ||||||
|                         ); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                  |  | ||||||
|                 // Create the final response after streaming is complete
 |  | ||||||
|                 return { |  | ||||||
|                     text: responseText, |  | ||||||
|                     model: providerOptions.model, |  | ||||||
|                     provider: this.getName(), |  | ||||||
|                     tool_calls: this.transformToolCalls(responseToolCalls), |  | ||||||
|                     usage: { |  | ||||||
|                         promptTokens: streamResponse?.prompt_eval_count || 0, |  | ||||||
|                         completionTokens: streamResponse?.eval_count || 0, |  | ||||||
|                         totalTokens: (streamResponse?.prompt_eval_count || 0) + (streamResponse?.eval_count || 0) |  | ||||||
|                     } |  | ||||||
|                 }; |  | ||||||
|             } else { |             } else { | ||||||
|                 // Non-streaming request
 |                 // Non-streaming request
 | ||||||
|                 log.info(`Using non-streaming mode with Ollama client`); |                 log.info(`Using non-streaming mode with Ollama client`); | ||||||
|                  | 
 | ||||||
|                 // Create non-streaming request
 |                 // Create non-streaming request
 | ||||||
|                 const nonStreamingRequest = { |                 const nonStreamingRequest = { | ||||||
|                     ...baseRequestOptions, |                     ...baseRequestOptions, | ||||||
|                     stream: false as const // Use const assertion for type safety
 |                     stream: false as const // Use const assertion for type safety
 | ||||||
|                 }; |                 }; | ||||||
|                  | 
 | ||||||
|                 const response = await client.chat(nonStreamingRequest); |                 const response = await client.chat(nonStreamingRequest); | ||||||
|                  | 
 | ||||||
|                 // Log response details
 |                 // Log response details
 | ||||||
|                 log.info(`========== OLLAMA API RESPONSE ==========`); |                 log.info(`========== OLLAMA API RESPONSE ==========`); | ||||||
|                 log.info(`Model: ${response.model}, Content length: ${response.message?.content?.length || 0} chars`); |                 log.info(`Model: ${response.model}, Content length: ${response.message?.content?.length || 0} chars`); | ||||||
|                 log.info(`Tokens: ${response.prompt_eval_count || 0} prompt, ${response.eval_count || 0} completion, ${(response.prompt_eval_count || 0) + (response.eval_count || 0)} total`); |                 log.info(`Tokens: ${response.prompt_eval_count || 0} prompt, ${response.eval_count || 0} completion, ${(response.prompt_eval_count || 0) + (response.eval_count || 0)} total`); | ||||||
|                  | 
 | ||||||
|                 // Log content preview
 |  | ||||||
|                 const contentPreview = response.message?.content && response.message.content.length > 300 |  | ||||||
|                     ? `${response.message.content.substring(0, 300)}...` |  | ||||||
|                     : response.message?.content || ''; |  | ||||||
|                 log.info(`Response content: ${contentPreview}`); |  | ||||||
|                  |  | ||||||
|                 // Handle the response and extract tool calls if present
 |                 // Handle the response and extract tool calls if present
 | ||||||
|                 const chatResponse: ChatResponse = { |                 const chatResponse: ChatResponse = { | ||||||
|                     text: response.message?.content || '', |                     text: response.message?.content || '', | ||||||
| @ -292,15 +231,13 @@ export class OllamaService extends BaseAIService { | |||||||
|                         totalTokens: (response.prompt_eval_count || 0) + (response.eval_count || 0) |                         totalTokens: (response.prompt_eval_count || 0) + (response.eval_count || 0) | ||||||
|                     } |                     } | ||||||
|                 }; |                 }; | ||||||
|                  | 
 | ||||||
|                 // Add tool calls if present
 |                 // Add tool calls if present
 | ||||||
|                 if (response.message?.tool_calls && response.message.tool_calls.length > 0) { |                 if (response.message?.tool_calls && response.message.tool_calls.length > 0) { | ||||||
|                     log.info(`Ollama response includes ${response.message.tool_calls.length} tool calls`); |                     log.info(`Ollama response includes ${response.message.tool_calls.length} tool calls`); | ||||||
|                     chatResponse.tool_calls = this.transformToolCalls(response.message.tool_calls); |                     chatResponse.tool_calls = this.transformToolCalls(response.message.tool_calls); | ||||||
|                     log.info(`Transformed tool calls: ${JSON.stringify(chatResponse.tool_calls)}`); |  | ||||||
|                 } |                 } | ||||||
|                  | 
 | ||||||
|                 log.info(`========== END OLLAMA RESPONSE ==========`); |  | ||||||
|                 return chatResponse; |                 return chatResponse; | ||||||
|             } |             } | ||||||
|         } catch (error: any) { |         } catch (error: any) { | ||||||
| @ -315,6 +252,303 @@ export class OllamaService extends BaseAIService { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /** | ||||||
|  |      * Handle streaming response from Ollama | ||||||
|  |      * | ||||||
|  |      * Simplified implementation that leverages the Ollama SDK's streaming capabilities | ||||||
|  |      */ | ||||||
|  |     private async handleStreamingResponse( | ||||||
|  |         client: Ollama, | ||||||
|  |         requestOptions: any, | ||||||
|  |         opts: ChatCompletionOptions, | ||||||
|  |         providerOptions: OllamaOptions | ||||||
|  |     ): Promise<ChatResponse> { | ||||||
|  |         log.info(`Using streaming mode with Ollama client`); | ||||||
|  | 
 | ||||||
|  |         // Log detailed information about the streaming setup
 | ||||||
|  |         log.info(`Ollama streaming details: model=${providerOptions.model}, streamCallback=${opts.streamCallback ? 'provided' : 'not provided'}`); | ||||||
|  | 
 | ||||||
|  |         // Create a stream handler function that processes the SDK's stream
 | ||||||
|  |         const streamHandler = async (callback: (chunk: StreamChunk) => Promise<void> | void): Promise<string> => { | ||||||
|  |             let completeText = ''; | ||||||
|  |             let responseToolCalls: any[] = []; | ||||||
|  |             let chunkCount = 0; | ||||||
|  | 
 | ||||||
|  |             try { | ||||||
|  |                 // Create streaming request
 | ||||||
|  |                 const streamingRequest = { | ||||||
|  |                     ...requestOptions, | ||||||
|  |                     stream: true as const // Use const assertion to fix the type
 | ||||||
|  |                 }; | ||||||
|  | 
 | ||||||
|  |                 log.info(`Creating Ollama streaming request with options: model=${streamingRequest.model}, stream=${streamingRequest.stream}, tools=${streamingRequest.tools ? streamingRequest.tools.length : 0}`); | ||||||
|  | 
 | ||||||
|  |                 // Get the async iterator
 | ||||||
|  |                 log.info(`Calling Ollama chat API with streaming enabled`); | ||||||
|  |                 let streamIterator; | ||||||
|  |                 try { | ||||||
|  |                     log.info(`About to call client.chat with streaming request to ${options.getOption('ollamaBaseUrl')}`); | ||||||
|  |                     log.info(`Stream request: model=${streamingRequest.model}, messages count=${streamingRequest.messages?.length || 0}`); | ||||||
|  | 
 | ||||||
|  |                     // Check if we can connect to Ollama by getting available models
 | ||||||
|  |                     try { | ||||||
|  |                         log.info(`Performing Ollama health check...`); | ||||||
|  |                         const healthCheck = await client.list(); | ||||||
|  |                         log.info(`Ollama health check successful. Available models: ${healthCheck.models.map(m => m.name).join(', ')}`); | ||||||
|  |                     } catch (healthError) { | ||||||
|  |                         log.error(`Ollama health check failed: ${healthError instanceof Error ? healthError.message : String(healthError)}`); | ||||||
|  |                         log.error(`This indicates a connection issue to the Ollama server at ${options.getOption('ollamaBaseUrl')}`); | ||||||
|  |                         throw new Error(`Unable to connect to Ollama server: ${healthError instanceof Error ? healthError.message : String(healthError)}`); | ||||||
|  |                     } | ||||||
|  | 
 | ||||||
|  |                     // Make the streaming request
 | ||||||
|  |                     log.info(`Proceeding with Ollama streaming request after successful health check`); | ||||||
|  |                     streamIterator = await client.chat(streamingRequest); | ||||||
|  | 
 | ||||||
|  |                     log.info(`Successfully obtained Ollama stream iterator`); | ||||||
|  | 
 | ||||||
|  |                     if (!streamIterator || typeof streamIterator[Symbol.asyncIterator] !== 'function') { | ||||||
|  |                         log.error(`Invalid stream iterator returned: ${JSON.stringify(streamIterator)}`); | ||||||
|  |                         throw new Error('Stream iterator is not valid'); | ||||||
|  |                     } | ||||||
|  |                 } catch (error) { | ||||||
|  |                     log.error(`Error getting stream iterator: ${error instanceof Error ? error.message : String(error)}`); | ||||||
|  |                     log.error(`Error stack: ${error instanceof Error ? error.stack : 'No stack trace'}`); | ||||||
|  |                     throw error; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 // Process each chunk
 | ||||||
|  |                 try { | ||||||
|  |                     log.info(`About to start processing stream chunks`); | ||||||
|  |                     for await (const chunk of streamIterator) { | ||||||
|  |                         chunkCount++; | ||||||
|  | 
 | ||||||
|  |                         // Log first chunk and then periodic updates
 | ||||||
|  |                         if (chunkCount === 1 || chunkCount % 10 === 0) { | ||||||
|  |                             log.info(`Processing Ollama stream chunk #${chunkCount}, done=${!!chunk.done}, has content=${!!chunk.message?.content}`); | ||||||
|  |                         } | ||||||
|  | 
 | ||||||
|  |                         // Accumulate text
 | ||||||
|  |                         if (chunk.message?.content) { | ||||||
|  |                             const newContent = chunk.message.content; | ||||||
|  |                             completeText += newContent; | ||||||
|  | 
 | ||||||
|  |                             if (chunkCount === 1) { | ||||||
|  |                                 log.info(`First content chunk received: "${newContent.substring(0, 50)}${newContent.length > 50 ? '...' : ''}"`); | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  | 
 | ||||||
|  |                         // Check for tool calls
 | ||||||
|  |                         if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) { | ||||||
|  |                             responseToolCalls = [...chunk.message.tool_calls]; | ||||||
|  |                             log.info(`Received tool calls in stream: ${chunk.message.tool_calls.length} tools`); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Send the chunk to the caller | ||||||
|  |                         await callback({ | ||||||
|  |                             text: chunk.message?.content || '', | ||||||
|  |                             done: !!chunk.done, | ||||||
|  |                             raw: chunk // Include the raw chunk for advanced processing | ||||||
|  |                         }); | ||||||
|  |  | ||||||
|  |                         // If this is the done chunk, log it | ||||||
|  |                         if (chunk.done) { | ||||||
|  |                             log.info(`Reached final chunk (done=true) after ${chunkCount} chunks, total content length: ${completeText.length}`); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     log.info(`Completed streaming from Ollama: processed ${chunkCount} chunks, total content: ${completeText.length} chars`); | ||||||
|  |  | ||||||
|  |                     // Signal completion | ||||||
|  |                     await callback({ | ||||||
|  |                         text: '', | ||||||
|  |                         done: true | ||||||
|  |                     }); | ||||||
|  |                 } catch (streamProcessError) { | ||||||
|  |                     log.error(`Error processing Ollama stream: ${streamProcessError instanceof Error ? streamProcessError.message : String(streamProcessError)}`); | ||||||
|  |                     log.error(`Stream process error stack: ${streamProcessError instanceof Error ? streamProcessError.stack : 'No stack trace'}`); | ||||||
|  |  | ||||||
|  |                     // Try to signal completion with error | ||||||
|  |                     try { | ||||||
|  |                         await callback({ | ||||||
|  |                             text: '', | ||||||
|  |                             done: true, | ||||||
|  |                             raw: { error: streamProcessError instanceof Error ? streamProcessError.message : String(streamProcessError) } | ||||||
|  |                         }); | ||||||
|  |                     } catch (finalError) { | ||||||
|  |                         log.error(`Error sending final error chunk: ${finalError}`); | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     throw streamProcessError; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 return completeText; | ||||||
|  |             } catch (error) { | ||||||
|  |                 log.error(`Error in Ollama streaming: ${error}`); | ||||||
|  |                 log.error(`Error details: ${error instanceof Error ? error.stack : 'No stack trace available'}`); | ||||||
|  |                 throw error; | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Handle direct streamCallback if provided | ||||||
|  |         if (opts.streamCallback) { | ||||||
|  |             let completeText = ''; | ||||||
|  |             let responseToolCalls: any[] = []; | ||||||
|  |             let finalChunk: OllamaChatResponse | null = null; | ||||||
|  |             let chunkCount = 0; | ||||||
|  |  | ||||||
|  |             try { | ||||||
|  |                 // Create streaming request | ||||||
|  |                 const streamingRequest = { | ||||||
|  |                     ...requestOptions, | ||||||
|  |                     stream: true as const | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 log.info(`Starting Ollama direct streamCallback processing with model ${providerOptions.model}`); | ||||||
|  |  | ||||||
|  |                 // Get the async iterator | ||||||
|  |                 log.info(`Calling Ollama chat API for direct streaming`); | ||||||
|  |                 let streamIterator; | ||||||
|  |                 try { | ||||||
|  |                     log.info(`About to call client.chat with streaming request to ${options.getOption('ollamaBaseUrl')}`); | ||||||
|  |                     log.info(`Model: ${streamingRequest.model}, Stream: ${streamingRequest.stream}`); | ||||||
|  |                     log.info(`Messages count: ${streamingRequest.messages.length}`); | ||||||
|  |                     log.info(`First message: role=${streamingRequest.messages[0].role}, content preview=${streamingRequest.messages[0].content?.substring(0, 50) || 'empty'}`); | ||||||
|  |  | ||||||
|  |                     // Perform health check before streaming | ||||||
|  |                     try { | ||||||
|  |                         log.info(`Performing Ollama health check before direct streaming...`); | ||||||
|  |                         const healthCheck = await client.list(); | ||||||
|  |                         log.info(`Ollama health check successful. Available models: ${healthCheck.models.map(m => m.name).join(', ')}`); | ||||||
|  |                     } catch (healthError) { | ||||||
|  |                         log.error(`Ollama health check failed: ${healthError instanceof Error ? healthError.message : String(healthError)}`); | ||||||
|  |                         log.error(`This indicates a connection issue to the Ollama server at ${options.getOption('ollamaBaseUrl')}`); | ||||||
|  |                         throw new Error(`Unable to connect to Ollama server: ${healthError instanceof Error ? healthError.message : String(healthError)}`); | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     // Proceed with streaming after successful health check | ||||||
|  |                     log.info(`Making Ollama streaming request after successful health check`); | ||||||
|  |                     streamIterator = await client.chat(streamingRequest); | ||||||
|  |  | ||||||
|  |                     log.info(`Successfully obtained Ollama stream iterator for direct callback`); | ||||||
|  |  | ||||||
|  |                     // Check if the stream iterator is valid | ||||||
|  |                     if (!streamIterator || typeof streamIterator[Symbol.asyncIterator] !== 'function') { | ||||||
|  |                         log.error(`Invalid stream iterator returned from Ollama: ${JSON.stringify(streamIterator)}`); | ||||||
|  |                         throw new Error('Invalid stream iterator returned from Ollama'); | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     log.info(`Stream iterator is valid, beginning processing`); | ||||||
|  |                 } catch (error) { | ||||||
|  |                     log.error(`Error getting stream iterator from Ollama: ${error instanceof Error ? error.message : String(error)}`); | ||||||
|  |                     log.error(`Error stack: ${error instanceof Error ? error.stack : 'No stack trace'}`); | ||||||
|  |                     throw error; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // Process each chunk | ||||||
|  |                 try { | ||||||
|  |                     log.info(`Starting to iterate through stream chunks`); | ||||||
|  |                     for await (const chunk of streamIterator) { | ||||||
|  |                         chunkCount++; | ||||||
|  |                         finalChunk = chunk; | ||||||
|  |  | ||||||
|  |                         // Log first chunk and periodic updates | ||||||
|  |                         if (chunkCount === 1 || chunkCount % 10 === 0) { | ||||||
|  |                             log.info(`Processing Ollama direct stream chunk #${chunkCount}, done=${!!chunk.done}, has content=${!!chunk.message?.content}`); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Accumulate text | ||||||
|  |                         if (chunk.message?.content) { | ||||||
|  |                             const newContent = chunk.message.content; | ||||||
|  |                             completeText += newContent; | ||||||
|  |  | ||||||
|  |                             if (chunkCount === 1) { | ||||||
|  |                                 log.info(`First direct content chunk: "${newContent.substring(0, 50)}${newContent.length > 50 ? '...' : ''}"`); | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Check for tool calls | ||||||
|  |                         if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) { | ||||||
|  |                             responseToolCalls = [...chunk.message.tool_calls]; | ||||||
|  |                             log.info(`Received tool calls in direct stream: ${chunk.message.tool_calls.length} tools`); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Call the callback with the current chunk content | ||||||
|  |                         if (opts.streamCallback) { | ||||||
|  |                             try { | ||||||
|  |                                 // For the final chunk, make sure to send the complete text with done=true | ||||||
|  |                                 if (chunk.done) { | ||||||
|  |                                     log.info(`Sending final callback with done=true and complete content (${completeText.length} chars)`); | ||||||
|  |                                     await opts.streamCallback( | ||||||
|  |                                         completeText, // Send the full accumulated content for the final chunk | ||||||
|  |                                         true, | ||||||
|  |                                         { ...chunk, message: { ...chunk.message, content: completeText } } | ||||||
|  |                                     ); | ||||||
|  |                                 } else if (chunk.message?.content) { | ||||||
|  |                                     // For content chunks, send them as they come | ||||||
|  |                                     await opts.streamCallback( | ||||||
|  |                                         chunk.message.content, | ||||||
|  |                                         !!chunk.done, | ||||||
|  |                                         chunk | ||||||
|  |                                     ); | ||||||
|  |                                 } else if (chunk.message?.tool_calls && chunk.message.tool_calls.length > 0) { | ||||||
|  |                                     // For tool call chunks, send an empty content string but include the tool calls | ||||||
|  |                                     await opts.streamCallback( | ||||||
|  |                                         '', | ||||||
|  |                                         !!chunk.done, | ||||||
|  |                                         chunk | ||||||
|  |                                     ); | ||||||
|  |                                 } | ||||||
|  |  | ||||||
|  |                                 if (chunkCount === 1) { | ||||||
|  |                                     log.info(`Successfully called streamCallback with first chunk`); | ||||||
|  |                                 } | ||||||
|  |                             } catch (callbackError) { | ||||||
|  |                                 log.error(`Error in streamCallback: ${callbackError}`); | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // If this is the done chunk, log it | ||||||
|  |                         if (chunk.done) { | ||||||
|  |                             log.info(`Reached final direct chunk (done=true) after ${chunkCount} chunks, total content length: ${completeText.length}`); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     log.info(`Completed direct streaming from Ollama: processed ${chunkCount} chunks, final content: ${completeText.length} chars`); | ||||||
|  |                 } catch (iterationError) { | ||||||
|  |                     log.error(`Error iterating through Ollama stream chunks: ${iterationError instanceof Error ? iterationError.message : String(iterationError)}`); | ||||||
|  |                     log.error(`Iteration error stack: ${iterationError instanceof Error ? iterationError.stack : 'No stack trace'}`); | ||||||
|  |                     throw iterationError; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // Create the final response after streaming is complete | ||||||
|  |                 return { | ||||||
|  |                     text: completeText, | ||||||
|  |                     model: providerOptions.model, | ||||||
|  |                     provider: this.getName(), | ||||||
|  |                     tool_calls: this.transformToolCalls(responseToolCalls), | ||||||
|  |                     usage: { | ||||||
|  |                         promptTokens: finalChunk?.prompt_eval_count || 0, | ||||||
|  |                         completionTokens: finalChunk?.eval_count || 0, | ||||||
|  |                         totalTokens: (finalChunk?.prompt_eval_count || 0) + (finalChunk?.eval_count || 0) | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|  |             } catch (error) { | ||||||
|  |                 log.error(`Error in Ollama streaming with callback: ${error}`); | ||||||
|  |                 log.error(`Error details: ${error instanceof Error ? error.stack : 'No stack trace available'}`); | ||||||
|  |                 throw error; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Return a response object with the stream handler | ||||||
|  |         return { | ||||||
|  |             text: '', // Initial text is empty, will be populated during streaming | ||||||
|  |             model: providerOptions.model, | ||||||
|  |             provider: this.getName(), | ||||||
|  |             stream: streamHandler | ||||||
|  |         }; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Transform Ollama tool calls to the standard format expected by the pipeline |      * Transform Ollama tool calls to the standard format expected by the pipeline | ||||||
|      */ |      */ | ||||||
| @ -322,14 +556,14 @@ export class OllamaService extends BaseAIService { | |||||||
|         if (!toolCalls || !Array.isArray(toolCalls) || toolCalls.length === 0) { |         if (!toolCalls || !Array.isArray(toolCalls) || toolCalls.length === 0) { | ||||||
|             return []; |             return []; | ||||||
|         } |         } | ||||||
|          |  | ||||||
|         return toolCalls.map((toolCall, index) => { |         return toolCalls.map((toolCall, index) => { | ||||||
|             // Generate a unique ID if none is provided |             // Generate a unique ID if none is provided | ||||||
|             const id = toolCall.id || `tool-call-${Date.now()}-${index}`; |             const id = toolCall.id || `tool-call-${Date.now()}-${index}`; | ||||||
|              |  | ||||||
|             // Handle arguments based on their type |             // Handle arguments based on their type | ||||||
|             let processedArguments: Record<string, any> | string = toolCall.function?.arguments || {}; |             let processedArguments: Record<string, any> | string = toolCall.function?.arguments || {}; | ||||||
|              |  | ||||||
|             if (typeof processedArguments === 'string') { |             if (typeof processedArguments === 'string') { | ||||||
|                 try { |                 try { | ||||||
|                     processedArguments = JSON.parse(processedArguments); |                     processedArguments = JSON.parse(processedArguments); | ||||||
| @ -339,7 +573,7 @@ export class OllamaService extends BaseAIService { | |||||||
|                     processedArguments = { raw: processedArguments }; |                     processedArguments = { raw: processedArguments }; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|              |  | ||||||
|             return { |             return { | ||||||
|                 id, |                 id, | ||||||
|                 type: 'function', |                 type: 'function', | ||||||
|  | |||||||
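The Ollama handler above streams lazily: the returned `text` starts empty and tokens arrive only once the caller invokes the returned `stream` function. A minimal consumer sketch, assuming only the chunk shape `{ text, done, raw? }` visible in this diff (the name `consumeOllamaStream` and the response type are illustrative, not part of the commit):

    // Sketch: draining the stream handler returned by the Ollama provider above.
    type StreamChunk = { text: string; done: boolean; raw?: unknown };
    type StreamingResponse = {
        text: string;
        stream?: (cb: (chunk: StreamChunk) => Promise<void>) => Promise<string>;
    };

    async function consumeOllamaStream(response: StreamingResponse): Promise<string> {
        if (!response.stream) {
            return response.text; // provider answered without streaming
        }
        let accumulated = '';
        const complete = await response.stream(async (chunk) => {
            if (chunk.text) {
                accumulated += chunk.text; // incremental tokens
            }
        });
        // The handler resolves with the complete text; the accumulator is a cross-check.
        return complete || accumulated;
    }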
| @ -70,103 +70,61 @@ export class OpenAIService extends BaseAIService { | |||||||
|             if (providerOptions.stream) { |             if (providerOptions.stream) { | ||||||
|                 params.stream = true; |                 params.stream = true; | ||||||
|                  |                  | ||||||
|  |                 // Get stream from OpenAI SDK | ||||||
|                 const stream = await client.chat.completions.create(params); |                 const stream = await client.chat.completions.create(params); | ||||||
|                 let fullText = ''; |  | ||||||
|                  |                  | ||||||
|                 // If a direct callback is provided, use it |                 // Return a response with the stream handler | ||||||
|                 if (providerOptions.streamCallback) { |                 return { | ||||||
|                     // Process the stream with the callback |                     text: '', // Initial empty text, will be populated during streaming | ||||||
|                     try { |                     model: params.model, | ||||||
|                         // The stream is an AsyncIterable |                     provider: this.getName(), | ||||||
|                         if (Symbol.asyncIterator in stream) { |                     stream: async (callback) => { | ||||||
|                             for await (const chunk of stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>) { |                         let completeText = ''; | ||||||
|                                 const content = chunk.choices[0]?.delta?.content || ''; |                          | ||||||
|                                 if (content) { |                         try { | ||||||
|                                     fullText += content; |                             // Process the stream | ||||||
|                                     await providerOptions.streamCallback(content, false, chunk); |                             if (Symbol.asyncIterator in stream) { | ||||||
|                                 } |                                 for await (const chunk of stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>) { | ||||||
|                                  |                                     const content = chunk.choices[0]?.delta?.content || ''; | ||||||
|                                 // If this is the last chunk |                                     const isDone = !!chunk.choices[0]?.finish_reason; | ||||||
|                                 if (chunk.choices[0]?.finish_reason) { |  | ||||||
|                                     await providerOptions.streamCallback('', true, chunk); |  | ||||||
|                                 } |  | ||||||
|                             } |  | ||||||
|                         } else { |  | ||||||
|                             console.error('Stream is not iterable, falling back to non-streaming response'); |  | ||||||
|                              |  | ||||||
|                             // If we get a non-streaming response somehow |  | ||||||
|                             if ('choices' in stream) { |  | ||||||
|                                 const content = stream.choices[0]?.message?.content || ''; |  | ||||||
|                                 fullText = content; |  | ||||||
|                                 if (providerOptions.streamCallback) { |  | ||||||
|                                     await providerOptions.streamCallback(content, true, stream); |  | ||||||
|                                 } |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     } catch (error) { |  | ||||||
|                         console.error('Error processing stream:', error); |  | ||||||
|                         throw error; |  | ||||||
|                     } |  | ||||||
|                      |  | ||||||
|                     return { |  | ||||||
|                         text: fullText, |  | ||||||
|                         model: params.model, |  | ||||||
|                         provider: this.getName(), |  | ||||||
|                         usage: {} // Usage stats aren't available with streaming |  | ||||||
|                     }; |  | ||||||
|                 } else { |  | ||||||
|                     // Use the more flexible stream interface |  | ||||||
|                     return { |  | ||||||
|                         text: '', // Initial empty text, will be filled by stream processing |  | ||||||
|                         model: params.model, |  | ||||||
|                         provider: this.getName(), |  | ||||||
|                         usage: {}, // Usage stats aren't available with streaming |  | ||||||
|                         stream: async (callback) => { |  | ||||||
|                             let completeText = ''; |  | ||||||
|                              |  | ||||||
|                             try { |  | ||||||
|                                 // The stream is an AsyncIterable |  | ||||||
|                                 if (Symbol.asyncIterator in stream) { |  | ||||||
|                                     for await (const chunk of stream as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>) { |  | ||||||
|                                         const content = chunk.choices[0]?.delta?.content || ''; |  | ||||||
|                                         const isDone = !!chunk.choices[0]?.finish_reason; |  | ||||||
|                                          |  | ||||||
|                                         if (content) { |  | ||||||
|                                             completeText += content; |  | ||||||
|                                         } |  | ||||||
|                                          |  | ||||||
|                                         // Call the provided callback with the StreamChunk interface |  | ||||||
|                                         await callback({ |  | ||||||
|                                             text: content, |  | ||||||
|                                             done: isDone |  | ||||||
|                                         }); |  | ||||||
|                                          |  | ||||||
|                                         if (isDone) { |  | ||||||
|                                             break; |  | ||||||
|                                         } |  | ||||||
|                                     } |  | ||||||
|                                 } else { |  | ||||||
|                                     console.warn('Stream is not iterable, falling back to non-streaming response'); |  | ||||||
|                                      |                                      | ||||||
|                                     // If we get a non-streaming response somehow |                                     if (content) { | ||||||
|                                     if ('choices' in stream) { |                                         completeText += content; | ||||||
|                                         const content = stream.choices[0]?.message?.content || ''; |                                     } | ||||||
|                                         completeText = content; |                                      | ||||||
|                                         await callback({ |                                     // Send the chunk to the caller with raw data | ||||||
|                                             text: content, |                                     await callback({ | ||||||
|                                             done: true |                                         text: content, | ||||||
|                                         }); |                                         done: isDone, | ||||||
|  |                                         raw: chunk // Include the raw chunk for advanced processing | ||||||
|  |                                     }); | ||||||
|  |                                      | ||||||
|  |                                     if (isDone) { | ||||||
|  |                                         break; | ||||||
|                                     } |                                     } | ||||||
|                                 } |                                 } | ||||||
|                             } catch (error) { |                             } else { | ||||||
|                                 console.error('Error processing stream:', error); |                                 // Fallback for non-iterable response | ||||||
|                                 throw error; |                                 console.warn('Stream is not iterable, falling back to non-streaming response'); | ||||||
|  |                                  | ||||||
|  |                                 if ('choices' in stream) { | ||||||
|  |                                     const content = stream.choices[0]?.message?.content || ''; | ||||||
|  |                                     completeText = content; | ||||||
|  |                                     await callback({ | ||||||
|  |                                         text: content, | ||||||
|  |                                         done: true, | ||||||
|  |                                         raw: stream | ||||||
|  |                                     }); | ||||||
|  |                                 } | ||||||
|                             } |                             } | ||||||
|                              |                         } catch (error) { | ||||||
|                             return completeText; |                             console.error('Error processing stream:', error); | ||||||
|  |                             throw error; | ||||||
|                         } |                         } | ||||||
|                     }; |                          | ||||||
|                 } |                         return completeText; | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|             } else { |             } else { | ||||||
|                 // Non-streaming response |                 // Non-streaming response | ||||||
|                 params.stream = false; |                 params.stream = false; | ||||||
|  | |||||||
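With this rewrite the OpenAI path drops the separate `streamCallback` branch and exposes the same lazy `stream` handler as the Ollama provider, now attaching the raw SDK chunk to every callback invocation. The shared chunk contract, restated as a sketch (field names as used by both providers in this diff):

    // Chunk contract both provider stream handlers now emit:
    interface StreamChunk {
        text: string;   // incremental delta; may be '' on tool-call or done-only chunks
        done: boolean;  // true exactly once, on the final chunk
        raw?: unknown;  // untouched provider chunk, kept for thinking/tool-call inspection
    }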
| @ -1,6 +1,26 @@ | |||||||
| import log from "../log.js"; | import log from "../log.js"; | ||||||
| import type { Request, Response } from "express"; | import type { Request, Response } from "express"; | ||||||
| import type { Message, ChatCompletionOptions } from "./ai_interface.js"; | import type { Message, ChatCompletionOptions, ChatResponse, StreamChunk } from "./ai_interface.js"; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Interface for WebSocket LLM streaming messages | ||||||
|  |  */ | ||||||
|  | interface LLMStreamMessage { | ||||||
|  |     type: 'llm-stream'; | ||||||
|  |     sessionId: string; | ||||||
|  |     content?: string; | ||||||
|  |     thinking?: string; | ||||||
|  |     toolExecution?: { | ||||||
|  |         action?: string; | ||||||
|  |         tool?: string; | ||||||
|  |         result?: string; | ||||||
|  |         error?: string; | ||||||
|  |         args?: Record<string, unknown>; | ||||||
|  |     }; | ||||||
|  |     done?: boolean; | ||||||
|  |     error?: string; | ||||||
|  |     raw?: unknown; | ||||||
|  | } | ||||||
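A client subscribed to this channel would typically observe a sequence like the following; session ID, content, and tool names here are illustrative only:

    const exampleFrames: LLMStreamMessage[] = [
        { type: 'llm-stream', sessionId: 'sess-1', thinking: 'Preparing response...' },
        { type: 'llm-stream', sessionId: 'sess-1', content: 'Hel' },
        { type: 'llm-stream', sessionId: 'sess-1', content: 'lo' },
        { type: 'llm-stream', sessionId: 'sess-1', toolExecution: { action: 'complete', tool: 'search' } },
        // the final frame repeats the full accumulated content alongside done=true
        { type: 'llm-stream', sessionId: 'sess-1', content: 'Hello', done: true }
    ];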
| import contextService from "./context/services/context_service.js"; | import contextService from "./context/services/context_service.js"; | ||||||
| import { LLM_CONSTANTS } from './constants/provider_constants.js'; | import { LLM_CONSTANTS } from './constants/provider_constants.js'; | ||||||
| import { ERROR_PROMPTS } from './constants/llm_prompt_constants.js'; | import { ERROR_PROMPTS } from './constants/llm_prompt_constants.js'; | ||||||
| @ -290,22 +310,24 @@ class RestChatService { | |||||||
|                 // Add logging for POST requests |                 // Add logging for POST requests | ||||||
|                 log.info(`LLM POST message: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}, contentLength=${content ? content.length : 0}`); |                 log.info(`LLM POST message: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}, contentLength=${content ? content.length : 0}`); | ||||||
|             } else if (req.method === 'GET') { |             } else if (req.method === 'GET') { | ||||||
|                 // For GET (streaming) requests, get format from query params |                 // For GET (streaming) requests, get parameters from query params and body | ||||||
|                 // The content should have been sent in a previous POST request |                 // For streaming requests, we need the content from the body | ||||||
|                 useAdvancedContext = req.query.useAdvancedContext === 'true'; |                 useAdvancedContext = req.query.useAdvancedContext === 'true' || (req.body && req.body.useAdvancedContext === true); | ||||||
|                 showThinking = req.query.showThinking === 'true'; |                 showThinking = req.query.showThinking === 'true' || (req.body && req.body.showThinking === true); | ||||||
|                 content = ''; // We don't need content for GET requests |                 content = req.body && req.body.content ? req.body.content : ''; | ||||||
|  |  | ||||||
|                 // Add logging for GET requests |                 // Add detailed logging for GET requests | ||||||
|                 log.info(`LLM GET stream: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}`); |                 log.info(`LLM GET stream: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}`); | ||||||
|  |                 log.info(`Parameters from query: useAdvancedContext=${req.query.useAdvancedContext}, showThinking=${req.query.showThinking}`); | ||||||
|  |                 log.info(`Parameters from body: useAdvancedContext=${req.body?.useAdvancedContext}, showThinking=${req.body?.showThinking}, content=${content ? `${content.substring(0, 20)}...` : 'none'}`); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Get sessionId from URL params since it's part of the route |             // Get sessionId from URL params since it's part of the route | ||||||
|             sessionId = req.params.sessionId; |             sessionId = req.params.sessionId; | ||||||
|  |  | ||||||
|             // For GET requests, ensure we have the format=stream parameter |             // For GET requests, ensure we have the stream parameter | ||||||
|             if (req.method === 'GET' && (!req.query.format || req.query.format !== 'stream')) { |             if (req.method === 'GET' && req.query.stream !== 'true') { | ||||||
|                 throw new Error('Stream format parameter is required for GET requests'); |                 throw new Error('Stream parameter must be set to true for GET/streaming requests'); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // For POST requests, validate the content |             // For POST requests, validate the content | ||||||
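A GET request must therefore carry `stream=true` explicitly. A hedged client-side sketch (the route prefix is assumed from `req.params.sessionId` and may not match the actual router mount point); note that the handler also reads `req.body` on GET, which only works for clients able to attach a body to GET requests — the browser `fetch` API, for one, cannot:

    // Hypothetical client call satisfying the stream=true validation above.
    const resp = await fetch(`/api/llm/sessions/${sessionId}/messages?stream=true&useAdvancedContext=true`, {
        method: 'GET',
        headers: { Accept: 'text/event-stream' }
    });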
| @ -443,6 +465,33 @@ class RestChatService { | |||||||
|  |  | ||||||
|             log.info("Executing chat pipeline..."); |             log.info("Executing chat pipeline..."); | ||||||
|  |  | ||||||
|  |             // Create options object for better tracking | ||||||
|  |             const pipelineOptions = { | ||||||
|  |                 // Force useAdvancedContext to be a boolean, no matter what | ||||||
|  |                 useAdvancedContext: useAdvancedContext === true, | ||||||
|  |                 systemPrompt: session.messages.find(m => m.role === 'system')?.content, | ||||||
|  |                 temperature: session.metadata.temperature, | ||||||
|  |                 maxTokens: session.metadata.maxTokens, | ||||||
|  |                 model: session.metadata.model, | ||||||
|  |                 // Set stream based on request type, but ensure it's explicitly a boolean value | ||||||
|  |                 // GET requests or format=stream parameter indicates streaming should be used | ||||||
|  |                 stream: !!(req.method === 'GET' || req.query.format === 'stream' || req.query.stream === 'true') | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             // Log the options to verify what's being sent to the pipeline | ||||||
|  |             log.info(`Pipeline input options: ${JSON.stringify({ | ||||||
|  |                 useAdvancedContext: pipelineOptions.useAdvancedContext, | ||||||
|  |                 stream: pipelineOptions.stream | ||||||
|  |             })}`); | ||||||
|  |  | ||||||
|  |             // Import the WebSocket service for direct access | ||||||
|  |             const wsService = await import('../../services/ws.js'); | ||||||
|  |  | ||||||
|  |             // Create a stream callback wrapper | ||||||
|  |             // This will ensure we properly handle all streaming messages | ||||||
|  |             let messageContent = ''; | ||||||
|  |             let streamFinished = false; | ||||||
|  |  | ||||||
|             // Prepare the pipeline input |             // Prepare the pipeline input | ||||||
|             const pipelineInput: ChatPipelineInput = { |             const pipelineInput: ChatPipelineInput = { | ||||||
|                 messages: session.messages.map(msg => ({ |                 messages: session.messages.map(msg => ({ | ||||||
| @ -452,30 +501,109 @@ class RestChatService { | |||||||
|                 query: content, |                 query: content, | ||||||
|                 noteId: session.noteContext ?? undefined, |                 noteId: session.noteContext ?? undefined, | ||||||
|                 showThinking: showThinking, |                 showThinking: showThinking, | ||||||
|                 options: { |                 options: pipelineOptions, | ||||||
|                     useAdvancedContext: useAdvancedContext, |  | ||||||
|                     systemPrompt: session.messages.find(m => m.role === 'system')?.content, |  | ||||||
|                     temperature: session.metadata.temperature, |  | ||||||
|                     maxTokens: session.metadata.maxTokens, |  | ||||||
|                     model: session.metadata.model, |  | ||||||
|                     // Set stream based on request type, but ensure it's explicitly a boolean value |  | ||||||
|                     // GET requests or format=stream parameter indicates streaming should be used |  | ||||||
|                     stream: !!(req.method === 'GET' || req.query.format === 'stream') |  | ||||||
|                 }, |  | ||||||
|                 streamCallback: req.method === 'GET' ? (data, done, rawChunk) => { |                 streamCallback: req.method === 'GET' ? (data, done, rawChunk) => { | ||||||
|                     // Prepare response data - include both the content and raw chunk data if available |                     try { | ||||||
|                     const responseData: any = { content: data, done }; |                         // Send a single WebSocket message that contains everything needed | ||||||
|                      |                         // Only accumulate content that's actually text (not tool execution or thinking info) | ||||||
|                     // If there's tool execution information, add it to the response |                         if (data) { | ||||||
|                     if (rawChunk && rawChunk.toolExecution) { |                             messageContent += data; | ||||||
|                         responseData.toolExecution = rawChunk.toolExecution; |                         } | ||||||
|                     } |  | ||||||
|                      |                         // Create a message object with all necessary fields | ||||||
|                     // Send the data as a JSON event |                         const message: LLMStreamMessage = { | ||||||
|                     res.write(`data: ${JSON.stringify(responseData)}\n\n`); |                             type: 'llm-stream', | ||||||
|                      |                             sessionId | ||||||
|                     if (done) { |                         }; | ||||||
|                         res.end(); |  | ||||||
|                     } |                         // Add content if available - either the new chunk or full content on completion | ||||||
|  |                         if (data) { | ||||||
|  |                             message.content = data; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Add thinking info if available in the raw chunk | ||||||
|  |                         if (rawChunk?.thinking) { | ||||||
|  |                             message.thinking = rawChunk.thinking; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Add tool execution info if available in the raw chunk | ||||||
|  |                         if (rawChunk?.toolExecution) { | ||||||
|  |                             message.toolExecution = rawChunk.toolExecution; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Set done flag explicitly | ||||||
|  |                         message.done = done; | ||||||
|  |  | ||||||
|  |                         // On final message, include the complete content too | ||||||
|  |                         if (done) { | ||||||
|  |                             streamFinished = true; | ||||||
|  |  | ||||||
|  |                             // Always send the accumulated content with the done=true message | ||||||
|  |                             // This ensures the client receives the complete content even if earlier messages were missed | ||||||
|  |                             message.content = messageContent; | ||||||
|  |  | ||||||
|  |                             log.info(`Stream complete, sending final message with ${messageContent.length} chars of content`); | ||||||
|  |  | ||||||
|  |                             // Store the response in the session when done | ||||||
|  |                             session.messages.push({ | ||||||
|  |                                 role: 'assistant', | ||||||
|  |                                 content: messageContent, | ||||||
|  |                                 timestamp: new Date() | ||||||
|  |                             }); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Send message to all clients | ||||||
|  |                         wsService.default.sendMessageToAllClients(message); | ||||||
|  |  | ||||||
|  |                         // Log what was sent (first message and completion) | ||||||
|  |                         if (message.thinking || done) { | ||||||
|  |                             log.info( | ||||||
|  |                                 `[WS-SERVER] Sending LLM stream message: sessionId=${sessionId}, content=${!!message.content}, contentLength=${message.content?.length || 0}, thinking=${!!message.thinking}, toolExecution=${!!message.toolExecution}, done=${done}` | ||||||
|  |                             ); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // For GET requests, also send as server-sent events | ||||||
|  |                         // Prepare response data for JSON event | ||||||
|  |                         const responseData: any = { | ||||||
|  |                             content: data, | ||||||
|  |                             done | ||||||
|  |                         }; | ||||||
|  |  | ||||||
|  |                         // Add tool execution if available | ||||||
|  |                         if (rawChunk?.toolExecution) { | ||||||
|  |                             responseData.toolExecution = rawChunk.toolExecution; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // Send the data as a JSON event | ||||||
|  |                         res.write(`data: ${JSON.stringify(responseData)}\n\n`); | ||||||
|  |  | ||||||
|  |                         if (done) { | ||||||
|  |                             res.end(); | ||||||
|  |                         } | ||||||
|  |                     } catch (error) { | ||||||
|  |                         log.error(`Error in stream callback: ${error}`); | ||||||
|  |  | ||||||
|  |                         // Try to send error message | ||||||
|  |                         try { | ||||||
|  |                             wsService.default.sendMessageToAllClients({ | ||||||
|  |                                 type: 'llm-stream', | ||||||
|  |                                 sessionId, | ||||||
|  |                                 error: `Stream error: ${error instanceof Error ? error.message : 'Unknown error'}`, | ||||||
|  |                                 done: true | ||||||
|  |                             }); | ||||||
|  |                         } catch (e) { | ||||||
|  |                             log.error(`Failed to send error message: ${e}`); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         // End the response if not already done | ||||||
|  |                         try { | ||||||
|  |                             if (!streamFinished) { | ||||||
|  |                                 res.write(`data: ${JSON.stringify({ error: 'Stream error', done: true })}\n\n`); | ||||||
|  |                                 res.end(); | ||||||
|  |                             } | ||||||
|  |                         } catch (e) { | ||||||
|  |                             log.error(`Failed to end response: ${e}`); | ||||||
|  |                         } | ||||||
|                     } |                     } | ||||||
|                 } : undefined |                 } : undefined | ||||||
|             }; |             }; | ||||||
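For the originating GET request the same callback also writes server-sent events, one JSON `data:` line per chunk; an illustrative trace:

    data: {"content":"Hel","done":false}
    data: {"content":"lo","done":false}
    data: {"content":"Hello","done":true}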
| @ -613,7 +741,7 @@ class RestChatService { | |||||||
|  |  | ||||||
|                         // Make a follow-up request with the tool results |                         // Make a follow-up request with the tool results | ||||||
|                         log.info(`Making follow-up request with ${toolResults.length} tool results`); |                         log.info(`Making follow-up request with ${toolResults.length} tool results`); | ||||||
|                         const followUpOptions = {...chatOptions, enableTools: iterationCount < MAX_ITERATIONS}; // Enable tools for follow-up but limit iterations |                         const followUpOptions = { ...chatOptions, enableTools: iterationCount < MAX_ITERATIONS }; // Enable tools for follow-up but limit iterations | ||||||
|                         const followUpResponse = await service.generateChatCompletion(currentMessages, followUpOptions); |                         const followUpResponse = await service.generateChatCompletion(currentMessages, followUpOptions); | ||||||
|  |  | ||||||
|                         // Check if the follow-up response has more tool calls |                         // Check if the follow-up response has more tool calls | ||||||
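A compact sketch of the bounded follow-up pattern this hunk belongs to — a reconstruction under the assumption that `MAX_ITERATIONS` caps the loop as the inline comment states, not the literal surrounding source:

    let iterationCount = 0;
    let currentResponse = await service.generateChatCompletion(currentMessages, chatOptions);
    while (currentResponse.tool_calls?.length && iterationCount < MAX_ITERATIONS) {
        iterationCount++;
        const toolResults = await this.executeToolCalls(currentResponse); // service helper
        currentMessages = [
            ...currentMessages,
            { role: 'assistant', content: currentResponse.text || '', tool_calls: currentResponse.tool_calls },
            ...toolResults
        ];
        // Tools stay enabled only while under the cap, preventing unbounded recursion.
        currentResponse = await service.generateChatCompletion(currentMessages, {
            ...chatOptions,
            enableTools: iterationCount < MAX_ITERATIONS
        });
    }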
| @ -740,7 +868,10 @@ class RestChatService { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Handle streaming response from LLM |      * Handle streaming response via WebSocket | ||||||
|  |      * | ||||||
|  |      * This method processes LLM responses and sends them incrementally via WebSocket | ||||||
|  |      * to the client, supporting both text content and tool execution status updates. | ||||||
|      */ |      */ | ||||||
|     private async handleStreamingResponse( |     private async handleStreamingResponse( | ||||||
|         res: Response, |         res: Response, | ||||||
| @ -749,133 +880,211 @@ class RestChatService { | |||||||
|         service: any, |         service: any, | ||||||
|         session: ChatSession |         session: ChatSession | ||||||
|     ) { |     ) { | ||||||
|         // Set streaming headers once |         // The client receives a success response for their HTTP request, | ||||||
|         res.setHeader('Content-Type', 'text/event-stream'); |         // but the actual content will be streamed via WebSocket | ||||||
|         res.setHeader('Cache-Control', 'no-cache'); |         res.json({ success: true, message: 'Streaming response started' }); | ||||||
|         res.setHeader('Connection', 'keep-alive'); |  | ||||||
|  |  | ||||||
|         // Flag to indicate we've handled the response directly |         // Import the WebSocket service | ||||||
|         // This lets the route handler know not to process the result |         const wsService = (await import('../../services/ws.js')).default; | ||||||
|         (res as any).triliumResponseHandled = true; |  | ||||||
|  |  | ||||||
|         let messageContent = ''; |         let messageContent = ''; | ||||||
|  |         const sessionId = session.id; | ||||||
|  |  | ||||||
|  |         // Immediately send an initial message to confirm WebSocket connection is working | ||||||
|  |         // This helps prevent timeouts on the client side | ||||||
|  |         wsService.sendMessageToAllClients({ | ||||||
|  |             type: 'llm-stream', | ||||||
|  |             sessionId, | ||||||
|  |             thinking: 'Preparing response...' | ||||||
|  |         } as LLMStreamMessage); | ||||||
|  |  | ||||||
|         try { |         try { | ||||||
|             // Use the correct method name: generateChatCompletion |             // Generate the LLM completion with streaming enabled | ||||||
|             const response = await service.generateChatCompletion(aiMessages, chatOptions); |             const response = await service.generateChatCompletion(aiMessages, { | ||||||
|  |                 ...chatOptions, | ||||||
|  |                 stream: true | ||||||
|  |             }); | ||||||
|  |  | ||||||
|             // Check for tool calls in the response |             // If the model doesn't support streaming via .stream() method or returns tool calls, | ||||||
|  |             // we'll handle it specially | ||||||
|             if (response.tool_calls && response.tool_calls.length > 0) { |             if (response.tool_calls && response.tool_calls.length > 0) { | ||||||
|                 log.info(`========== STREAMING TOOL CALLS DETECTED ==========`); |                 // Send thinking state notification via WebSocket | ||||||
|                 log.info(`Response contains ${response.tool_calls.length} tool calls, executing them...`); |                 wsService.sendMessageToAllClients({ | ||||||
|                 log.info(`CRITICAL CHECK: Tool execution is supposed to happen in the pipeline, not directly here.`); |                     type: 'llm-stream', | ||||||
|                 log.info(`If tools are being executed here instead of in the pipeline, this may be a flow issue.`); |                     sessionId, | ||||||
|                 log.info(`Response came from provider: ${response.provider || 'unknown'}, model: ${response.model || 'unknown'}`); |                     thinking: 'Analyzing tools needed for this request...' | ||||||
|  |                 } as LLMStreamMessage); | ||||||
|  |  | ||||||
|                 try { |                 try { | ||||||
|                     log.info(`========== STREAMING TOOL EXECUTION PATH ==========`); |  | ||||||
|                     log.info(`About to execute tools in streaming path (this is separate from pipeline tool execution)`); |  | ||||||
|  |  | ||||||
|                     // Execute the tools |                     // Execute the tools | ||||||
|                     const toolResults = await this.executeToolCalls(response); |                     const toolResults = await this.executeToolCalls(response); | ||||||
|                     log.info(`Successfully executed ${toolResults.length} tool calls in streaming path`); |  | ||||||
|  |  | ||||||
|                     // Make a follow-up request with the tool results |                     // For each tool execution, send progress update via WebSocket | ||||||
|  |                     for (const toolResult of toolResults) { | ||||||
|  |                         wsService.sendMessageToAllClients({ | ||||||
|  |                             type: 'llm-stream', | ||||||
|  |                             sessionId, | ||||||
|  |                             toolExecution: { | ||||||
|  |                                 action: 'complete', | ||||||
|  |                                 tool: toolResult.name || 'unknown', | ||||||
|  |                                 result: toolResult.content.substring(0, 100) + (toolResult.content.length > 100 ? '...' : '') | ||||||
|  |                             } | ||||||
|  |                         } as LLMStreamMessage); | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     // Make follow-up request with tool results | ||||||
|                     const toolMessages = [...aiMessages, { |                     const toolMessages = [...aiMessages, { | ||||||
|                         role: 'assistant', |                         role: 'assistant', | ||||||
|                         content: response.text || '', |                         content: response.text || '', | ||||||
|                         tool_calls: response.tool_calls |                         tool_calls: response.tool_calls | ||||||
|                     }, ...toolResults]; |                     }, ...toolResults]; | ||||||
|  |  | ||||||
|                     log.info(`Making follow-up request with ${toolResults.length} tool results`); |  | ||||||
|  |  | ||||||
|                     // Send partial response to let the client know tools are being processed |  | ||||||
|                     if (!res.writableEnded) { |  | ||||||
|                         res.write(`data: ${JSON.stringify({ content: "Processing tools... " })}\n\n`); |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     // Use non-streaming for the follow-up to get a complete response |                     // Use non-streaming for the follow-up to get a complete response | ||||||
|                     const followUpOptions = {...chatOptions, stream: false, enableTools: false}; // Prevent infinite loops |                     const followUpOptions = { ...chatOptions, stream: false, enableTools: false }; | ||||||
|                     const followUpResponse = await service.generateChatCompletion(toolMessages, followUpOptions); |                     const followUpResponse = await service.generateChatCompletion(toolMessages, followUpOptions); | ||||||
|  |  | ||||||
|                     messageContent = followUpResponse.text || ""; |                     messageContent = followUpResponse.text || ""; | ||||||
|  |  | ||||||
|                     // Send the complete response as a single chunk |                     // Send the complete response with done flag in the same message | ||||||
|                     if (!res.writableEnded) { |                     wsService.sendMessageToAllClients({ | ||||||
|                         res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`); |                         type: 'llm-stream', | ||||||
|                         res.write('data: [DONE]\n\n'); |                         sessionId, | ||||||
|                         res.end(); |                         content: messageContent, | ||||||
|                     } |                         done: true | ||||||
|  |                     } as LLMStreamMessage); | ||||||
|  |  | ||||||
|                     // Store the full response for the session |                     // Store the response in the session | ||||||
|                     session.messages.push({ |                     session.messages.push({ | ||||||
|                         role: 'assistant', |                         role: 'assistant', | ||||||
|                         content: messageContent, |                         content: messageContent, | ||||||
|                         timestamp: new Date() |                         timestamp: new Date() | ||||||
|                     }); |                     }); | ||||||
|  |  | ||||||
|                     return; // Skip the rest of the processing |                     return; | ||||||
|                 } catch (toolError) { |                 } catch (toolError) { | ||||||
|                     log.error(`Error executing tools: ${toolError}`); |                     log.error(`Error executing tools: ${toolError}`); | ||||||
|                     // Continue with normal streaming response as fallback |  | ||||||
|  |                     // Send error via WebSocket with done flag | ||||||
|  |                     wsService.sendMessageToAllClients({ | ||||||
|  |                         type: 'llm-stream', | ||||||
|  |                         sessionId, | ||||||
|  |                         error: `Error executing tools: ${toolError instanceof Error ? toolError.message : 'Unknown error'}`, | ||||||
|  |                         done: true | ||||||
|  |                     } as LLMStreamMessage); | ||||||
|  |  | ||||||
|  |                     return; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Handle streaming if the response includes a stream method |             // Handle standard streaming through the stream() method | ||||||
|             if (response.stream) { |             if (response.stream) { | ||||||
|                 await response.stream((chunk: { text: string; done: boolean }) => { |                 log.info(`Provider ${service.getName()} supports streaming via stream() method`); | ||||||
|                     if (chunk.text) { |  | ||||||
|                         messageContent += chunk.text; |  | ||||||
|                         // Only write if the response hasn't finished |  | ||||||
|                         if (!res.writableEnded) { |  | ||||||
|                             res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     if (chunk.done) { |                 try { | ||||||
|                         // Signal the end of the stream when done, only if not already ended |                     await response.stream(async (chunk: StreamChunk) => { | ||||||
|                         if (!res.writableEnded) { |                         if (chunk.text) { | ||||||
|                             res.write('data: [DONE]\n\n'); |                             messageContent += chunk.text; | ||||||
|                             res.end(); |  | ||||||
|  |                             // Send the chunk content via WebSocket | ||||||
|  |                             wsService.sendMessageToAllClients({ | ||||||
|  |                                 type: 'llm-stream', | ||||||
|  |                                 sessionId, | ||||||
|  |                                 content: chunk.text, | ||||||
|  |                                 // Include any raw data from the provider that might contain thinking/tool info | ||||||
|  |                                 ...(chunk.raw ? { raw: chunk.raw } : {}) | ||||||
|  |                             } as LLMStreamMessage); | ||||||
|  |  | ||||||
|  |                             // Log the first chunk (useful for debugging) | ||||||
|  |                             if (messageContent.length === chunk.text.length) { | ||||||
|  |                                 log.info(`First stream chunk received from ${service.getName()}`); | ||||||
|  |                             } | ||||||
|                         } |                         } | ||||||
|                     } |  | ||||||
|                 }); |                         // If the provider indicates this is "thinking" state, relay that | ||||||
|             } else { |                         if (chunk.raw?.thinking) { | ||||||
|                 // If no streaming available, send the response as a single chunk |                             wsService.sendMessageToAllClients({ | ||||||
|                 messageContent = response.text; |                                 type: 'llm-stream', | ||||||
|                 // Only write if the response hasn't finished |                                 sessionId, | ||||||
|                 if (!res.writableEnded) { |                                 thinking: chunk.raw.thinking | ||||||
|                     res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`); |                             } as LLMStreamMessage); | ||||||
|                     res.write('data: [DONE]\n\n'); |                         } | ||||||
|                     res.end(); | 
 | ||||||
|  |                         // If the provider indicates tool execution, relay that
 | ||||||
|  |                         if (chunk.raw?.toolExecution) { | ||||||
|  |                             wsService.sendMessageToAllClients({ | ||||||
|  |                                 type: 'llm-stream', | ||||||
|  |                                 sessionId, | ||||||
|  |                                 toolExecution: chunk.raw.toolExecution | ||||||
|  |                             } as LLMStreamMessage); | ||||||
|  |                         } | ||||||
|  | 
 | ||||||
|  |                         // Signal completion when done
 | ||||||
|  |                         if (chunk.done) { | ||||||
|  |                             log.info(`Stream completed from ${service.getName()}`); | ||||||
|  | 
 | ||||||
|  |                             // Send the final message with both content and done flag together
 | ||||||
|  |                             wsService.sendMessageToAllClients({ | ||||||
|  |                                 type: 'llm-stream', | ||||||
|  |                                 sessionId, | ||||||
|  |                                 content: messageContent, // Send the accumulated content
 | ||||||
|  |                                 done: true | ||||||
|  |                             } as LLMStreamMessage); | ||||||
|  |                         } | ||||||
|  |                     }); | ||||||
|  | 
 | ||||||
|  |                     log.info(`Streaming from ${service.getName()} completed successfully`); | ||||||
|  |                 } catch (streamError) { | ||||||
|  |                     log.error(`Error during streaming from ${service.getName()}: ${streamError}`); | ||||||
|  | 
 | ||||||
|  |                     // Report the error to the client
 | ||||||
|  |                     wsService.sendMessageToAllClients({ | ||||||
|  |                         type: 'llm-stream', | ||||||
|  |                         sessionId, | ||||||
|  |                         error: `Error during streaming: ${streamError instanceof Error ? streamError.message : 'Unknown error'}`, | ||||||
|  |                         done: true | ||||||
|  |                     } as LLMStreamMessage); | ||||||
|  | 
 | ||||||
|  |                     throw streamError; | ||||||
|                 } |                 } | ||||||
|  |             } else { | ||||||
|  |                 log.info(`Provider ${service.getName()} does not support streaming via stream() method, falling back to single response`); | ||||||
|  | 
 | ||||||
|  |                 // If streaming isn't available, send the entire response at once
 | ||||||
|  |                 messageContent = response.text || ''; | ||||||
|  | 
 | ||||||
|  |                 // Send via WebSocket - include both content and done flag in same message
 | ||||||
|  |                 wsService.sendMessageToAllClients({ | ||||||
|  |                     type: 'llm-stream', | ||||||
|  |                     sessionId, | ||||||
|  |                     content: messageContent, | ||||||
|  |                     done: true | ||||||
|  |                 } as LLMStreamMessage); | ||||||
|  | 
 | ||||||
|  |                 log.info(`Complete response sent for ${service.getName()}`); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Store the full response for the session
 |             // Store the full response in the session
 | ||||||
|             const aiResponse = messageContent; |  | ||||||
| 
 |  | ||||||
|             // Store the assistant's response in the session
 |  | ||||||
|             session.messages.push({ |             session.messages.push({ | ||||||
|                 role: 'assistant', |                 role: 'assistant', | ||||||
|                 content: aiResponse, |                 content: messageContent, | ||||||
|                 timestamp: new Date() |                 timestamp: new Date() | ||||||
|             }); |             }); | ||||||
|         } catch (streamingError: any) { |         } catch (streamingError: any) { | ||||||
|             // If streaming fails and we haven't sent a response yet, throw the error
 |             log.error(`Streaming error: ${streamingError.message}`); | ||||||
|             if (!res.headersSent) { | 
 | ||||||
|                 throw streamingError; |             // Send error via WebSocket
 | ||||||
|             } else { |             wsService.sendMessageToAllClients({ | ||||||
|                 // If headers were already sent, try to send an error event
 |                 type: 'llm-stream', | ||||||
|                 try { |                 sessionId, | ||||||
|                     if (!res.writableEnded) { |                 error: `Error generating response: ${streamingError instanceof Error ? streamingError.message : 'Unknown error'}` | ||||||
|                         res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`); |             } as LLMStreamMessage); | ||||||
|                         res.write('data: [DONE]\n\n'); | 
 | ||||||
|                         res.end(); |             // Signal completion
 | ||||||
|                     } |             wsService.sendMessageToAllClients({ | ||||||
|                 } catch (e) { |                 type: 'llm-stream', | ||||||
|                     log.error(`Failed to write streaming error: ${e}`); |                 sessionId, | ||||||
|                 } |                 done: true | ||||||
|             } |             } as LLMStreamMessage); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
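
A note on the new callback signature: the handler above types its argument as StreamChunk. A minimal sketch of the shape this code assumes, inferred from the chunk.text, chunk.done, and chunk.raw accesses in the diff (the actual interface ships with the AI service types and may differ):

    // Sketch only: inferred from usage in the streaming handler above.
    interface StreamChunk {
        // Incremental text; appended to messageContent and relayed as `content`.
        text?: string;
        // Set on the final chunk; triggers the combined content + done message.
        done?: boolean;
        // Raw provider payload; may carry "thinking" or tool-execution details.
        raw?: {
            thinking?: string;
            toolExecution?: unknown;
            [key: string]: unknown;
        };
    }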
@@ -56,6 +56,21 @@ interface Message {
     originEntityId?: string | null;
     lastModifiedMs?: number;
     filePath?: string;
+
+    // LLM streaming specific fields
+    sessionId?: string;
+    content?: string;
+    thinking?: string;
+    toolExecution?: {
+        action?: string;
+        tool?: string;
+        result?: string;
+        error?: string;
+        args?: Record<string, unknown>;
+    };
+    done?: boolean;
+    error?: string;
+    raw?: unknown;
 }
 
 type SessionParser = (req: IncomingMessage, params: {}, cb: () => void) => void;
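
For illustration, a server-side caller could populate the new optional fields like this; the session id, action, tool name, and arguments are placeholders, not values from this commit:

    // Sketch: broadcasting a tool-execution update with the new Message fields.
    // Assumes wsService re-exports the sendMessageToAllClients() shown below.
    wsService.sendMessageToAllClients({
        type: 'llm-stream',
        sessionId: 'session-123',      // placeholder id
        toolExecution: {
            action: 'executing',       // placeholder action label
            tool: 'search_notes',      // hypothetical tool name
            args: { query: 'streaming' }
        }
    });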
@@ -115,15 +130,25 @@ function sendMessageToAllClients(message: Message) {
     const jsonStr = JSON.stringify(message);
 
     if (webSocketServer) {
-        if (message.type !== "sync-failed" && message.type !== "api-log-messages") {
+        // Special logging for LLM streaming messages
+        if (message.type === "llm-stream") {
+            log.info(`[WS-SERVER] Sending LLM stream message: sessionId=${message.sessionId}, content=${!!message.content}, thinking=${!!message.thinking}, toolExecution=${!!message.toolExecution}, done=${!!message.done}`);
+        } else if (message.type !== "sync-failed" && message.type !== "api-log-messages") {
             log.info(`Sending message to all clients: ${jsonStr}`);
         }
 
+        let clientCount = 0;
         webSocketServer.clients.forEach(function each(client) {
             if (client.readyState === WebSocket.OPEN) {
                 client.send(jsonStr);
+                clientCount++;
             }
         });
+
+        // Log WebSocket client count for debugging
+        if (message.type === "llm-stream") {
+            log.info(`[WS-SERVER] Sent LLM stream message to ${clientCount} clients`);
+        }
     }
 }
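
Putting the route handler and this broadcast path together, a connected client should see a frame sequence along these lines for a short streamed answer (values illustrative). Note that the final frame repeats the accumulated content alongside the done flag, so a client that missed interim chunks can presumably still render the full answer:

    // Hypothetical wire sequence for a three-chunk stream.
    const frames = [
        { type: 'llm-stream', sessionId: 'session-123', content: 'Hel' },
        { type: 'llm-stream', sessionId: 'session-123', content: 'lo!' },
        // Final frame: full accumulated text plus done, sent as one message.
        { type: 'llm-stream', sessionId: 'session-123', content: 'Hello!', done: true }
    ];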