diff --git a/src/public/app/widgets/llm_chat_panel.ts b/src/public/app/widgets/llm_chat_panel.ts
index be421a36a..18749c422 100644
--- a/src/public/app/widgets/llm_chat_panel.ts
+++ b/src/public/app/widgets/llm_chat_panel.ts
@@ -141,13 +141,39 @@ export default class LlmChatPanel extends BasicWidget {
         };

         // First, send the message via POST request
-        await server.post(`llm/sessions/${this.sessionId}/messages`, messageParams);
+        const postResponse = await server.post(`llm/sessions/${this.sessionId}/messages`, messageParams);
+
+        // If the POST request returned content directly, display it
+        if (postResponse && postResponse.content) {
+            this.addMessageToChat('assistant', postResponse.content);
+
+            // If there are sources, show them
+            if (postResponse.sources && postResponse.sources.length > 0) {
+                this.showSources(postResponse.sources);
+            }
+
+            this.hideLoadingIndicator();
+            return;
+        }

         // Then set up streaming via EventSource
         const streamUrl = `./api/llm/sessions/${this.sessionId}/messages?format=stream&useAdvancedContext=${useAdvancedContext}`;
         const source = new EventSource(streamUrl);

         let assistantResponse = '';
+        let receivedAnyContent = false;
+        let timeoutId: number | null = null;
+
+        // Set a timeout to handle the case where streaming doesn't work properly
+        timeoutId = window.setTimeout(() => {
+            if (!receivedAnyContent) {
+                // If we haven't received any content after a reasonable timeout (10 seconds),
+                // add a fallback message and close the stream
+                this.hideLoadingIndicator();
+                this.addMessageToChat('assistant', 'I\'m having trouble generating a response right now. Please try again later.');
+                source.close();
+            }
+        }, 10000);

         // Handle streaming response
         source.onmessage = (event) => {
@@ -155,13 +181,29 @@ export default class LlmChatPanel extends BasicWidget {
                 // Stream completed
                 source.close();
                 this.hideLoadingIndicator();
+
+                // Clear the timeout since we're done
+                if (timeoutId !== null) {
+                    window.clearTimeout(timeoutId);
+                }
+
+                // If we didn't receive any content but the stream completed normally,
+                // display a message to the user
+                if (!receivedAnyContent) {
+                    this.addMessageToChat('assistant', 'I processed your request, but I don\'t have any specific information to share at the moment.');
+                }
                 return;
             }

             try {
                 const data = JSON.parse(event.data);
+                console.log("Received streaming data:", data); // Debug log
+
+                // Handle both content and error cases
                 if (data.content) {
+                    receivedAnyContent = true;
                     assistantResponse += data.content;
+
                     // Update the UI with the accumulated response
                     const assistantElement = this.noteContextChatMessages.querySelector('.assistant-message:last-child .message-content');
                     if (assistantElement) {
@@ -169,18 +211,38 @@ export default class LlmChatPanel extends BasicWidget {
                     } else {
                         this.addMessageToChat('assistant', assistantResponse);
                     }
-                    // Scroll to the bottom
-                    this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
+                } else if (data.error) {
+                    // Handle error message
+                    this.hideLoadingIndicator();
+                    this.addMessageToChat('assistant', `Error: ${data.error}`);
+                    receivedAnyContent = true;
+                    source.close();
+
+                    if (timeoutId !== null) {
+                        window.clearTimeout(timeoutId);
+                    }
                 }
+
+                // Scroll to the bottom
+                this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
             } catch (e) {
-                console.error('Error parsing SSE message:', e);
+                console.error('Error parsing SSE message:', e, 'Raw data:', event.data);
             }
         };

         source.onerror = () => {
             source.close();
             this.hideLoadingIndicator();
-            toastService.showError('Error connecting to the LLM service. Please try again.');
+
+            // Clear the timeout if there was an error
+            if (timeoutId !== null) {
+                window.clearTimeout(timeoutId);
+            }
+
+            // Only show an error message if we haven't received any content yet
+            if (!receivedAnyContent) {
+                this.addMessageToChat('assistant', 'Error connecting to the LLM service. Please try again.');
+            }
         };
     } catch (error) {
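
Note on the wire format: the widget above and the route below both assume a small SSE framing contract - each event is a `data:` line whose payload is either JSON carrying `content` or `error`, or the literal terminator `[DONE]`. A minimal sketch of that contract (the `handleFrame` helper and its callbacks are illustrative, not part of the widget):

    // Illustrative sketch of the SSE payloads the widget's onmessage handler expects.
    type LlmStreamFrame =
        | { content: string }  // next piece of assistant text
        | { error: string };   // server-side failure reported after headers were sent

    function handleFrame(raw: string, onText: (text: string) => void, onError: (message: string) => void): boolean {
        if (raw === '[DONE]') {
            return true; // stream finished
        }
        const frame = JSON.parse(raw) as LlmStreamFrame;
        if ('content' in frame) {
            onText(frame.content);
        } else if ('error' in frame) {
            onError(frame.error);
        }
        return false;
    }
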
diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index 4a0528a5b..021083ccf 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -469,7 +469,7 @@ async function sendMessage(req: Request, res: Response) {
         // Get the Accept header once at the start
         const acceptHeader = req.get('Accept');
-        const isStreamingRequest = acceptHeader && acceptHeader.includes('text/event-stream');
+        const isStreamingRequest = req.method === 'GET' && req.query.format === 'stream';

         // For GET requests, ensure we have the format=stream parameter
         if (req.method === 'GET' && (!req.query.format || req.query.format !== 'stream')) {
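
Note: with `isStreamingRequest` now derived from the request shape instead of the Accept header, the endpoint effectively supports two call styles, matching what the widget does - a POST that may return a complete answer, then a GET with `format=stream` opened by EventSource. A rough sketch (the inline `server` type is illustrative, not Trilium's actual helper signature):

    // Illustrative sketch of the two request shapes the endpoint distinguishes.
    async function askLlm(
        server: { post(url: string, body: unknown): Promise<{ content?: string }> },
        sessionId: string,
        content: string
    ) {
        // 1. POST the message; a complete answer may come back directly in the body.
        const direct = await server.post(`llm/sessions/${sessionId}/messages`, { content });

        // 2. Otherwise stream it: a GET with format=stream, issued via EventSource.
        const source = new EventSource(`./api/llm/sessions/${sessionId}/messages?format=stream`);
        return { direct, source };
    }
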
@@ -580,46 +580,81 @@ async function sendMessage(req: Request, res: Response) {
             // Process based on whether this is a streaming request
             if (isStreamingRequest) {
+                // Set streaming headers once
                 res.setHeader('Content-Type', 'text/event-stream');
                 res.setHeader('Cache-Control', 'no-cache');
                 res.setHeader('Connection', 'keep-alive');

+                // Flag to indicate we've handled the response directly
+                // This lets the route handler know not to process the result
+                (res as any).triliumResponseHandled = true;
+
                 let messageContent = '';

-                // Use the correct method name: generateChatCompletion
-                const response = await service.generateChatCompletion(aiMessages, chatOptions);
+                try {
+                    // Use the correct method name: generateChatCompletion
+                    const response = await service.generateChatCompletion(aiMessages, chatOptions);

-                // Handle streaming if the response includes a stream method
-                if (response.stream) {
-                    await response.stream((chunk: { text: string; done: boolean }) => {
-                        if (chunk.text) {
-                            messageContent += chunk.text;
-                            res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
-                        }
+                    // Handle streaming if the response includes a stream method
+                    if (response.stream) {
+                        await response.stream((chunk: { text: string; done: boolean }) => {
+                            if (chunk.text) {
+                                messageContent += chunk.text;
+                                // Only write if the response hasn't finished
+                                if (!res.writableEnded) {
+                                    res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
+                                }
+                            }

-                        if (chunk.done) {
-                            // Signal the end of the stream when done
+                            if (chunk.done) {
+                                // Signal the end of the stream when done, only if not already ended
+                                if (!res.writableEnded) {
+                                    res.write('data: [DONE]\n\n');
+                                    res.end();
+                                }
+                            }
+                        });
+                    } else {
+                        // If no streaming available, send the response as a single chunk
+                        messageContent = response.text;
+                        // Only write if the response hasn't finished
+                        if (!res.writableEnded) {
+                            res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
                             res.write('data: [DONE]\n\n');
                             res.end();
                         }
+                    }
+
+                    // Store the full response for the session
+                    aiResponse = messageContent;
+
+                    // Store the assistant's response in the session
+                    session.messages.push({
+                        role: 'assistant',
+                        content: aiResponse,
+                        timestamp: new Date()
                     });
-                } else {
-                    // If no streaming available, send the response as a single chunk
-                    messageContent = response.text;
-                    res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
-                    res.write('data: [DONE]\n\n');
-                    res.end();
+
+                    // For streaming requests we don't return anything as we've already sent the response
+                    return null;
+                } catch (streamingError: any) {
+                    // If streaming fails and we haven't sent a response yet, throw the error
+                    if (!res.headersSent) {
+                        throw streamingError;
+                    } else {
+                        // If headers were already sent, try to send an error event
+                        try {
+                            if (!res.writableEnded) {
+                                res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
+                                res.write('data: [DONE]\n\n');
+                                res.end();
+                            }
+                        } catch (e) {
+                            log.error(`Failed to write streaming error: ${e}`);
+                        }
+                        return null;
+                    }
                 }
-
-                // Store the full response for the session
-                aiResponse = messageContent;
-
-                // Store the assistant's response in the session
-                session.messages.push({
-                    role: 'assistant',
-                    content: aiResponse,
-                    timestamp: new Date()
-                });
             } else {
                 // Non-streaming approach for POST requests
                 const response = await service.generateChatCompletion(aiMessages, chatOptions);
@@ -646,7 +681,7 @@ async function sendMessage(req: Request, res: Response) {
             } else {
                 // Original approach - find relevant notes through direct embedding comparison
                 const relevantNotes = await findRelevantNotes(
-                    content,
+                    messageContent,
                     session.noteContext || null,
                     5
                 );
@@ -654,7 +689,7 @@ async function sendMessage(req: Request, res: Response) {
                 sourceNotes = relevantNotes;

                 // Build context from relevant notes
-                const context = buildContextFromNotes(relevantNotes, content);
+                const context = buildContextFromNotes(relevantNotes, messageContent);

                 // Add system message with the context
                 const contextMessage: Message = {
@@ -680,46 +715,81 @@ async function sendMessage(req: Request, res: Response) {
             };

             if (isStreamingRequest) {
+                // Set streaming headers once
                 res.setHeader('Content-Type', 'text/event-stream');
                 res.setHeader('Cache-Control', 'no-cache');
                 res.setHeader('Connection', 'keep-alive');

+                // Flag to indicate we've handled the response directly
+                // This lets the route handler know not to process the result
+                (res as any).triliumResponseHandled = true;
+
                 let messageContent = '';

-                // Use the correct method name: generateChatCompletion
-                const response = await service.generateChatCompletion(aiMessages, chatOptions);
+                try {
+                    // Use the correct method name: generateChatCompletion
+                    const response = await service.generateChatCompletion(aiMessages, chatOptions);

-                // Handle streaming if the response includes a stream method
-                if (response.stream) {
-                    await response.stream((chunk: { text: string; done: boolean }) => {
-                        if (chunk.text) {
-                            messageContent += chunk.text;
-                            res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
-                        }
+                    // Handle streaming if the response includes a stream method
+                    if (response.stream) {
+                        await response.stream((chunk: { text: string; done: boolean }) => {
+                            if (chunk.text) {
+                                messageContent += chunk.text;
+                                // Only write if the response hasn't finished
+                                if (!res.writableEnded) {
+                                    res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
+                                }
+                            }

-                        if (chunk.done) {
-                            // Signal the end of the stream when done
+                            if (chunk.done) {
+                                // Signal the end of the stream when done, only if not already ended
+                                if (!res.writableEnded) {
+                                    res.write('data: [DONE]\n\n');
+                                    res.end();
+                                }
+                            }
+                        });
+                    } else {
+                        // If no streaming available, send the response as a single chunk
+                        messageContent = response.text;
+                        // Only write if the response hasn't finished
+                        if (!res.writableEnded) {
+                            res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
                             res.write('data: [DONE]\n\n');
                             res.end();
                         }
+                    }
+
+                    // Store the full response for the session
+                    aiResponse = messageContent;
+
+                    // Store the assistant's response in the session
+                    session.messages.push({
+                        role: 'assistant',
+                        content: aiResponse,
+                        timestamp: new Date()
                     });
-                } else {
-                    // If no streaming available, send the response as a single chunk
-                    messageContent = response.text;
-                    res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
-                    res.write('data: [DONE]\n\n');
-                    res.end();
+
+                    // For streaming requests we don't return anything as we've already sent the response
+                    return null;
+                } catch (streamingError: any) {
+                    // If streaming fails and we haven't sent a response yet, throw the error
+                    if (!res.headersSent) {
+                        throw streamingError;
+                    } else {
+                        // If headers were already sent, try to send an error event
+                        try {
+                            if (!res.writableEnded) {
+                                res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
+                                res.write('data: [DONE]\n\n');
+                                res.end();
+                            }
+                        } catch (e) {
+                            log.error(`Failed to write streaming error: ${e}`);
+                        }
+                        return null;
+                    }
                 }
-
-                // Store the full response for the session
-                aiResponse = messageContent;
-
-                // Store the assistant's response in the session
-                session.messages.push({
-                    role: 'assistant',
-                    content: aiResponse,
-                    timestamp: new Date()
-                });
             } else {
                 // Non-streaming approach for POST requests
                 const response = await service.generateChatCompletion(aiMessages, chatOptions);
@@ -744,6 +814,12 @@ async function sendMessage(req: Request, res: Response) {
                 };
             }
         }
+        } else {
+            // If it's not a POST or streaming GET request, return the session's message history
+            return {
+                id: session.id,
+                messages: session.messages
+            };
         }
     } catch (error: any) {
         log.error(`Error sending message to LLM: ${error.message}`);
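
Note: the recurring pattern in both streaming branches above is to set the SSE headers, mark the response as handled via `(res as any).triliumResponseHandled`, guard every write with `res.writableEnded`, and return `null` so the generic result handler (patched in routes.ts below) leaves the response alone. A condensed, illustrative sketch of that hand-off with a made-up payload:

    import type { Request, Response } from 'express';

    // Condensed, illustrative version of the streaming hand-off used above.
    async function streamingHandler(req: Request, res: Response): Promise<null> {
        res.setHeader('Content-Type', 'text/event-stream');
        res.setHeader('Cache-Control', 'no-cache');
        res.setHeader('Connection', 'keep-alive');

        // Tell the route layer that this handler owns the response from here on.
        (res as any).triliumResponseHandled = true;

        if (!res.writableEnded) {
            res.write(`data: ${JSON.stringify({ content: 'example chunk' })}\n\n`);
            res.write('data: [DONE]\n\n');
            res.end();
        }

        return null; // nothing left for the framework's result handler to serialize
    }
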
diff --git a/src/routes/routes.ts b/src/routes/routes.ts
index fa233e09f..ba1f8b7d1 100644
--- a/src/routes/routes.ts
+++ b/src/routes/routes.ts
@@ -505,8 +505,14 @@ function route(method: HttpMethod, path: string, middleware: express.Handler[],
 }

 function handleResponse(resultHandler: ApiResultHandler, req: express.Request, res: express.Response, result: unknown, start: number) {
-    const responseLength = resultHandler(req, res, result);
+    // Skip result handling if the response has already been handled
+    if ((res as any).triliumResponseHandled) {
+        // Just log the request without additional processing
+        log.request(req, res, Date.now() - start, 0);
+        return;
+    }
+
+    const responseLength = resultHandler(req, res, result);
     log.request(req, res, Date.now() - start, responseLength);
 }
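
Note: the ollama_service.ts changes below parse Ollama's streaming `/api/chat` output, which arrives as newline-delimited JSON. Only a few fields are read by this code; a sketch of the assumed chunk shape (real chunks contain more fields, which are ignored here):

    // Illustrative subset of an Ollama /api/chat streaming chunk, limited to the
    // fields this service reads.
    interface OllamaChatChunk {
        message?: { content: string };  // incremental assistant text
        done: boolean;                  // true on the final chunk
        prompt_eval_count?: number;     // token usage, present on the final chunk
        eval_count?: number;
    }
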
diff --git a/src/services/llm/ollama_service.ts b/src/services/llm/ollama_service.ts
index c2a156a47..1c726441b 100644
--- a/src/services/llm/ollama_service.ts
+++ b/src/services/llm/ollama_service.ts
@@ -29,6 +29,10 @@ export class OllamaService extends BaseAIService {
         // Format messages for Ollama
         const formattedMessages = this.formatMessages(messages, systemPrompt);

+        // Log the formatted messages for debugging
+        console.log('Input messages for formatting:', messages);
+        console.log('Formatted messages for Ollama:', formattedMessages);
+
         try {
             const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;

@@ -75,6 +79,7 @@ export class OllamaService extends BaseAIService {
             const reader = response.body.getReader();
             let fullText = "";
             let partialLine = "";
+            let receivedAnyContent = false;

             try {
                 while (true) {
@@ -102,6 +107,7 @@ export class OllamaService extends BaseAIService {
                                 const newContent = data.message.content;
                                 // Add to full text
                                 fullText += newContent;
+                                receivedAnyContent = true;
                                 // Call the callback with the new content
                                 await callback({
                                     text: newContent,
@@ -110,6 +116,18 @@ export class OllamaService extends BaseAIService {
                             }

                             if (data.done) {
+                                // If we received an empty response with done=true,
+                                // generate a fallback response
+                                if (!receivedAnyContent && fullText.trim() === "") {
+                                    // Generate a fallback response
+                                    const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
+                                    await callback({
+                                        text: fallbackText,
+                                        done: false
+                                    });
+                                    fullText = fallbackText;
+                                }
+
                                 // Final message in the stream
                                 await callback({
                                     text: "",
@@ -136,19 +154,64 @@ export class OllamaService extends BaseAIService {
                         const data = JSON.parse(partialLine.trim());
                         if (data.message && data.message.content) {
                             fullText += data.message.content;
+                            receivedAnyContent = true;
                             await callback({
                                 text: data.message.content,
                                 done: false
                             });
                         }
+
+                        if (data.done) {
+                            // Check for empty responses
+                            if (!receivedAnyContent && fullText.trim() === "") {
+                                // Generate a fallback response
+                                const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
+                                await callback({
+                                    text: fallbackText,
+                                    done: false
+                                });
+                                fullText = fallbackText;
+                            }
+
+                            await callback({
+                                text: "",
+                                done: true,
+                                usage: {
+                                    promptTokens: data.prompt_eval_count || 0,
+                                    completionTokens: data.eval_count || 0,
+                                    totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
+                                }
+                            });
+                        }
                     } catch (err) {
-                        console.error("Error parsing final JSON from Ollama stream:", err);
+                        console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
                     }
                 }

+                // If we reached the end without a done message and without any content
+                if (!receivedAnyContent && fullText.trim() === "") {
+                    // Generate a fallback response
+                    const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
+                    await callback({
+                        text: fallbackText,
+                        done: false
+                    });
+
+                    // Final message
+                    await callback({
+                        text: "",
+                        done: true,
+                        usage: {
+                            promptTokens: 0,
+                            completionTokens: 0,
+                            totalTokens: 0
+                        }
+                    });
+                }
+
                 return fullText;
             } catch (err) {
-                console.error("Error reading Ollama stream:", err);
+                console.error("Error processing Ollama stream:", err);
                 throw err;
             }
         }
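
Note: for readers unfamiliar with the surrounding code, the loop being patched follows a fairly standard NDJSON pattern - decode each chunk, split on newlines, keep the trailing partial line for the next read, and JSON-parse the complete lines. A stripped-down sketch under those assumptions (the `emit` callback stands in for the real `callback`, and error handling is simplified):

    // Stripped-down sketch of the NDJSON stream handling used by OllamaService.
    async function readNdjsonStream(
        reader: ReadableStreamDefaultReader<Uint8Array>,
        emit: (text: string, done: boolean) => Promise<void>
    ): Promise<string> {
        const decoder = new TextDecoder();
        let fullText = '';
        let partialLine = '';

        while (true) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }

            const lines = (partialLine + decoder.decode(value, { stream: true })).split('\n');
            partialLine = lines.pop() ?? ''; // keep the incomplete trailing line for the next chunk

            for (const line of lines) {
                if (!line.trim()) {
                    continue;
                }
                const data = JSON.parse(line);
                if (data.message?.content) {
                    fullText += data.message.content;
                    await emit(data.message.content, false);
                }
                if (data.done) {
                    await emit('', true);
                }
            }
        }

        return fullText;
    }
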