From ea4d3ac8002b8566bcede64cfa7c39b84f98fca2 Mon Sep 17 00:00:00 2001
From: perf3ct <jonfuller2012@gmail.com>
Date: Fri, 28 Mar 2025 22:29:33 +0000
Subject: [PATCH] Do a better job with Ollama context, again

---
 src/routes/api/llm.ts                         |  53 +++---
 src/services/llm/chat_service.ts              |  17 +-
 .../llm/context/modules/context_formatter.ts  | 155 ++++++++++++++----
 src/services/llm/context_service.ts           |  72 ++++++++
 src/services/llm/providers/ollama_service.ts  | 154 +++++++++++++++--
 5 files changed, 378 insertions(+), 73 deletions(-)

diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index 693f5e839..a65c74fff 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -19,7 +19,7 @@ import { CONTEXT_PROMPTS } from '../../services/llm/constants/llm_prompt_constan
 export const LLM_CONSTANTS = {
     // Context window sizes (in characters)
     CONTEXT_WINDOW: {
-        OLLAMA: 6000,
+        OLLAMA: 8000,
         OPENAI: 12000,
         ANTHROPIC: 15000,
         VOYAGE: 12000,
@@ -61,6 +61,8 @@ export const LLM_CONSTANTS = {
     // Model-specific context windows for Ollama models
     OLLAMA_MODEL_CONTEXT_WINDOWS: {
         "llama3": 8192,
+        "llama3.1": 8192,
+        "llama3.2": 8192,
         "mistral": 8192,
         "nomic": 32768,
         "mxbai": 32768,
@@ -954,20 +956,32 @@ async function sendMessage(req: Request, res: Response) {
                 log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
                 log.info(`Number of notes included: ${sourceNotes.length}`);
 
-                // Format all messages for the AI (advanced context case)
-                const aiMessages: Message[] = [
-                    contextMessage,
-                    ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
+                // Get messages with context properly formatted for the specific LLM provider
+                const aiMessages = contextService.buildMessagesWithContext(
+                    session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                         role: msg.role,
                         content: msg.content
-                    }))
-                ];
+                    })),
+                    context,
+                    service
+                );
+
+                // Add enhanced debug logging
+                if (service.constructor.name === 'OllamaService') {
+                    // Log condensed version of the context so we can see if it's being properly formatted
+                    console.log(`Sending context to Ollama with length: ${context.length} chars`);
+                    console.log(`Context first 200 chars: ${context.substring(0, 200).replace(/\n/g, '\\n')}...`);
+                    console.log(`Context last 200 chars: ${context.substring(context.length - 200).replace(/\n/g, '\\n')}...`);
+
+                    // Log the first user message to verify context injection is working
+                    const userMsg = aiMessages.find(m => m.role === 'user');
+                    if (userMsg) {
+                        console.log(`First user message (first 200 chars): ${userMsg.content.substring(0, 200).replace(/\n/g, '\\n')}...`);
+                    }
+                }
 
                 // DEBUG: Log message structure being sent to LLM
                 log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`);
-                aiMessages.forEach((msg, idx) => {
-                    log.info(`Message ${idx}: role=${msg.role}, content length=${msg.content.length} chars, begins with: "${msg.content.substring(0, 50)}..."`);
-                });
 
                 // Configure chat options from session metadata
                 const chatOptions: ChatCompletionOptions = {
@@ -1089,20 +1103,15 @@ async function sendMessage(req: Request, res: Response) {
                 // Build context from relevant notes
                 const context = buildContextFromNotes(relevantNotes, messageContent);
 
-                // Add system message with the context
-                const contextMessage: Message = {
-                    role: 'system',
-                    content: context
-                };
-
-                // Format all messages for the AI (original approach)
-                const aiMessages: Message[] = [
-                    contextMessage,
-                    ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
+                // Get messages with context properly formatted for the specific LLM provider
+                const aiMessages = contextService.buildMessagesWithContext(
+                    session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                         role: msg.role,
                         content: msg.content
-                    }))
-                ];
+                    })),
+                    context,
+                    service
+                );
 
                 // Configure chat options from session metadata
                 const chatOptions: ChatCompletionOptions = {
diff --git a/src/services/llm/chat_service.ts b/src/services/llm/chat_service.ts
index f2ba2bc68..f155248da 100644
--- a/src/services/llm/chat_service.ts
+++ b/src/services/llm/chat_service.ts
@@ -264,17 +264,12 @@ export class ChatService {
                 showThinking
             );
 
-            // Prepend a system message with context
-            const systemMessage: Message = {
-                role: 'system',
-                content: CONTEXT_PROMPTS.CONTEXT_AWARE_SYSTEM_PROMPT.replace(
-                    '{enhancedContext}',
-                    enhancedContext
-                )
-            };
-
-            // Create messages array with system message
-            const messagesWithContext = [systemMessage, ...session.messages];
+            // Create messages array with context using the improved method
+            const messagesWithContext = contextService.buildMessagesWithContext(
+                session.messages,
+                enhancedContext,
+                aiServiceManager.getService() // Get the default service
+            );
 
             // Generate AI response
             const response = await aiServiceManager.generateChatCompletion(
diff --git a/src/services/llm/context/modules/context_formatter.ts b/src/services/llm/context/modules/context_formatter.ts
index b85a284b2..91f1ac160 100644
--- a/src/services/llm/context/modules/context_formatter.ts
+++ b/src/services/llm/context/modules/context_formatter.ts
@@ -7,7 +7,7 @@ import type { IContextFormatter, NoteSearchResult } from '../../interfaces/conte
 const CONTEXT_WINDOW = {
     OPENAI: 16000,
     ANTHROPIC: 100000,
-    OLLAMA: 8000,
+    OLLAMA: 4000,  // Reduced to avoid issues
     DEFAULT: 4000
 };
 
@@ -42,20 +42,25 @@ export class ContextFormatter implements IContextFormatter {
 
             // DEBUG: Log context window size
             log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`);
-            log.info(`Formatting context from ${sources.length} sources for query: "${query.substring(0, 50)}..."`);
+            log.info(`Building context from notes with query: ${query}`);
+            log.info(`Sources length: ${sources.length}`);
 
-            // Use a format appropriate for the model family
-            const isAnthropicFormat = providerId === 'anthropic';
+            // Use provider-specific formatting
+            let formattedContext = '';
 
-            // Start with different headers based on provider
-            let formattedContext = isAnthropicFormat
-                ? CONTEXT_PROMPTS.CONTEXT_HEADERS.ANTHROPIC(query)
-                : CONTEXT_PROMPTS.CONTEXT_HEADERS.DEFAULT(query);
+            if (providerId === 'ollama') {
+                // For Ollama, use a much simpler plain text format that's less prone to encoding issues
+                formattedContext = `# Context for your query: "${query}"\n\n`;
+            } else if (providerId === 'anthropic') {
+                formattedContext = CONTEXT_PROMPTS.CONTEXT_HEADERS.ANTHROPIC(query);
+            } else {
+                formattedContext = CONTEXT_PROMPTS.CONTEXT_HEADERS.DEFAULT(query);
+            }
 
             // Sort sources by similarity if available to prioritize most relevant
             if (sources[0] && sources[0].similarity !== undefined) {
                 sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
-                // DEBUG: Log sorting information
+                // Log sorting information
                 log.info(`Sources sorted by similarity. Top sources: ${sources.slice(0, 3).map(s => s.title || 'Untitled').join(', ')}`);
             }
 
@@ -63,7 +68,7 @@ export class ContextFormatter implements IContextFormatter {
             let totalSize = formattedContext.length;
             const formattedSources: string[] = [];
 
-            // DEBUG: Track stats for logging
+            // Track stats for logging
             let sourcesProcessed = 0;
             let sourcesIncluded = 0;
             let sourcesSkipped = 0;
@@ -73,10 +78,18 @@ export class ContextFormatter implements IContextFormatter {
             for (const source of sources) {
                 sourcesProcessed++;
                 let content = '';
+                let title = 'Untitled Note';
+
                 if (typeof source === 'string') {
                     content = source;
                 } else if (source.content) {
-                    content = this.sanitizeNoteContent(source.content, source.type, source.mime);
+                    // For Ollama, use a more aggressive sanitization to avoid encoding issues
+                    if (providerId === 'ollama') {
+                        content = this.sanitizeForOllama(source.content);
+                    } else {
+                        content = this.sanitizeNoteContent(source.content, source.type, source.mime);
+                    }
+                    title = source.title || title;
                 } else {
                     sourcesSkipped++;
                     log.info(`Skipping note with no content: ${source.title || 'Untitled'}`);
@@ -86,14 +99,18 @@ export class ContextFormatter implements IContextFormatter {
                 // Skip if content is empty or just whitespace/minimal
                 if (!content || content.trim().length <= 10) {
                     sourcesSkipped++;
-                    log.info(`Skipping note with minimal content: ${source.title || 'Untitled'}`);
+                    log.info(`Skipping note with minimal content: ${title}`);
                     continue;
                 }
 
-                // Format source with title if available
-                const title = source.title || 'Untitled Note';
-                const noteId = source.noteId || '';
-                const formattedSource = `### ${title}\n${content}\n`;
+                // Format source with title - use simple format for Ollama
+                let formattedSource = '';
+                if (providerId === 'ollama') {
+                    // For Ollama, use a simpler format and plain ASCII
+                    formattedSource = `## ${title}\n${content}\n\n`;
+                } else {
+                    formattedSource = `### ${title}\n${content}\n\n`;
+                }
 
                 // Check if adding this would exceed our size limit
                 if (totalSize + formattedSource.length > maxTotalLength) {
@@ -102,12 +119,13 @@ export class ContextFormatter implements IContextFormatter {
                     if (formattedSources.length === 0) {
                         const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text
                         if (availableSpace > 200) { // Only if we have reasonable space
-                            const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`;
+                            const truncatedContent = providerId === 'ollama' ?
+                                `## ${title}\n${content.substring(0, availableSpace)}...\n\n` :
+                                `### ${title}\n${content.substring(0, availableSpace)}...\n\n`;
                             formattedSources.push(truncatedContent);
                             totalSize += truncatedContent.length;
                             sourcesIncluded++;
-                            // DEBUG: Log truncation
-                            log.info(`Truncated first source "${title}" to fit in context window. Used ${truncatedContent.length} of ${formattedSource.length} chars`);
+                            log.info(`Truncated first source "${title}" to fit in context window`);
                         }
                     }
                     break;
@@ -118,24 +136,29 @@ export class ContextFormatter implements IContextFormatter {
                 sourcesIncluded++;
             }
 
-            // DEBUG: Log sources stats
+            // Log sources stats
             log.info(`Context building stats: processed ${sourcesProcessed}/${sources.length} sources, included ${sourcesIncluded}, skipped ${sourcesSkipped}, exceeded limit ${sourcesExceededLimit}`);
             log.info(`Context size so far: ${totalSize}/${maxTotalLength} chars (${(totalSize/maxTotalLength*100).toFixed(2)}% of limit)`);
 
             // Add the formatted sources to the context
-            formattedContext += formattedSources.join('\n');
+            formattedContext += formattedSources.join('');
 
-            // Add closing to provide instructions to the AI
-            const closing = isAnthropicFormat
-                ? CONTEXT_PROMPTS.CONTEXT_CLOSINGS.ANTHROPIC
-                : CONTEXT_PROMPTS.CONTEXT_CLOSINGS.DEFAULT;
+            // Add closing to provide instructions to the AI - use simpler version for Ollama
+            let closing = '';
+            if (providerId === 'ollama') {
+                closing = '\n\nPlease use the information above to answer the query and keep your response concise.';
+            } else if (providerId === 'anthropic') {
+                closing = CONTEXT_PROMPTS.CONTEXT_CLOSINGS.ANTHROPIC;
+            } else {
+                closing = CONTEXT_PROMPTS.CONTEXT_CLOSINGS.DEFAULT;
+            }
 
             // Check if adding the closing would exceed our limit
             if (totalSize + closing.length <= maxTotalLength) {
                 formattedContext += closing;
             }
 
-            // DEBUG: Log final context size
+            // Log final context size
             log.info(`Final context: ${formattedContext.length} chars, ${formattedSources.length} sources included`);
 
             return formattedContext;
@@ -161,18 +184,52 @@ export class ContextFormatter implements IContextFormatter {
         try {
             // If it's HTML content, sanitize it
             if (mime === 'text/html' || type === 'text') {
-                // Use sanitize-html to convert HTML to plain text
-                const sanitized = sanitizeHtml(content, {
+                // First, try to preserve some structure by converting to markdown-like format
+                const contentWithMarkdown = content
+                    // Convert headers
+                    .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                    .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                    .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                    .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
+                    .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
+                    // Convert lists
+                    .replace(/<\/?ul[^>]*>/g, '\n')
+                    .replace(/<\/?ol[^>]*>/g, '\n')
+                    .replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')
+                    // Convert links
+                    .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                    // Convert code blocks
+                    .replace(/<pre[^>]*><code[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
+                    .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
+                    // Convert emphasis
+                    .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
+                    .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
+                    .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
+                    .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
+                    // Handle paragraphs better
+                    .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
+                    // Handle line breaks
+                    .replace(/<br\s*\/?>/gi, '\n');
+
+                // Then use sanitize-html to remove remaining HTML
+                const sanitized = sanitizeHtml(contentWithMarkdown, {
                     allowedTags: [], // No tags allowed (strip all HTML)
                     allowedAttributes: {}, // No attributes allowed
                     textFilter: function(text) {
                         return text
                             .replace(/&nbsp;/g, ' ')
+                            .replace(/&lt;/g, '<')
+                            .replace(/&gt;/g, '>')
+                            .replace(/&amp;/g, '&')
+                            .replace(/&quot;/g, '"')
                             .replace(/\n\s*\n\s*\n/g, '\n\n'); // Replace multiple blank lines with just one
                     }
                 });
 
-                return sanitized.trim();
+                // Remove unnecessary whitespace while preserving meaningful structure
+                return sanitized
+                    .replace(/\n{3,}/g, '\n\n')  // no more than 2 consecutive newlines
+                    .trim();
             }
 
             // If it's code, keep formatting but limit size
@@ -191,6 +248,46 @@ export class ContextFormatter implements IContextFormatter {
             return content; // Return original content if sanitization fails
         }
     }
+
+    /**
+     * Ollama-specific sanitization: strips HTML via sanitize-html, maps typographic punctuation to ASCII,
+     * drops remaining non-ASCII characters and collapses whitespace. Returns '' for empty input or on error.
+     */
+    sanitizeForOllama(content: string): string {
+        if (!content) {
+            return '';
+        }
+
+        try {
+            // First remove any HTML
+            let plaintext = sanitizeHtml(content, {
+                allowedTags: [],
+                allowedAttributes: {},
+                textFilter: (text) => text
+            });
+
+            // Then aggressively sanitize to plain ASCII and simple formatting
+            plaintext = plaintext
+                // Map curly quotes to ASCII (escapes keep the pattern intact regardless of file encoding)
+                .replace(/[\u201C\u201D]/g, '"')
+                .replace(/[\u2018\u2019]/g, "'")
+                // Replace other common Unicode characters (en/em dash, bullet, ellipsis)
+                .replace(/[\u2013\u2014]/g, '-')
+                .replace(/\u2022/g, '*')
+                .replace(/\u2026/g, '...')
+                // Strip all remaining non-ASCII characters
+                .replace(/[^\x00-\x7F]/g, '')
+                // Collapse horizontal whitespace only, then tidy around line breaks so they survive
+                .replace(/[^\S\n]+/g, ' ')
+                .replace(/ ?\n\s*/g, '\n')
+                .trim();
+
+            return plaintext;
+        } catch (error) {
+            log.error(`Error sanitizing note content for Ollama: ${error}`);
+            return ''; // Return empty if sanitization fails
+        }
+    }
 }
 
 // Export singleton instance
diff --git a/src/services/llm/context_service.ts b/src/services/llm/context_service.ts
index 4370cf731..bf6fa14ef 100644
--- a/src/services/llm/context_service.ts
+++ b/src/services/llm/context_service.ts
@@ -9,6 +9,8 @@ import log from '../log.js';
 import contextService from './context/modules/context_service.js';
 import { ContextExtractor } from './context/index.js';
 import type { NoteSearchResult } from './interfaces/context_interfaces.js';
+import type { Message } from './ai_interface.js';
+import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js';
 
 /**
  * Main Context Service for Trilium Notes
@@ -185,6 +187,76 @@ class TriliumContextService {
     clearCaches(): void {
         return contextService.clearCaches();
     }
+
+    /**
+     * Merge retrieved note context into a chat history as a system message.
+     * The input array is left untouched; a new array is always returned.
+     *
+     * - Empty or missing history: the result is a single system message holding the context.
+     * - History without a system message: a context-only system message is prepended.
+     * - Otherwise the context is prepended to the FIRST system message only, so it is never
+     *   repeated. For Ollama every later system message is dropped (it handles multiple
+     *   system messages poorly); for other providers later system messages are kept as-is.
+     * - On an unexpected error: a context system message followed by the non-system messages.
+     *
+     * @param messages   conversation history; the array is not mutated
+     * @param context    formatted context text to inject
+     * @param llmService service that will receive the messages; only its class name is inspected
+     * @returns a new array of messages that includes the context
+     */
+    buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] {
+        try {
+            // Nothing to merge into: the context becomes the whole conversation
+            if (!messages || messages.length === 0) {
+                return [{ role: 'system', content: context }];
+            }
+
+            if (!messages.some(msg => msg.role === 'system')) {
+                // No system message found, prepend one with the context
+                return [
+                    { role: 'system', content: context },
+                    ...messages
+                ];
+            }
+
+            // A missing service is treated like any non-Ollama provider instead of throwing
+            const isOllama = llmService != null && llmService.constructor.name === 'OllamaService';
+
+            const result: Message[] = [];
+            let contextInjected = false;
+
+            // Walk the history once, keeping the original message order
+            for (const msg of messages) {
+                if (msg.role !== 'system') {
+                    // Non-system messages pass through untouched
+                    result.push(msg);
+                } else if (!contextInjected) {
+                    // First system message: attach the context exactly once
+                    contextInjected = true;
+                    result.push({
+                        role: 'system',
+                        content: msg.content.includes(context) ?
+                            msg.content : // Avoid duplicate context
+                            `${context}\n\n${msg.content}`
+                    });
+                } else if (!isOllama) {
+                    // Later system messages never get the context again; Ollama skips them entirely
+                    result.push(msg);
+                }
+            }
+
+            return result;
+        } catch (error) {
+            log.error(`Error building messages with context: ${error}`);
+
+            // Fallback: prepend a system message with context
+            const safeMessages = Array.isArray(messages) ? messages : [];
+            return [
+                { role: 'system', content: context },
+                ...safeMessages.filter(msg => msg.role !== 'system')
+            ];
+        }
+    }
 }
 
 // Export singleton instance
diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts
index 61d40db39..ff54459fb 100644
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@@ -287,28 +287,160 @@ export class OllamaService extends BaseAIService {
         }
     }
 
+    /**
+     * Convert HTML fragments in context text to markdown-like plain text before sending to Ollama.
+     * Line breaks are preserved; the original content is returned unchanged if cleaning throws.
+     */
+    private cleanContextContent(content: string): string {
+        if (!content) return '';
+
+        try {
+            // Repair known mojibake and map typographic punctuation to ASCII. Other Unicode text is
+            // left untouched: percent-encoding it would inject hex noise such as "C3A9" into the prompt.
+            let sanitized = content
+                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')  // Fix broken quote chars
+                .replace(/\u00A0/g, ' ')
+                .replace(/[\u201C\u201D]/g, '"')
+                .replace(/[\u2018\u2019]/g, "'")
+                .replace(/[\u2013\u2014]/g, '-')
+                .replace(/\u2022/g, '*')
+                .replace(/\u2026/g, '...');
+
+            // Replace common HTML tags with markdown or plain text equivalents
+            sanitized = sanitized
+                // Remove HTML divs, spans, etc. (\b keeps <p> from also matching <pre>, <li> from <link>, ...)
+                .replace(/<\/?div\b[^>]*>/gi, '')
+                .replace(/<\/?span\b[^>]*>/gi, '')
+                .replace(/<\/?p\b[^>]*>/gi, '\n')
+                // Convert headers
+                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
+                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
+                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
+                // Convert lists
+                .replace(/<\/?ul\b[^>]*>/gi, '')
+                .replace(/<\/?ol\b[^>]*>/gi, '')
+                .replace(/<li\b[^>]*>(.*?)<\/li>/gi, '- $1\n')
+                // Convert links
+                .replace(/<a\b[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
+                // Convert code blocks
+                .replace(/<pre\b[^>]*><code\b[^>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```')
+                .replace(/<code\b[^>]*>(.*?)<\/code>/gi, '`$1`')
+                // Convert emphasis
+                .replace(/<\/?strong\b[^>]*>/gi, '**')
+                .replace(/<\/?em\b[^>]*>/gi, '*')
+                // Remove figure tags
+                .replace(/<\/?figure\b[^>]*>/gi, '')
+                // Remove all other HTML tags
+                .replace(/<[^>]*>/g, '')
+                // Fix HTML entities
+                .replace(/&nbsp;/g, ' ')
+                .replace(/&lt;/g, '<')
+                .replace(/&gt;/g, '>')
+                .replace(/&amp;/g, '&')
+                .replace(/&quot;/g, '"')
+                // Final whitespace clean-up: collapse horizontal runs but keep line breaks, so the
+                // headers and list items produced above stay on their own lines (max one blank line)
+                .replace(/[^\S\n]+/g, ' ')
+                .replace(/ ?\n ?/g, '\n')
+                .replace(/\n{3,}/g, '\n\n')
+                .trim();
+
+            return sanitized;
+        } catch (error) {
+            console.error("Error cleaning context content:", error);
+            return content; // Return original if cleaning fails
+        }
+    }
+
     /**
      * Format messages for the Ollama API
      */
     private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] {
         const formattedMessages: OllamaMessage[] = [];
+        const CONTEXT_MARKER = '# Context for your query'; // header text that identifies injected note context
 
-        // Add system message if provided
-        if (systemPrompt) {
+        // Split the history: system messages vs. the user/assistant conversation turns
+        const systemMessages = messages.filter(msg => msg.role === 'system');
+        const conversationMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
+
+        // In the case of Ollama, we need to ensure context is properly integrated
+        // The key insight is that simply including it in a system message doesn't work well
+
+        // Check if we have context (typically in the first system message)
+        let hasContext = false;
+        let contextContent = '';
+
+        if (systemMessages.length > 0) {
+            const potentialContext = systemMessages[0].content;
+            if (potentialContext && potentialContext.includes(CONTEXT_MARKER)) {
+                hasContext = true;
+                contextContent = this.cleanContextContent(potentialContext);
+            }
+        }
+
+        // Base instructions: the caller's system prompt, or a built-in default when none is given
+        const basePrompt = systemPrompt ||
+            "You are an AI assistant integrated into TriliumNext Notes. " +
+            "Focus on helping users find information in their notes and answering questions based on their knowledge base. " +
+            "Be concise, informative, and direct when responding to queries.";
+
+        // If we have context, inject it differently - prepend it to the user's first question
+        if (hasContext && conversationMessages.length > 0) {
+            // Create initial system message with just the base prompt
             formattedMessages.push({
                 role: 'system',
-                content: systemPrompt
+                content: basePrompt
             });
+
+            // For user messages, inject context into the first user message
+            let injectedContext = false;
+
+            for (const msg of conversationMessages) {
+                // Only the first user turn is wrapped with the notes;
+                // every other turn is forwarded unchanged
+                if (msg.role === 'user' && !injectedContext) {
+                    // Format the context in a way Ollama can't ignore
+                    const formattedContext =
+                        "I need you to answer based on the following information from my notes:\n\n" +
+                        "-----BEGIN MY NOTES-----\n" +
+                        contextContent +
+                        "\n-----END MY NOTES-----\n\n" +
+                        "Based on these notes, please answer: " + msg.content;
+
+                    formattedMessages.push({
+                        role: 'user',
+                        content: formattedContext
+                    });
+
+                    injectedContext = true;
+                } else {
+                    formattedMessages.push({
+                        role: msg.role,
+                        content: msg.content
+                    });
+                }
+            }
+        } else {
+            // No usable context: fold the system prompt and any caller-supplied system messages
+            // into ONE system message (de-duplicated) so their instructions are not silently dropped
+            const systemContent = Array.from(new Set([systemPrompt, ...systemMessages.map(msg => msg.content)])).filter(Boolean).join('\n\n');
+            if (systemContent) {
+                formattedMessages.push({
+                    role: 'system',
+                    content: systemContent
+                });
+            }
+            // Add all user and assistant messages as-is
+            for (const msg of conversationMessages) {
+                formattedMessages.push({
+                    role: msg.role,
+                    content: msg.content
+                });
+            }
         }
 
-        // Add all messages
-        for (const msg of messages) {
-            // Ollama's API accepts 'user', 'assistant', and 'system' roles
-            formattedMessages.push({
-                role: msg.role,
-                content: msg.content
-            });
-        }
+        console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`);
+        console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`);
 
         return formattedMessages;
     }