From ea4d3ac8002b8566bcede64cfa7c39b84f98fca2 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Fri, 28 Mar 2025 22:29:33 +0000 Subject: [PATCH] Do a better job with Ollama context, again --- src/routes/api/llm.ts | 53 +++--- src/services/llm/chat_service.ts | 17 +- .../llm/context/modules/context_formatter.ts | 155 ++++++++++++++---- src/services/llm/context_service.ts | 72 ++++++++ src/services/llm/providers/ollama_service.ts | 154 +++++++++++++++-- 5 files changed, 378 insertions(+), 73 deletions(-) diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts index 693f5e839..a65c74fff 100644 --- a/src/routes/api/llm.ts +++ b/src/routes/api/llm.ts @@ -19,7 +19,7 @@ import { CONTEXT_PROMPTS } from '../../services/llm/constants/llm_prompt_constan export const LLM_CONSTANTS = { // Context window sizes (in characters) CONTEXT_WINDOW: { - OLLAMA: 6000, + OLLAMA: 8000, OPENAI: 12000, ANTHROPIC: 15000, VOYAGE: 12000, @@ -61,6 +61,8 @@ export const LLM_CONSTANTS = { // Model-specific context windows for Ollama models OLLAMA_MODEL_CONTEXT_WINDOWS: { "llama3": 8192, + "llama3.1": 8192, + "llama3.2": 8192, "mistral": 8192, "nomic": 32768, "mxbai": 32768, @@ -954,20 +956,32 @@ async function sendMessage(req: Request, res: Response) { log.info(`Context ends with: "...${context.substring(context.length - 200)}"`); log.info(`Number of notes included: ${sourceNotes.length}`); - // Format all messages for the AI (advanced context case) - const aiMessages: Message[] = [ - contextMessage, - ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({ + // Get messages with context properly formatted for the specific LLM provider + const aiMessages = contextService.buildMessagesWithContext( + session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({ role: msg.role, content: msg.content - })) - ]; + })), + context, + service + ); + + // Add enhanced debug logging + if (service.constructor.name === 'OllamaService') { + // Log condensed version of the context so we can see if it's being properly formatted + console.log(`Sending context to Ollama with length: ${context.length} chars`); + console.log(`Context first 200 chars: ${context.substring(0, 200).replace(/\n/g, '\\n')}...`); + console.log(`Context last 200 chars: ${context.substring(context.length - 200).replace(/\n/g, '\\n')}...`); + + // Log the first user message to verify context injection is working + const userMsg = aiMessages.find(m => m.role === 'user'); + if (userMsg) { + console.log(`First user message (first 200 chars): ${userMsg.content.substring(0, 200).replace(/\n/g, '\\n')}...`); + } + } // DEBUG: Log message structure being sent to LLM log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`); - aiMessages.forEach((msg, idx) => { - log.info(`Message ${idx}: role=${msg.role}, content length=${msg.content.length} chars, begins with: "${msg.content.substring(0, 50)}..."`); - }); // Configure chat options from session metadata const chatOptions: ChatCompletionOptions = { @@ -1089,20 +1103,15 @@ async function sendMessage(req: Request, res: Response) { // Build context from relevant notes const context = buildContextFromNotes(relevantNotes, messageContent); - // Add system message with the context - const contextMessage: Message = { - role: 'system', - content: context - }; - - // Format all messages for the AI (original approach) - const aiMessages: Message[] = [ - contextMessage, - ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({ + // Get messages with context properly formatted for the specific LLM provider + const aiMessages = contextService.buildMessagesWithContext( + session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({ role: msg.role, content: msg.content - })) - ]; + })), + context, + service + ); // Configure chat options from session metadata const chatOptions: ChatCompletionOptions = { diff --git a/src/services/llm/chat_service.ts b/src/services/llm/chat_service.ts index f2ba2bc68..f155248da 100644 --- a/src/services/llm/chat_service.ts +++ b/src/services/llm/chat_service.ts @@ -264,17 +264,12 @@ export class ChatService { showThinking ); - // Prepend a system message with context - const systemMessage: Message = { - role: 'system', - content: CONTEXT_PROMPTS.CONTEXT_AWARE_SYSTEM_PROMPT.replace( - '{enhancedContext}', - enhancedContext - ) - }; - - // Create messages array with system message - const messagesWithContext = [systemMessage, ...session.messages]; + // Create messages array with context using the improved method + const messagesWithContext = contextService.buildMessagesWithContext( + session.messages, + enhancedContext, + aiServiceManager.getService() // Get the default service + ); // Generate AI response const response = await aiServiceManager.generateChatCompletion( diff --git a/src/services/llm/context/modules/context_formatter.ts b/src/services/llm/context/modules/context_formatter.ts index b85a284b2..91f1ac160 100644 --- a/src/services/llm/context/modules/context_formatter.ts +++ b/src/services/llm/context/modules/context_formatter.ts @@ -7,7 +7,7 @@ import type { IContextFormatter, NoteSearchResult } from '../../interfaces/conte const CONTEXT_WINDOW = { OPENAI: 16000, ANTHROPIC: 100000, - OLLAMA: 8000, + OLLAMA: 4000, // Reduced to avoid issues DEFAULT: 4000 }; @@ -42,20 +42,25 @@ export class ContextFormatter implements IContextFormatter { // DEBUG: Log context window size log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`); - log.info(`Formatting context from ${sources.length} sources for query: "${query.substring(0, 50)}..."`); + log.info(`Building context from notes with query: ${query}`); + log.info(`Sources length: ${sources.length}`); - // Use a format appropriate for the model family - const isAnthropicFormat = providerId === 'anthropic'; + // Use provider-specific formatting + let formattedContext = ''; - // Start with different headers based on provider - let formattedContext = isAnthropicFormat - ? CONTEXT_PROMPTS.CONTEXT_HEADERS.ANTHROPIC(query) - : CONTEXT_PROMPTS.CONTEXT_HEADERS.DEFAULT(query); + if (providerId === 'ollama') { + // For Ollama, use a much simpler plain text format that's less prone to encoding issues + formattedContext = `# Context for your query: "${query}"\n\n`; + } else if (providerId === 'anthropic') { + formattedContext = CONTEXT_PROMPTS.CONTEXT_HEADERS.ANTHROPIC(query); + } else { + formattedContext = CONTEXT_PROMPTS.CONTEXT_HEADERS.DEFAULT(query); + } // Sort sources by similarity if available to prioritize most relevant if (sources[0] && sources[0].similarity !== undefined) { sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); - // DEBUG: Log sorting information + // Log sorting information log.info(`Sources sorted by similarity. Top sources: ${sources.slice(0, 3).map(s => s.title || 'Untitled').join(', ')}`); } @@ -63,7 +68,7 @@ export class ContextFormatter implements IContextFormatter { let totalSize = formattedContext.length; const formattedSources: string[] = []; - // DEBUG: Track stats for logging + // Track stats for logging let sourcesProcessed = 0; let sourcesIncluded = 0; let sourcesSkipped = 0; @@ -73,10 +78,18 @@ export class ContextFormatter implements IContextFormatter { for (const source of sources) { sourcesProcessed++; let content = ''; + let title = 'Untitled Note'; + if (typeof source === 'string') { content = source; } else if (source.content) { - content = this.sanitizeNoteContent(source.content, source.type, source.mime); + // For Ollama, use a more aggressive sanitization to avoid encoding issues + if (providerId === 'ollama') { + content = this.sanitizeForOllama(source.content); + } else { + content = this.sanitizeNoteContent(source.content, source.type, source.mime); + } + title = source.title || title; } else { sourcesSkipped++; log.info(`Skipping note with no content: ${source.title || 'Untitled'}`); @@ -86,14 +99,18 @@ export class ContextFormatter implements IContextFormatter { // Skip if content is empty or just whitespace/minimal if (!content || content.trim().length <= 10) { sourcesSkipped++; - log.info(`Skipping note with minimal content: ${source.title || 'Untitled'}`); + log.info(`Skipping note with minimal content: ${title}`); continue; } - // Format source with title if available - const title = source.title || 'Untitled Note'; - const noteId = source.noteId || ''; - const formattedSource = `### ${title}\n${content}\n`; + // Format source with title - use simple format for Ollama + let formattedSource = ''; + if (providerId === 'ollama') { + // For Ollama, use a simpler format and plain ASCII + formattedSource = `## ${title}\n${content}\n\n`; + } else { + formattedSource = `### ${title}\n${content}\n\n`; + } // Check if adding this would exceed our size limit if (totalSize + formattedSource.length > maxTotalLength) { @@ -102,12 +119,13 @@ export class ContextFormatter implements IContextFormatter { if (formattedSources.length === 0) { const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text if (availableSpace > 200) { // Only if we have reasonable space - const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`; + const truncatedContent = providerId === 'ollama' ? + `## ${title}\n${content.substring(0, availableSpace)}...\n\n` : + `### ${title}\n${content.substring(0, availableSpace)}...\n\n`; formattedSources.push(truncatedContent); totalSize += truncatedContent.length; sourcesIncluded++; - // DEBUG: Log truncation - log.info(`Truncated first source "${title}" to fit in context window. Used ${truncatedContent.length} of ${formattedSource.length} chars`); + log.info(`Truncated first source "${title}" to fit in context window`); } } break; @@ -118,24 +136,29 @@ export class ContextFormatter implements IContextFormatter { sourcesIncluded++; } - // DEBUG: Log sources stats + // Log sources stats log.info(`Context building stats: processed ${sourcesProcessed}/${sources.length} sources, included ${sourcesIncluded}, skipped ${sourcesSkipped}, exceeded limit ${sourcesExceededLimit}`); log.info(`Context size so far: ${totalSize}/${maxTotalLength} chars (${(totalSize/maxTotalLength*100).toFixed(2)}% of limit)`); // Add the formatted sources to the context - formattedContext += formattedSources.join('\n'); + formattedContext += formattedSources.join(''); - // Add closing to provide instructions to the AI - const closing = isAnthropicFormat - ? CONTEXT_PROMPTS.CONTEXT_CLOSINGS.ANTHROPIC - : CONTEXT_PROMPTS.CONTEXT_CLOSINGS.DEFAULT; + // Add closing to provide instructions to the AI - use simpler version for Ollama + let closing = ''; + if (providerId === 'ollama') { + closing = '\n\nPlease use the information above to answer the query and keep your response concise.'; + } else if (providerId === 'anthropic') { + closing = CONTEXT_PROMPTS.CONTEXT_CLOSINGS.ANTHROPIC; + } else { + closing = CONTEXT_PROMPTS.CONTEXT_CLOSINGS.DEFAULT; + } // Check if adding the closing would exceed our limit if (totalSize + closing.length <= maxTotalLength) { formattedContext += closing; } - // DEBUG: Log final context size + // Log final context size log.info(`Final context: ${formattedContext.length} chars, ${formattedSources.length} sources included`); return formattedContext; @@ -161,18 +184,52 @@ export class ContextFormatter implements IContextFormatter { try { // If it's HTML content, sanitize it if (mime === 'text/html' || type === 'text') { - // Use sanitize-html to convert HTML to plain text - const sanitized = sanitizeHtml(content, { + // First, try to preserve some structure by converting to markdown-like format + const contentWithMarkdown = content + // Convert headers + .replace(/]*>(.*?)<\/h1>/gi, '# $1\n') + .replace(/]*>(.*?)<\/h2>/gi, '## $1\n') + .replace(/]*>(.*?)<\/h3>/gi, '### $1\n') + .replace(/]*>(.*?)<\/h4>/gi, '#### $1\n') + .replace(/]*>(.*?)<\/h5>/gi, '##### $1\n') + // Convert lists + .replace(/<\/?ul[^>]*>/g, '\n') + .replace(/<\/?ol[^>]*>/g, '\n') + .replace(/]*>(.*?)<\/li>/gi, '- $1\n') + // Convert links + .replace(/]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)') + // Convert code blocks + .replace(/]*>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```') + .replace(/]*>(.*?)<\/code>/gi, '`$1`') + // Convert emphasis + .replace(/]*>(.*?)<\/strong>/gi, '**$1**') + .replace(/]*>(.*?)<\/b>/gi, '**$1**') + .replace(/]*>(.*?)<\/em>/gi, '*$1*') + .replace(/]*>(.*?)<\/i>/gi, '*$1*') + // Handle paragraphs better + .replace(/]*>(.*?)<\/p>/gi, '$1\n\n') + // Handle line breaks + .replace(//gi, '\n'); + + // Then use sanitize-html to remove remaining HTML + const sanitized = sanitizeHtml(contentWithMarkdown, { allowedTags: [], // No tags allowed (strip all HTML) allowedAttributes: {}, // No attributes allowed textFilter: function(text) { return text .replace(/ /g, ' ') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&') + .replace(/"/g, '"') .replace(/\n\s*\n\s*\n/g, '\n\n'); // Replace multiple blank lines with just one } }); - return sanitized.trim(); + // Remove unnecessary whitespace while preserving meaningful structure + return sanitized + .replace(/\n{3,}/g, '\n\n') // no more than 2 consecutive newlines + .trim(); } // If it's code, keep formatting but limit size @@ -191,6 +248,46 @@ export class ContextFormatter implements IContextFormatter { return content; // Return original content if sanitization fails } } + + /** + * Special sanitization for Ollama that removes all non-ASCII characters + * and simplifies formatting to avoid encoding issues + */ + sanitizeForOllama(content: string): string { + if (!content) { + return ''; + } + + try { + // First remove any HTML + let plaintext = sanitizeHtml(content, { + allowedTags: [], + allowedAttributes: {}, + textFilter: (text) => text + }); + + // Then aggressively sanitize to plain ASCII and simple formatting + plaintext = plaintext + // Replace common problematic quotes with simple ASCII quotes + .replace(/[""]/g, '"') + .replace(/['']/g, "'") + // Replace other common Unicode characters + .replace(/[–—]/g, '-') + .replace(/[•]/g, '*') + .replace(/[…]/g, '...') + // Strip all non-ASCII characters + .replace(/[^\x00-\x7F]/g, '') + // Normalize whitespace + .replace(/\s+/g, ' ') + .replace(/\n\s+/g, '\n') + .trim(); + + return plaintext; + } catch (error) { + log.error(`Error sanitizing note content for Ollama: ${error}`); + return ''; // Return empty if sanitization fails + } + } } // Export singleton instance diff --git a/src/services/llm/context_service.ts b/src/services/llm/context_service.ts index 4370cf731..bf6fa14ef 100644 --- a/src/services/llm/context_service.ts +++ b/src/services/llm/context_service.ts @@ -9,6 +9,8 @@ import log from '../log.js'; import contextService from './context/modules/context_service.js'; import { ContextExtractor } from './context/index.js'; import type { NoteSearchResult } from './interfaces/context_interfaces.js'; +import type { Message } from './ai_interface.js'; +import type { LLMServiceInterface } from './interfaces/agent_tool_interfaces.js'; /** * Main Context Service for Trilium Notes @@ -185,6 +187,76 @@ class TriliumContextService { clearCaches(): void { return contextService.clearCaches(); } + + /** + * Build messages with proper context for an LLM-enhanced chat + */ + buildMessagesWithContext(messages: Message[], context: string, llmService: LLMServiceInterface): Message[] { + // For simple conversations just add context to the system message + try { + if (!messages || messages.length === 0) { + return [{ role: 'system', content: context }]; + } + + const result: Message[] = []; + let hasSystemMessage = false; + + // First pass: identify if there's a system message + for (const msg of messages) { + if (msg.role === 'system') { + hasSystemMessage = true; + break; + } + } + + // If we have a system message, prepend context to it + // Otherwise create a new system message with the context + if (hasSystemMessage) { + for (const msg of messages) { + if (msg.role === 'system') { + // For Ollama, use a cleaner approach with just one system message + if (llmService.constructor.name === 'OllamaService') { + // If this is the first system message we've seen, + // add context to it, otherwise skip (Ollama handles multiple + // system messages poorly) + if (result.findIndex(m => m.role === 'system') === -1) { + result.push({ + role: 'system', + content: `${context}\n\n${msg.content}` + }); + } + } else { + // For other providers, include all system messages + result.push({ + role: 'system', + content: msg.content.includes(context) ? + msg.content : // Avoid duplicate context + `${context}\n\n${msg.content}` + }); + } + } else { + result.push(msg); + } + } + } else { + // No system message found, prepend one with the context + result.push({ role: 'system', content: context }); + // Add all the original messages + result.push(...messages); + } + + return result; + } catch (error) { + log.error(`Error building messages with context: ${error}`); + + // Fallback: prepend a system message with context + const safeMessages = Array.isArray(messages) ? messages : []; + return [ + { role: 'system', content: context }, + ...safeMessages.filter(msg => msg.role !== 'system') + ]; + } + } } // Export singleton instance diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts index 61d40db39..ff54459fb 100644 --- a/src/services/llm/providers/ollama_service.ts +++ b/src/services/llm/providers/ollama_service.ts @@ -287,28 +287,160 @@ export class OllamaService extends BaseAIService { } } + /** + * Clean up HTML and other problematic content before sending to Ollama + */ + private cleanContextContent(content: string): string { + if (!content) return ''; + + try { + // First fix potential encoding issues + let sanitized = content + // Fix common encoding issues with quotes and special characters + .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"') // Fix broken quote chars + .replace(/[\u00A0-\u9999]/g, match => { + try { + return encodeURIComponent(match).replace(/%/g, ''); + } catch (e) { + return ''; + } + }); + + // Replace common HTML tags with markdown or plain text equivalents + sanitized = sanitized + // Remove HTML divs, spans, etc. + .replace(/<\/?div[^>]*>/g, '') + .replace(/<\/?span[^>]*>/g, '') + .replace(/<\/?p[^>]*>/g, '\n') + // Convert headers + .replace(/]*>(.*?)<\/h1>/gi, '# $1\n') + .replace(/]*>(.*?)<\/h2>/gi, '## $1\n') + .replace(/]*>(.*?)<\/h3>/gi, '### $1\n') + // Convert lists + .replace(/<\/?ul[^>]*>/g, '') + .replace(/<\/?ol[^>]*>/g, '') + .replace(/]*>(.*?)<\/li>/gi, '- $1\n') + // Convert links + .replace(/]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)') + // Convert code blocks + .replace(/]*>]*>(.*?)<\/code><\/pre>/gis, '```\n$1\n```') + .replace(/]*>(.*?)<\/code>/gi, '`$1`') + // Convert emphasis + .replace(/<\/?strong[^>]*>/g, '**') + .replace(/<\/?em[^>]*>/g, '*') + // Remove figure tags + .replace(/<\/?figure[^>]*>/g, '') + // Remove all other HTML tags + .replace(/<[^>]*>/g, '') + // Fix double line breaks + .replace(/\n\s*\n\s*\n/g, '\n\n') + // Fix HTML entities + .replace(/ /g, ' ') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&') + .replace(/"/g, '"') + // Final clean whitespace + .replace(/\s+/g, ' ') + .replace(/\n\s+/g, '\n') + .trim(); + + return sanitized; + } catch (error) { + console.error("Error cleaning context content:", error); + return content; // Return original if cleaning fails + } + } + /** * Format messages for the Ollama API */ private formatMessages(messages: Message[], systemPrompt: string): OllamaMessage[] { const formattedMessages: OllamaMessage[] = []; + const MAX_SYSTEM_CONTENT_LENGTH = 4000; - // Add system message if provided - if (systemPrompt) { + // First identify user and system messages + const systemMessages = messages.filter(msg => msg.role === 'system'); + const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant'); + + // In the case of Ollama, we need to ensure context is properly integrated + // The key insight is that simply including it in a system message doesn't work well + + // Check if we have context (typically in the first system message) + let hasContext = false; + let contextContent = ''; + + if (systemMessages.length > 0) { + const potentialContext = systemMessages[0].content; + if (potentialContext && potentialContext.includes('# Context for your query')) { + hasContext = true; + contextContent = this.cleanContextContent(potentialContext); + } + } + + // Create base system message with instructions + let basePrompt = systemPrompt || + "You are an AI assistant integrated into TriliumNext Notes. " + + "Focus on helping users find information in their notes and answering questions based on their knowledge base. " + + "Be concise, informative, and direct when responding to queries."; + + // If we have context, inject it differently - prepend it to the user's first question + if (hasContext && userMessages.length > 0) { + // Create initial system message with just the base prompt formattedMessages.push({ role: 'system', - content: systemPrompt + content: basePrompt }); + + // For user messages, inject context into the first user message + let injectedContext = false; + + for (let i = 0; i < userMessages.length; i++) { + const msg = userMessages[i]; + + if (msg.role === 'user' && !injectedContext) { + // Format the context in a way Ollama can't ignore + const formattedContext = + "I need you to answer based on the following information from my notes:\n\n" + + "-----BEGIN MY NOTES-----\n" + + contextContent + + "\n-----END MY NOTES-----\n\n" + + "Based on these notes, please answer: " + msg.content; + + formattedMessages.push({ + role: 'user', + content: formattedContext + }); + + injectedContext = true; + } else { + formattedMessages.push({ + role: msg.role, + content: msg.content + }); + } + } + } else { + // No context or empty context case + // Add system message (with system prompt) + if (systemPrompt) { + formattedMessages.push({ + role: 'system', + content: systemPrompt + }); + } + + // Add all user and assistant messages as-is + for (const msg of userMessages) { + formattedMessages.push({ + role: msg.role, + content: msg.content + }); + } } - // Add all messages - for (const msg of messages) { - // Ollama's API accepts 'user', 'assistant', and 'system' roles - formattedMessages.push({ - role: msg.role, - content: msg.content - }); - } + console.log(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for Ollama`); + console.log(`Context detected: ${hasContext ? 'Yes' : 'No'}`); return formattedMessages; }