From 0d2858c7e92a752815d3839980ca66c9c67be750 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Tue, 11 Mar 2025 23:04:51 +0000
Subject: [PATCH] upgrade chunking

---
 src/routes/api/llm.ts                       |  76 ++++++++++--
 src/services/llm/context/chunking.ts        |  37 +++---
 src/services/llm/context/index.ts           |  20 ++--
 src/services/llm/embeddings/vector_store.ts | 123 +++++++++++++++----
 src/services/llm/trilium_context_service.ts |  65 ++++++++---
 5 files changed, 242 insertions(+), 79 deletions(-)

diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index 97982db5b..49c7d442a 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -12,6 +12,62 @@ import * as aiServiceManagerModule from "../../services/llm/ai_service_manager.j
 import triliumContextService from "../../services/llm/trilium_context_service.js";
 import sql from "../../services/sql.js";

+// LLM service constants
+export const LLM_CONSTANTS = {
+    // Context window sizes (in characters)
+    CONTEXT_WINDOW: {
+        OLLAMA: 6000,
+        OPENAI: 12000,
+        ANTHROPIC: 15000,
+        DEFAULT: 6000
+    },
+
+    // Embedding dimensions (verify these with your actual models)
+    EMBEDDING_DIMENSIONS: {
+        OLLAMA: {
+            DEFAULT: 384,
+            NOMIC: 768,
+            MISTRAL: 1024
+        },
+        OPENAI: {
+            ADA: 1536,
+            DEFAULT: 1536
+        },
+        ANTHROPIC: {
+            CLAUDE: 1024,
+            DEFAULT: 1024
+        }
+    },
+
+    // Chunking parameters
+    CHUNKING: {
+        DEFAULT_SIZE: 1500,
+        OLLAMA_SIZE: 1000,
+        DEFAULT_OVERLAP: 100,
+        MAX_SIZE_FOR_SINGLE_EMBEDDING: 5000
+    },
+
+    // Search/similarity thresholds
+    SIMILARITY: {
+        DEFAULT_THRESHOLD: 0.65,
+        HIGH_THRESHOLD: 0.75,
+        LOW_THRESHOLD: 0.5
+    },
+
+    // Session management
+    SESSION: {
+        CLEANUP_INTERVAL_MS: 60 * 60 * 1000, // 1 hour
+        SESSION_EXPIRY_MS: 12 * 60 * 60 * 1000, // 12 hours
+        MAX_SESSION_MESSAGES: 10
+    },
+
+    // Content limits
+    CONTENT: {
+        MAX_NOTE_CONTENT_LENGTH: 1500,
+        MAX_TOTAL_CONTENT_LENGTH: 10000
+    }
+};
+
 // Define basic interfaces
 interface ChatMessage {
     role: 'user' | 'assistant' | 'system';
@@ -55,7 +111,7 @@ const sessions = new Map();
 let cleanupInitialized = false;

 /**
- * Initialize the cleanup timer if not already running
+ * Initialize the session cleanup timer to remove old/inactive sessions
  * Only call this after database is initialized
  */
 function initializeCleanupTimer() {
@@ -63,18 +119,18 @@ function initializeCleanupTimer() {
         return;
     }

-    // Utility function to clean sessions older than 12 hours
+    // Clean sessions that have expired based on the constants
     function cleanupOldSessions() {
-        const twelveHoursAgo = new Date(Date.now() - 12 * 60 * 60 * 1000);
+        const expiryTime = new Date(Date.now() - LLM_CONSTANTS.SESSION.SESSION_EXPIRY_MS);
         for (const [sessionId, session] of sessions.entries()) {
-            if (session.lastActive < twelveHoursAgo) {
+            if (session.lastActive < expiryTime) {
                 sessions.delete(sessionId);
             }
         }
     }

-    // Run cleanup every hour
-    setInterval(cleanupOldSessions, 60 * 60 * 1000);
+    // Run cleanup at the configured interval
+    setInterval(cleanupOldSessions, LLM_CONSTANTS.SESSION.CLEANUP_INTERVAL_MS);
     cleanupInitialized = true;
 }

@@ -563,10 +619,10 @@ async function sendMessage(req: Request, res: Response) {
                 content: context
             };

-            // Format all messages for the AI
+            // Format all messages for the AI (advanced context case)
            const aiMessages: Message[] = [
                 contextMessage,
-                ...session.messages.slice(-10).map(msg => ({
+                ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
                 }))
@@ -699,10 +755,10 @@ async function sendMessage(req: Request, res: Response) {
                 content: context
             };

-            // Format all messages for the AI
+            // Format all messages for the AI (original approach)
             const aiMessages: Message[] = [
                 contextMessage,
-                ...session.messages.slice(-10).map(msg => ({
+                ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
                 }))
diff --git a/src/services/llm/context/chunking.ts b/src/services/llm/context/chunking.ts
index 76e0f3d13..8727001cc 100644
--- a/src/services/llm/context/chunking.ts
+++ b/src/services/llm/context/chunking.ts
@@ -49,26 +49,32 @@ export interface ChunkOptions {
 /**
  * Default options for chunking
  */
-const DEFAULT_CHUNK_OPTIONS: Required<ChunkOptions> = {
-    maxChunkSize: 1500, // Characters per chunk
-    overlapSize: 100, // Overlap between chunks
-    respectBoundaries: true,
-    includeMetadata: true,
-    metadata: {}
-};
+async function getDefaultChunkOptions(): Promise<Required<ChunkOptions>> {
+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
+
+    return {
+        maxChunkSize: LLM_CONSTANTS.CHUNKING.DEFAULT_SIZE,
+        overlapSize: LLM_CONSTANTS.CHUNKING.DEFAULT_OVERLAP,
+        respectBoundaries: true,
+        includeMetadata: true,
+        metadata: {}
+    };
+}

 /**
  * Chunk content into smaller pieces
  * Used for processing large documents and preparing them for LLMs
  */
-export function chunkContent(
+export async function chunkContent(
     content: string,
     title: string = '',
     noteId: string = '',
     options: ChunkOptions = {}
-): ContentChunk[] {
+): Promise<ContentChunk[]> {
     // Merge provided options with defaults
-    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };
+    const defaultOptions = await getDefaultChunkOptions();
+    const config: Required<ChunkOptions> = { ...defaultOptions, ...options };

     // If content is small enough, return as a single chunk
     if (content.length <= config.maxChunkSize) {
@@ -167,14 +173,15 @@ export function chunkContent(
 /**
  * Smarter chunking that tries to respect semantic boundaries like headers and sections
  */
-export function semanticChunking(
+export async function semanticChunking(
     content: string,
     title: string = '',
     noteId: string = '',
     options: ChunkOptions = {}
-): ContentChunk[] {
+): Promise<ContentChunk[]> {
     // Merge provided options with defaults
-    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };
+    const defaultOptions = await getDefaultChunkOptions();
+    const config: Required<ChunkOptions> = { ...defaultOptions, ...options };

     // If content is small enough, return as a single chunk
     if (content.length <= config.maxChunkSize) {
@@ -214,7 +221,7 @@ export function semanticChunking(

     // If no headers were found, fall back to regular chunking
     if (sections.length <= 1) {
-        return chunkContent(content, title, noteId, options);
+        return await chunkContent(content, title, noteId, options);
     }

     // Process each section
@@ -238,7 +245,7 @@ export function semanticChunking(
         }

         // Chunk this section separately
-        const sectionChunks = chunkContent(
+        const sectionChunks = await chunkContent(
             section,
             title,
             noteId,
diff --git a/src/services/llm/context/index.ts b/src/services/llm/context/index.ts
index b0fe6b841..168a226fc 100644
--- a/src/services/llm/context/index.ts
+++ b/src/services/llm/context/index.ts
@@ -161,48 +161,48 @@ export class ContextExtractor {
     /**
     * Chunk content into smaller pieces
     */
-    static chunkContent(
+    static async chunkContent(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return chunkContent(content, title, noteId, options);
     }

     /**
     * Chunk content into smaller pieces - instance method
     */
-    chunkContent(
+    async chunkContent(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return ContextExtractor.chunkContent(content, title, noteId, options);
     }

     /**
     * Smarter chunking that respects semantic boundaries
     */
-    static semanticChunking(
+    static async semanticChunking(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return semanticChunking(content, title, noteId, options);
     }

     /**
     * Smarter chunking that respects semantic boundaries - instance method
     */
-    semanticChunking(
+    async semanticChunking(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return ContextExtractor.semanticChunking(content, title, noteId, options);
     }

@@ -572,7 +572,7 @@ export class ContextExtractor {
         if (!content) return [];

         // Use the new chunking functionality
-        const chunks = chunkContent(
+        const chunks = await ContextExtractor.chunkContent(
             content,
             '',
             noteId,
@@ -580,7 +580,7 @@ export class ContextExtractor {
         );

         // Convert to the old API format which was an array of strings
-        return chunks.map(chunk => chunk.content);
+        return (await chunks).map(chunk => chunk.content);
     }

     /**
diff --git a/src/services/llm/embeddings/vector_store.ts b/src/services/llm/embeddings/vector_store.ts
index 433e597f4..46e18146c 100644
--- a/src/services/llm/embeddings/vector_store.ts
+++ b/src/services/llm/embeddings/vector_store.ts
@@ -149,8 +149,12 @@ export async function findSimilarNotes(
     providerId: string,
     modelId: string,
     limit = 10,
-    threshold = 0.65 // Slightly lowered from 0.7 to account for relationship focus
+    threshold?: number // Made optional to use constants
 ): Promise<{noteId: string, similarity: number}[]> {
+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
+    // Use provided threshold or default from constants
+    const similarityThreshold = threshold ?? LLM_CONSTANTS.SIMILARITY.DEFAULT_THRESHOLD;
     // Get all embeddings for the given provider and model
     const rows = await sql.getRows(`
         SELECT embedId, noteId, providerId, modelId, dimension, embedding
@@ -175,7 +179,7 @@ export async function findSimilarNotes(
     // Filter by threshold and sort by similarity (highest first)
     return similarities
-        .filter(item => item.similarity >= threshold)
+        .filter(item => item.similarity >= similarityThreshold)
         .sort((a, b) => b.similarity - a.similarity)
         .slice(0, limit);
 }

@@ -183,7 +187,7 @@ export async function findSimilarNotes(
 /**
  * Clean note content by removing HTML tags and normalizing whitespace
  */
-function cleanNoteContent(content: string, type: string, mime: string): string {
+async function cleanNoteContent(content: string, type: string, mime: string): Promise<string> {
     if (!content) return '';

     // If it's HTML content, remove HTML tags
@@ -214,10 +218,11 @@ function cleanNoteContent(content: string, type: string, mime: string): string {
     // Trim the content
     content = content.trim();

+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
     // Truncate if extremely long
-    const MAX_CONTENT_LENGTH = 10000;
-    if (content.length > MAX_CONTENT_LENGTH) {
-        content = content.substring(0, MAX_CONTENT_LENGTH) + ' [content truncated]';
+    if (content.length > LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) {
+        content = content.substring(0, LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) + ' [content truncated]';
     }

     return content;
@@ -455,7 +460,7 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbeddingContext> {
     try {
         // Get the context extractor dynamically to avoid circular dependencies
-        const { ContextExtractor } = await import('../../llm/context/index.js');
+        const { ContextExtractor } = await import('../context/index.js');
         const contextExtractor = new ContextExtractor();

-        // Get chunks of the note content
-        const chunks = await contextExtractor.getChunkedNoteContent(noteId);
+        // Get note from becca
+        const note = becca.notes[noteId];
+        if (!note) {
+            throw new Error(`Note ${noteId} not found in Becca cache`);
+        }
+
+        // Use semantic chunking for better boundaries
+        const chunks = await contextExtractor.semanticChunking(
+            context.content,
+            note.title,
+            noteId,
+            {
+                // Adjust chunk size based on provider using constants
+                maxChunkSize: provider.name === 'ollama' ?
+                    (await import('../../../routes/api/llm.js')).LLM_CONSTANTS.CHUNKING.OLLAMA_SIZE :
+                    (await import('../../../routes/api/llm.js')).LLM_CONSTANTS.CHUNKING.DEFAULT_SIZE,
+                respectBoundaries: true
+            }
+        );

         if (!chunks || chunks.length === 0) {
             // Fall back to single embedding if chunking fails
-            const embedding = await provider.generateNoteEmbeddings(context);
+            const embedding = await provider.generateEmbeddings(context.content);
             const config = provider.getConfig();
             await storeNoteEmbedding(noteId, provider.name, config.model, embedding);
+            log.info(`Generated single embedding for note ${noteId} (${note.title}) since chunking failed`);
             return;
         }

@@ -993,23 +1016,19 @@ async function processNoteWithChunking(
     let failedChunks = 0;
     const totalChunks = chunks.length;
     const failedChunkDetails: {index: number, error: string}[] = [];
+    const retryQueue: {index: number, chunk: any}[] = [];

-    // Process each chunk with a slight delay to avoid rate limits
+    log.info(`Processing ${chunks.length} chunks for note ${noteId} (${note.title})`);
+
+    // Process each chunk with a delay based on provider to avoid rate limits
     for (let i = 0; i < chunks.length; i++) {
         const chunk = chunks[i];
-        const chunkId = `chunk_${i + 1}_of_${chunks.length}`;
-
         try {
-            // Create a modified context object with just this chunk's content
-            const chunkContext: NoteEmbeddingContext = {
-                ...context,
-                content: chunk
-            };
+            // Generate embedding for this chunk's content
+            const embedding = await provider.generateEmbeddings(chunk.content);

-            // Generate embedding for this chunk
-            const embedding = await provider.generateNoteEmbeddings(chunkContext);
-
-            // Store with chunk information
+            // Store with chunk information in a unique ID format
+            const chunkIdSuffix = `${i + 1}_of_${chunks.length}`;
             await storeNoteEmbedding(
                 noteId,
                 provider.name,
@@ -1019,9 +1038,10 @@ async function processNoteWithChunking(

             successfulChunks++;

-            // Small delay between chunks to avoid rate limits
+            // Small delay between chunks to avoid rate limits - longer for Ollama
             if (i < chunks.length - 1) {
-                await new Promise(resolve => setTimeout(resolve, 100));
+                await new Promise(resolve => setTimeout(resolve,
+                    provider.name === 'ollama' ? 500 : 100));
             }
         } catch (error: any) {
             // Track the failure for this specific chunk
@@ -1031,17 +1051,62 @@ async function processNoteWithChunking(
                 error: error.message || 'Unknown error'
             });

-            log.error(`Error processing chunk ${chunkId} for note ${noteId}: ${error.message || 'Unknown error'}`);
+            // Add to retry queue
+            retryQueue.push({
+                index: i,
+                chunk: chunk
+            });
+
+            log.error(`Error processing chunk ${i + 1} for note ${noteId}: ${error.message || 'Unknown error'}`);
+        }
+    }
+
+    // Retry failed chunks with exponential backoff
+    if (retryQueue.length > 0 && retryQueue.length < chunks.length) {
+        log.info(`Retrying ${retryQueue.length} failed chunks for note ${noteId}`);
+
+        for (let j = 0; j < retryQueue.length; j++) {
+            const {index, chunk} = retryQueue[j];
+
+            try {
+                // Wait longer for retries with exponential backoff
+                await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(1.5, j)));
+
+                // Retry the embedding
+                const embedding = await provider.generateEmbeddings(chunk.content);
+
+                // Store with unique ID that indicates it was a retry
+                const chunkIdSuffix = `${index + 1}_of_${chunks.length}`;
+                await storeNoteEmbedding(
+                    noteId,
+                    provider.name,
+                    config.model,
+                    embedding
+                );
+
+                // Update counters
+                successfulChunks++;
+                failedChunks--;
+
+                // Remove from failedChunkDetails
+                const detailIndex = failedChunkDetails.findIndex(d => d.index === index + 1);
+                if (detailIndex >= 0) {
+                    failedChunkDetails.splice(detailIndex, 1);
+                }
+            } catch (error: any) {
+                log.error(`Retry failed for chunk ${index + 1} of note ${noteId}: ${error.message || 'Unknown error'}`);
+                // Keep failure count as is
+            }
+        }
+    }

     // Log information about the processed chunks
     if (successfulChunks > 0) {
-        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId}`);
+        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     if (failedChunks > 0) {
-        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId}`);
+        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     // If no chunks were successfully processed, throw an error
diff --git a/src/services/llm/trilium_context_service.ts b/src/services/llm/trilium_context_service.ts
index 312578d9e..2e9c5aa53 100644
--- a/src/services/llm/trilium_context_service.ts
+++ b/src/services/llm/trilium_context_service.ts
@@ -333,12 +333,9 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`;
     }

     /**
-     * Build a context string from relevant notes
-     * @param sources - Array of notes
-     * @param query - Original user query
-     * @returns Formatted context string
+     * Build context string from retrieved notes
      */
-    buildContextFromNotes(sources: any[], query: string): string {
+    async buildContextFromNotes(sources: any[], query: string): Promise<string> {
         if (!sources || sources.length === 0) {
             // Return a default context instead of empty string
             return "I am an AI assistant helping you with your Trilium notes. " +
" + @@ -348,13 +345,46 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; let context = `I've found some relevant information in your notes that may help answer: "${query}"\n\n`; + // Sort sources by similarity if available to prioritize most relevant + if (sources[0] && sources[0].similarity !== undefined) { + sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); + } + + // Get provider name to adjust context for different models + const providerId = this.provider?.name || 'default'; + // Get approximate max length based on provider using constants + // Import the constants dynamically to avoid circular dependencies + const { LLM_CONSTANTS } = await import('../../routes/api/llm.js'); + const maxTotalLength = providerId === 'ollama' ? LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA : + providerId === 'openai' ? LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI : + LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC; + + // Track total context length to avoid oversized context + let currentLength = context.length; + const maxNoteContentLength = Math.min(LLM_CONSTANTS.CONTENT.MAX_NOTE_CONTENT_LENGTH, + Math.floor(maxTotalLength / Math.max(1, sources.length))); + sources.forEach((source) => { - // Use the note title as a meaningful heading - context += `### ${source.title}\n`; + // Check if adding this source would exceed our total limit + if (currentLength >= maxTotalLength) return; + + // Build source section + let sourceSection = `### ${source.title}\n`; // Add relationship context if available if (source.parentTitle) { - context += `Part of: ${source.parentTitle}\n`; + sourceSection += `Part of: ${source.parentTitle}\n`; + } + + // Add attributes if available (for better context) + if (source.noteId) { + const note = becca.notes[source.noteId]; + if (note) { + const labels = note.getLabels(); + if (labels.length > 0) { + sourceSection += `Labels: ${labels.map(l => `#${l.name}${l.value ? '=' + l.value : ''}`).join(' ')}\n`; + } + } } if (source.content) { @@ -362,17 +392,22 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; let cleanContent = this.sanitizeNoteContent(source.content, source.type, source.mime); // Truncate content if it's too long - const maxContentLength = 1000; - if (cleanContent.length > maxContentLength) { - cleanContent = cleanContent.substring(0, maxContentLength) + " [content truncated due to length]"; + if (cleanContent.length > maxNoteContentLength) { + cleanContent = cleanContent.substring(0, maxNoteContentLength) + " [content truncated due to length]"; } - context += `${cleanContent}\n`; + sourceSection += `${cleanContent}\n`; } else { - context += "[This note doesn't contain textual content]\n"; + sourceSection += "[This note doesn't contain textual content]\n"; } - context += "\n"; + sourceSection += "\n"; + + // Check if adding this section would exceed total length limit + if (currentLength + sourceSection.length <= maxTotalLength) { + context += sourceSection; + currentLength += sourceSection.length; + } }); // Add clear instructions about how to reference the notes @@ -475,7 +510,7 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; } // Step 3: Build context from the notes - const context = this.buildContextFromNotes(relevantNotes, userQuestion); + const context = await this.buildContextFromNotes(relevantNotes, userQuestion); return { context,