From 0d2858c7e92a752815d3839980ca66c9c67be750 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Tue, 11 Mar 2025 23:04:51 +0000
Subject: [PATCH] upgrade chunking

---
 src/routes/api/llm.ts                       |  76 ++++++++++--
 src/services/llm/context/chunking.ts        |  37 +++---
 src/services/llm/context/index.ts           |  20 ++--
 src/services/llm/embeddings/vector_store.ts | 123 +++++++++++++++----
 src/services/llm/trilium_context_service.ts |  65 ++++++++---
 5 files changed, 242 insertions(+), 79 deletions(-)

diff --git a/src/routes/api/llm.ts b/src/routes/api/llm.ts
index 97982db5b..49c7d442a 100644
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@@ -12,6 +12,62 @@ import * as aiServiceManagerModule from "../../services/llm/ai_service_manager.j
 import triliumContextService from "../../services/llm/trilium_context_service.js";
 import sql from "../../services/sql.js";

+// LLM service constants
+export const LLM_CONSTANTS = {
+    // Context window sizes (in characters)
+    CONTEXT_WINDOW: {
+        OLLAMA: 6000,
+        OPENAI: 12000,
+        ANTHROPIC: 15000,
+        DEFAULT: 6000
+    },
+
+    // Embedding dimensions (verify these with your actual models)
+    EMBEDDING_DIMENSIONS: {
+        OLLAMA: {
+            DEFAULT: 384,
+            NOMIC: 768,
+            MISTRAL: 1024
+        },
+        OPENAI: {
+            ADA: 1536,
+            DEFAULT: 1536
+        },
+        ANTHROPIC: {
+            CLAUDE: 1024,
+            DEFAULT: 1024
+        }
+    },
+
+    // Chunking parameters
+    CHUNKING: {
+        DEFAULT_SIZE: 1500,
+        OLLAMA_SIZE: 1000,
+        DEFAULT_OVERLAP: 100,
+        MAX_SIZE_FOR_SINGLE_EMBEDDING: 5000
+    },
+
+    // Search/similarity thresholds
+    SIMILARITY: {
+        DEFAULT_THRESHOLD: 0.65,
+        HIGH_THRESHOLD: 0.75,
+        LOW_THRESHOLD: 0.5
+    },
+
+    // Session management
+    SESSION: {
+        CLEANUP_INTERVAL_MS: 60 * 60 * 1000, // 1 hour
+        SESSION_EXPIRY_MS: 12 * 60 * 60 * 1000, // 12 hours
+        MAX_SESSION_MESSAGES: 10
+    },
+
+    // Content limits
+    CONTENT: {
+        MAX_NOTE_CONTENT_LENGTH: 1500,
+        MAX_TOTAL_CONTENT_LENGTH: 10000
+    }
+};
+
 // Define basic interfaces
 interface ChatMessage {
     role: 'user' | 'assistant' | 'system';
@@ -55,7 +111,7 @@ const sessions = new Map();
 let cleanupInitialized = false;

 /**
- * Initialize the cleanup timer if not already running
+ * Initialize the session cleanup timer to remove old/inactive sessions
  * Only call this after database is initialized
  */
 function initializeCleanupTimer() {
@@ -63,18 +119,18 @@ function initializeCleanupTimer() {
         return;
     }

-    // Utility function to clean sessions older than 12 hours
+    // Clean sessions that have expired based on the constants
     function cleanupOldSessions() {
-        const twelveHoursAgo = new Date(Date.now() - 12 * 60 * 60 * 1000);
+        const expiryTime = new Date(Date.now() - LLM_CONSTANTS.SESSION.SESSION_EXPIRY_MS);
         for (const [sessionId, session] of sessions.entries()) {
-            if (session.lastActive < twelveHoursAgo) {
+            if (session.lastActive < expiryTime) {
                 sessions.delete(sessionId);
             }
         }
     }

-    // Run cleanup every hour
-    setInterval(cleanupOldSessions, 60 * 60 * 1000);
+    // Run cleanup at the configured interval
+    setInterval(cleanupOldSessions, LLM_CONSTANTS.SESSION.CLEANUP_INTERVAL_MS);
     cleanupInitialized = true;
 }

@@ -563,10 +619,10 @@ async function sendMessage(req: Request, res: Response) {
                 content: context
             };

-            // Format all messages for the AI
+            // Format all messages for the AI (advanced context case)
            const aiMessages: Message[] = [
                 contextMessage,
-                ...session.messages.slice(-10).map(msg => ({
+                ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
                 }))
@@ -699,10 +755,10 @@ async function sendMessage(req: Request, res: Response) {
                 content: context
             };

-            // Format all messages for the AI
+            // Format all messages for the AI (original approach)
             const aiMessages: Message[] = [
                 contextMessage,
-                ...session.messages.slice(-10).map(msg => ({
+                ...session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
                     role: msg.role,
                     content: msg.content
                 }))
diff --git a/src/services/llm/context/chunking.ts b/src/services/llm/context/chunking.ts
index 76e0f3d13..8727001cc 100644
--- a/src/services/llm/context/chunking.ts
+++ b/src/services/llm/context/chunking.ts
@@ -49,26 +49,32 @@ export interface ChunkOptions {
 /**
  * Default options for chunking
  */
-const DEFAULT_CHUNK_OPTIONS: Required<ChunkOptions> = {
-    maxChunkSize: 1500, // Characters per chunk
-    overlapSize: 100, // Overlap between chunks
-    respectBoundaries: true,
-    includeMetadata: true,
-    metadata: {}
-};
+async function getDefaultChunkOptions(): Promise<Required<ChunkOptions>> {
+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
+
+    return {
+        maxChunkSize: LLM_CONSTANTS.CHUNKING.DEFAULT_SIZE,
+        overlapSize: LLM_CONSTANTS.CHUNKING.DEFAULT_OVERLAP,
+        respectBoundaries: true,
+        includeMetadata: true,
+        metadata: {}
+    };
+}

 /**
  * Chunk content into smaller pieces
  * Used for processing large documents and preparing them for LLMs
  */
-export function chunkContent(
+export async function chunkContent(
     content: string,
     title: string = '',
     noteId: string = '',
     options: ChunkOptions = {}
-): ContentChunk[] {
+): Promise<ContentChunk[]> {
     // Merge provided options with defaults
-    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };
+    const defaultOptions = await getDefaultChunkOptions();
+    const config: Required<ChunkOptions> = { ...defaultOptions, ...options };

     // If content is small enough, return as a single chunk
     if (content.length <= config.maxChunkSize) {
@@ -167,14 +173,15 @@ export function chunkContent(
 /**
  * Smarter chunking that tries to respect semantic boundaries like headers and sections
  */
-export function semanticChunking(
+export async function semanticChunking(
     content: string,
     title: string = '',
     noteId: string = '',
     options: ChunkOptions = {}
-): ContentChunk[] {
+): Promise<ContentChunk[]> {
     // Merge provided options with defaults
-    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };
+    const defaultOptions = await getDefaultChunkOptions();
+    const config: Required<ChunkOptions> = { ...defaultOptions, ...options };

     // If content is small enough, return as a single chunk
     if (content.length <= config.maxChunkSize) {
@@ -214,7 +221,7 @@ export function semanticChunking(

     // If no headers were found, fall back to regular chunking
     if (sections.length <= 1) {
-        return chunkContent(content, title, noteId, options);
+        return await chunkContent(content, title, noteId, options);
     }

     // Process each section
@@ -238,7 +245,7 @@ export function semanticChunking(
         }

         // Chunk this section separately
-        const sectionChunks = chunkContent(
+        const sectionChunks = await chunkContent(
             section,
             title,
             noteId,
diff --git a/src/services/llm/context/index.ts b/src/services/llm/context/index.ts
index b0fe6b841..168a226fc 100644
--- a/src/services/llm/context/index.ts
+++ b/src/services/llm/context/index.ts
@@ -161,48 +161,48 @@ export class ContextExtractor {
     /**
     * Chunk content into smaller pieces
     */
-    static chunkContent(
+    static async chunkContent(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return chunkContent(content, title, noteId, options);
     }

     /**
     * Chunk content into smaller pieces - instance method
     */
-    chunkContent(
+    async chunkContent(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return ContextExtractor.chunkContent(content, title, noteId, options);
     }

     /**
     * Smarter chunking that respects semantic boundaries
     */
-    static semanticChunking(
+    static async semanticChunking(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return semanticChunking(content, title, noteId, options);
     }

     /**
     * Smarter chunking that respects semantic boundaries - instance method
     */
-    semanticChunking(
+    async semanticChunking(
         content: string,
         title: string = '',
         noteId: string = '',
         options: ChunkOptions = {}
-    ): ContentChunk[] {
+    ): Promise<ContentChunk[]> {
         return ContextExtractor.semanticChunking(content, title, noteId, options);
     }

@@ -572,7 +572,7 @@ export class ContextExtractor {
         if (!content) return [];

         // Use the new chunking functionality
-        const chunks = chunkContent(
+        const chunks = await ContextExtractor.chunkContent(
             content,
             '',
             noteId,
@@ -580,7 +580,7 @@ export class ContextExtractor {
         );

         // Convert to the old API format which was an array of strings
-        return chunks.map(chunk => chunk.content);
+        return (await chunks).map(chunk => chunk.content);
     }

     /**
diff --git a/src/services/llm/embeddings/vector_store.ts b/src/services/llm/embeddings/vector_store.ts
index 433e597f4..46e18146c 100644
--- a/src/services/llm/embeddings/vector_store.ts
+++ b/src/services/llm/embeddings/vector_store.ts
@@ -149,8 +149,12 @@ export async function findSimilarNotes(
     providerId: string,
     modelId: string,
     limit = 10,
-    threshold = 0.65 // Slightly lowered from 0.7 to account for relationship focus
+    threshold?: number // Made optional to use constants
 ): Promise<{noteId: string, similarity: number}[]> {
+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
+    // Use provided threshold or default from constants
+    const similarityThreshold = threshold ?? LLM_CONSTANTS.SIMILARITY.DEFAULT_THRESHOLD;
     // Get all embeddings for the given provider and model
     const rows = await sql.getRows(`
         SELECT embedId, noteId, providerId, modelId, dimension, embedding
@@ -175,7 +179,7 @@ export async function findSimilarNotes(
     // Filter by threshold and sort by similarity (highest first)
     return similarities
-        .filter(item => item.similarity >= threshold)
+        .filter(item => item.similarity >= similarityThreshold)
         .sort((a, b) => b.similarity - a.similarity)
         .slice(0, limit);
 }

@@ -183,7 +187,7 @@ export async function findSimilarNotes(
 /**
  * Clean note content by removing HTML tags and normalizing whitespace
  */
-function cleanNoteContent(content: string, type: string, mime: string): string {
+async function cleanNoteContent(content: string, type: string, mime: string): Promise<string> {
     if (!content) return '';

     // If it's HTML content, remove HTML tags
@@ -214,10 +218,11 @@ function cleanNoteContent(content: string, type: string, mime: string): string {
     // Trim the content
     content = content.trim();

+    // Import constants dynamically to avoid circular dependencies
+    const { LLM_CONSTANTS } = await import('../../../routes/api/llm.js');
     // Truncate if extremely long
-    const MAX_CONTENT_LENGTH = 10000;
-    if (content.length > MAX_CONTENT_LENGTH) {
-        content = content.substring(0, MAX_CONTENT_LENGTH) + ' [content truncated]';
+    if (content.length > LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) {
+        content = content.substring(0, LLM_CONSTANTS.CONTENT.MAX_TOTAL_CONTENT_LENGTH) + ' [content truncated]';
     }

     return content;
@@ -455,7 +460,7 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbeddingContext> {
     try {
         // Get the context extractor dynamically to avoid circular dependencies
-        const { ContextExtractor } = await import('../../llm/context/index.js');
+        const { ContextExtractor } = await import('../context/index.js');
         const contextExtractor = new ContextExtractor();

-        // Get chunks of the note content
-        const chunks = await contextExtractor.getChunkedNoteContent(noteId);
+        // Get note from becca
+        const note = becca.notes[noteId];
+        if (!note) {
+            throw new Error(`Note ${noteId} not found in Becca cache`);
+        }
+
+        // Use semantic chunking for better boundaries
+        const chunks = await contextExtractor.semanticChunking(
+            context.content,
+            note.title,
+            noteId,
+            {
+                // Adjust chunk size based on provider using constants
+                maxChunkSize: provider.name === 'ollama' ?
+                    (await import('../../../routes/api/llm.js')).LLM_CONSTANTS.CHUNKING.OLLAMA_SIZE :
+                    (await import('../../../routes/api/llm.js')).LLM_CONSTANTS.CHUNKING.DEFAULT_SIZE,
+                respectBoundaries: true
+            }
+        );

         if (!chunks || chunks.length === 0) {
             // Fall back to single embedding if chunking fails
-            const embedding = await provider.generateNoteEmbeddings(context);
+            const embedding = await provider.generateEmbeddings(context.content);
             const config = provider.getConfig();
             await storeNoteEmbedding(noteId, provider.name, config.model, embedding);
+            log.info(`Generated single embedding for note ${noteId} (${note.title}) since chunking failed`);
             return;
         }

@@ -993,23 +1016,19 @@ async function processNoteWithChunking(
     let failedChunks = 0;
     const totalChunks = chunks.length;
     const failedChunkDetails: {index: number, error: string}[] = [];
+    const retryQueue: {index: number, chunk: any}[] = [];

-    // Process each chunk with a slight delay to avoid rate limits
+    log.info(`Processing ${chunks.length} chunks for note ${noteId} (${note.title})`);
+
+    // Process each chunk with a delay based on provider to avoid rate limits
     for (let i = 0; i < chunks.length; i++) {
         const chunk = chunks[i];
-        const chunkId = `chunk_${i + 1}_of_${chunks.length}`;
-
         try {
-            // Create a modified context object with just this chunk's content
-            const chunkContext: NoteEmbeddingContext = {
-                ...context,
-                content: chunk
-            };
+            // Generate embedding for this chunk's content
+            const embedding = await provider.generateEmbeddings(chunk.content);

-            // Generate embedding for this chunk
-            const embedding = await provider.generateNoteEmbeddings(chunkContext);
-
-            // Store with chunk information
+            // Store with chunk information in a unique ID format
+            const chunkIdSuffix = `${i + 1}_of_${chunks.length}`;
             await storeNoteEmbedding(
                 noteId,
                 provider.name,
@@ -1019,9 +1038,10 @@ async function processNoteWithChunking(

             successfulChunks++;

-            // Small delay between chunks to avoid rate limits
+            // Small delay between chunks to avoid rate limits - longer for Ollama
             if (i < chunks.length - 1) {
-                await new Promise(resolve => setTimeout(resolve, 100));
+                await new Promise(resolve => setTimeout(resolve,
+                    provider.name === 'ollama' ? 500 : 100));
             }
         } catch (error: any) {
             // Track the failure for this specific chunk
@@ -1031,17 +1051,62 @@ async function processNoteWithChunking(
                 error: error.message || 'Unknown error'
             });

-            log.error(`Error processing chunk ${chunkId} for note ${noteId}: ${error.message || 'Unknown error'}`);
+            // Add to retry queue
+            retryQueue.push({
+                index: i,
+                chunk: chunk
+            });
+
+            log.error(`Error processing chunk ${i + 1} for note ${noteId}: ${error.message || 'Unknown error'}`);
+        }
+    }
+
+    // Retry failed chunks with exponential backoff
+    if (retryQueue.length > 0 && retryQueue.length < chunks.length) {
+        log.info(`Retrying ${retryQueue.length} failed chunks for note ${noteId}`);
+
+        for (let j = 0; j < retryQueue.length; j++) {
+            const {index, chunk} = retryQueue[j];
+
+            try {
+                // Wait longer for retries with exponential backoff
+                await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(1.5, j)));
+
+                // Retry the embedding
+                const embedding = await provider.generateEmbeddings(chunk.content);
+
+                // Store with unique ID that indicates it was a retry
+                const chunkIdSuffix = `${index + 1}_of_${chunks.length}`;
+                await storeNoteEmbedding(
+                    noteId,
+                    provider.name,
+                    config.model,
+                    embedding
+                );
+
+                // Update counters
+                successfulChunks++;
+                failedChunks--;
+
+                // Remove from failedChunkDetails
+                const detailIndex = failedChunkDetails.findIndex(d => d.index === index + 1);
+                if (detailIndex >= 0) {
+                    failedChunkDetails.splice(detailIndex, 1);
+                }
+            } catch (error: any) {
+                log.error(`Retry failed for chunk ${index + 1} of note ${noteId}: ${error.message || 'Unknown error'}`);
+                // Keep failure count as is
+            }
+        }
+    }

     // Log information about the processed chunks
     if (successfulChunks > 0) {
-        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId}`);
+        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     if (failedChunks > 0) {
-        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId}`);
+        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     // If no chunks were successfully processed, throw an error
diff --git a/src/services/llm/trilium_context_service.ts b/src/services/llm/trilium_context_service.ts
index 312578d9e..2e9c5aa53 100644
--- a/src/services/llm/trilium_context_service.ts
+++ b/src/services/llm/trilium_context_service.ts
@@ -333,12 +333,9 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`;
     }

     /**
-     * Build a context string from relevant notes
-     * @param sources - Array of notes
-     * @param query - Original user query
-     * @returns Formatted context string
+     * Build context string from retrieved notes
      */
-    buildContextFromNotes(sources: any[], query: string): string {
+    async buildContextFromNotes(sources: any[], query: string): Promise<string> {
         if (!sources || sources.length === 0) {
             // Return a default context instead of empty string
             return "I am an AI assistant helping you with your Trilium notes. " +
" + @@ -348,13 +345,46 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; let context = `I've found some relevant information in your notes that may help answer: "${query}"\n\n`; + // Sort sources by similarity if available to prioritize most relevant + if (sources[0] && sources[0].similarity !== undefined) { + sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); + } + + // Get provider name to adjust context for different models + const providerId = this.provider?.name || 'default'; + // Get approximate max length based on provider using constants + // Import the constants dynamically to avoid circular dependencies + const { LLM_CONSTANTS } = await import('../../routes/api/llm.js'); + const maxTotalLength = providerId === 'ollama' ? LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA : + providerId === 'openai' ? LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI : + LLM_CONSTANTS.CONTEXT_WINDOW.ANTHROPIC; + + // Track total context length to avoid oversized context + let currentLength = context.length; + const maxNoteContentLength = Math.min(LLM_CONSTANTS.CONTENT.MAX_NOTE_CONTENT_LENGTH, + Math.floor(maxTotalLength / Math.max(1, sources.length))); + sources.forEach((source) => { - // Use the note title as a meaningful heading - context += `### ${source.title}\n`; + // Check if adding this source would exceed our total limit + if (currentLength >= maxTotalLength) return; + + // Build source section + let sourceSection = `### ${source.title}\n`; // Add relationship context if available if (source.parentTitle) { - context += `Part of: ${source.parentTitle}\n`; + sourceSection += `Part of: ${source.parentTitle}\n`; + } + + // Add attributes if available (for better context) + if (source.noteId) { + const note = becca.notes[source.noteId]; + if (note) { + const labels = note.getLabels(); + if (labels.length > 0) { + sourceSection += `Labels: ${labels.map(l => `#${l.name}${l.value ? '=' + l.value : ''}`).join(' ')}\n`; + } + } } if (source.content) { @@ -362,17 +392,22 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; let cleanContent = this.sanitizeNoteContent(source.content, source.type, source.mime); // Truncate content if it's too long - const maxContentLength = 1000; - if (cleanContent.length > maxContentLength) { - cleanContent = cleanContent.substring(0, maxContentLength) + " [content truncated due to length]"; + if (cleanContent.length > maxNoteContentLength) { + cleanContent = cleanContent.substring(0, maxNoteContentLength) + " [content truncated due to length]"; } - context += `${cleanContent}\n`; + sourceSection += `${cleanContent}\n`; } else { - context += "[This note doesn't contain textual content]\n"; + sourceSection += "[This note doesn't contain textual content]\n"; } - context += "\n"; + sourceSection += "\n"; + + // Check if adding this section would exceed total length limit + if (currentLength + sourceSection.length <= maxTotalLength) { + context += sourceSection; + currentLength += sourceSection.length; + } }); // Add clear instructions about how to reference the notes @@ -475,7 +510,7 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`; } // Step 3: Build context from the notes - const context = this.buildContextFromNotes(relevantNotes, userQuestion); + const context = await this.buildContextFromNotes(relevantNotes, userQuestion); return { context,