do a better job of building the context

2025-11-10 20:21:41 +08:00 · 2025-03-20 19:35:20 +00:00 · 2025-03-20 19:35:20 +00:00 · 1be70f1163
commit 1be70f1163
parent 9c1ab4f322
3 changed files with 254 additions and 19 deletions
--- a/src/routes/api/llm.ts
+++ b/src/routes/api/llm.ts
@ -670,6 +670,12 @@ async function sendMessage(req: Request, res: Response) {
                    content: context
                };

+                // DEBUG: Log context details before sending to LLM
+                log.info(`CONTEXT BEING SENT TO LLM: ${context.length} chars`);
+                log.info(`Context begins with: "${context.substring(0, 200)}..."`);
+                log.info(`Context ends with: "...${context.substring(context.length - 200)}"`);
+                log.info(`Number of notes included: ${sourceNotes.length}`);
+
                // Format all messages for the AI (advanced context case)
                const aiMessages: Message[] = [
                    contextMessage,
@ -679,6 +685,12 @@ async function sendMessage(req: Request, res: Response) {
                    }))
                ];

+                // DEBUG: Log message structure being sent to LLM
+                log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`);
+                aiMessages.forEach((msg, idx) => {
+                    log.info(`Message ${idx}: role=${msg.role}, content length=${msg.content.length} chars, begins with: "${msg.content.substring(0, 50)}..."`);
+                });
+
                // Configure chat options from session metadata
                const chatOptions: ChatCompletionOptions = {
                    temperature: session.metadata.temperature || 0.7,
--- a/src/services/llm/context/modules/context_formatter.ts
+++ b/src/services/llm/context/modules/context_formatter.ts
@ -36,6 +36,10 @@ export class ContextFormatter {
                providerId === 'ollama' ? CONTEXT_WINDOW.OLLAMA :
                CONTEXT_WINDOW.DEFAULT;

+            // DEBUG: Log context window size
+            log.info(`Context window for provider ${providerId}: ${maxTotalLength} chars`);
+            log.info(`Building context from ${sources.length} sources for query: "${query.substring(0, 50)}..."`);
+
            // Use a format appropriate for the model family
            const isAnthropicFormat = providerId === 'anthropic';

@ -47,24 +51,35 @@ export class ContextFormatter {
            // Sort sources by similarity if available to prioritize most relevant
            if (sources[0] && sources[0].similarity !== undefined) {
                sources = [...sources].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
+                // DEBUG: Log sorting information
+                log.info(`Sources sorted by similarity. Top sources: ${sources.slice(0, 3).map(s => s.title || 'Untitled').join(', ')}`);
            }

            // Track total size to avoid exceeding model context window
            let totalSize = context.length;
            const formattedSources: string[] = [];

+            // DEBUG: Track stats for logging
+            let sourcesProcessed = 0;
+            let sourcesIncluded = 0;
+            let sourcesSkipped = 0;
+            let sourcesExceededLimit = 0;
+
            // Process each source
            for (const source of sources) {
+                sourcesProcessed++;
                let content = '';
                if (typeof source === 'string') {
                    content = source;
                } else if (source.content) {
                    content = this.sanitizeNoteContent(source.content, source.type, source.mime);
                } else {
+                    sourcesSkipped++;
                    continue; // Skip invalid sources
                }

                if (!content || content.trim().length === 0) {
+                    sourcesSkipped++;
                    continue;
                }

@ -75,6 +90,7 @@ export class ContextFormatter {

                // Check if adding this would exceed our size limit
                if (totalSize + formattedSource.length > maxTotalLength) {
+                    sourcesExceededLimit++;
                    // If this is the first source, include a truncated version
                    if (formattedSources.length === 0) {
                        const availableSpace = maxTotalLength - totalSize - 100; // Buffer for closing text
@ -82,6 +98,9 @@ export class ContextFormatter {
                            const truncatedContent = `### ${title}\n${content.substring(0, availableSpace)}...\n`;
                            formattedSources.push(truncatedContent);
                            totalSize += truncatedContent.length;
+                            sourcesIncluded++;
+                            // DEBUG: Log truncation
+                            log.info(`Truncated first source "${title}" to fit in context window. Used ${truncatedContent.length} of ${formattedSource.length} chars`);
                        }
                    }
                    break;
@ -89,8 +108,13 @@ export class ContextFormatter {

                formattedSources.push(formattedSource);
                totalSize += formattedSource.length;
+                sourcesIncluded++;
            }

+            // DEBUG: Log sources stats
+            log.info(`Context building stats: processed ${sourcesProcessed}/${sources.length} sources, included ${sourcesIncluded}, skipped ${sourcesSkipped}, exceeded limit ${sourcesExceededLimit}`);
+            log.info(`Context size so far: ${totalSize}/${maxTotalLength} chars (${(totalSize/maxTotalLength*100).toFixed(2)}% of limit)`);
+
            // Add the formatted sources to the context
            context += formattedSources.join('\n');

@ -104,6 +128,9 @@ export class ContextFormatter {
                context += closing;
            }

+            // DEBUG: Log final context size
+            log.info(`Final context: ${context.length} chars, ${formattedSources.length} sources included`);
+
            return context;
        } catch (error) {
            log.error(`Error building context from notes: ${error}`);
--- a/src/services/llm/context/modules/context_service.ts
+++ b/src/services/llm/context/modules/context_service.ts
@ -134,7 +134,7 @@ export class ContextService {
                // Convert map to array and limit to top results
                relevantNotes = Array.from(allResults.values())
                    .sort((a, b) => b.similarity - a.similarity)
-                    .slice(0, 8); // Get top 8 notes
+                    .slice(0, 20); // Increased from 8 to 20 notes
            } catch (error) {
                log.error(`Error finding relevant notes: ${error}`);
                // Continue with empty notes list
@ -145,6 +145,9 @@ export class ContextService {
            const providerId = provider?.name || 'default';
            const context = await contextFormatter.buildContextFromNotes(relevantNotes, userQuestion, providerId);

+            // DEBUG: Log the initial context built from notes
+            log.info(`Initial context from buildContextFromNotes: ${context.length} chars, starting with: "${context.substring(0, 150)}..."`);
+
            // Step 4: Add agent tools context with thinking process if requested
            let enhancedContext = context;
            try {
@ -162,6 +165,9 @@ export class ContextService {
                if (agentContext) {
                    enhancedContext = enhancedContext + "\n\n" + agentContext;
                }
+
+                // DEBUG: Log the final combined context
+                log.info(`FINAL COMBINED CONTEXT: ${enhancedContext.length} chars, with content structure: ${this.summarizeContextStructure(enhancedContext)}`);
            } catch (error) {
                log.error(`Error getting agent tools context: ${error}`);
                // Continue with the basic context
@ -372,31 +378,89 @@ export class ContextService {
                log.error(`Error adding note structure to context: ${error}`);
            }

-            // Add most relevant notes from search results
-            const allSearchResults = searchResults.flatMap(r => r.results);
+            // Combine the notes from both searches - the initial relevantNotes and from vector search
+            // Start with a Map to deduplicate by noteId
+            const allNotes = new Map<string, any>();

-            // Deduplicate results by noteId
-            const uniqueResults = new Map();
-            for (const result of allSearchResults) {
-                if (!uniqueResults.has(result.noteId) || uniqueResults.get(result.noteId).similarity < result.similarity) {
-                    uniqueResults.set(result.noteId, result);
+            // Add notes from the initial search in processQuery (relevantNotes parameter)
+            if (relevantNotes && relevantNotes.length > 0) {
+                log.info(`Adding ${relevantNotes.length} notes from initial search to combined results`);
+                for (const note of relevantNotes) {
+                    if (note.noteId) {
+                        allNotes.set(note.noteId, note);
+                    }
                }
            }

-            // Sort by similarity
-            const sortedResults = Array.from(uniqueResults.values())
-                .sort((a, b) => b.similarity - a.similarity)
-                .slice(0, 10);  // Get top 10 unique results
+            // Add notes from vector search of sub-queries
+            const vectorSearchNotes = searchResults.flatMap(r => r.results);
+            if (vectorSearchNotes.length > 0) {
+                log.info(`Adding ${vectorSearchNotes.length} notes from vector search to combined results`);
+                for (const note of vectorSearchNotes) {
+                    // If note already exists, keep the one with higher similarity
+                    if (!allNotes.has(note.noteId) || note.similarity > allNotes.get(note.noteId).similarity) {
+                        allNotes.set(note.noteId, note);
+                    }
+                }
+            }

-            if (sortedResults.length > 0) {
-                agentContext += `## Relevant Information\n`;
+            // Convert the combined Map to an array and sort by similarity
+            const combinedNotes = Array.from(allNotes.values())
+                .sort((a, b) => b.similarity - a.similarity);

-                for (const result of sortedResults) {
-                    agentContext += `### ${result.title}\n`;
+            log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`);

-                    if (result.content) {
-                        // Limit content to 500 chars per note to avoid token explosion
-                        agentContext += `${result.content.substring(0, 500)}${result.content.length > 500 ? '...' : ''}\n\n`;
+            // Filter for Qu-related notes
+            const quNotes = combinedNotes.filter(result =>
+                result.title.toLowerCase().includes('qu') ||
+                (result.content && result.content.toLowerCase().includes('qu'))
+            );
+
+            if (quNotes.length > 0) {
+                log.info(`Found ${quNotes.length} Qu-related notes out of ${combinedNotes.length} total notes`);
+                quNotes.forEach((note, idx) => {
+                    if (idx < 3) { // Log just a sample to avoid log spam
+                        log.info(`Qu note ${idx+1}: "${note.title}" (similarity: ${Math.round(note.similarity * 100)}%), content length: ${note.content ? note.content.length : 0} chars`);
+                    }
+                });
+
+                // Prioritize Qu notes first, then other notes by similarity
+                const nonQuNotes = combinedNotes.filter(note => !quNotes.includes(note));
+                const finalNotes = [...quNotes, ...nonQuNotes].slice(0, 30); // Take top 30 prioritized notes
+
+                log.info(`Selected ${finalNotes.length} notes for context, with ${quNotes.length} Qu-related notes prioritized`);
+
+                // Add the selected notes to the context
+                if (finalNotes.length > 0) {
+                    agentContext += `## Relevant Information\n`;
+
+                    for (const note of finalNotes) {
+                        agentContext += `### ${note.title}\n`;
+
+                        if (note.content) {
+                            // Extract relevant content instead of just taking first 2000 chars
+                            const relevantContent = await this.extractRelevantContent(note.content, query, 2000);
+                            agentContext += `${relevantContent}\n\n`;
+                        }
+                    }
+                }
+            } else {
+                log.info(`No Qu-related notes found among the ${combinedNotes.length} combined notes`);
+
+                // Just take the top notes by similarity
+                const finalNotes = combinedNotes.slice(0, 30); // Take top 30 notes
+
+                if (finalNotes.length > 0) {
+                    agentContext += `## Relevant Information\n`;
+
+                    for (const note of finalNotes) {
+                        agentContext += `### ${note.title}\n`;
+
+                        if (note.content) {
+                            // Extract relevant content instead of just taking first 2000 chars
+                            const relevantContent = await this.extractRelevantContent(note.content, query, 2000);
+                            agentContext += `${relevantContent}\n\n`;
+                        }
                    }
                }
            }
@ -415,6 +479,15 @@ export class ContextService {
            // Log stats about the context
            log.info(`Agent tools context built: ${agentContext.length} chars, ${agentContext.split('\n').length} lines`);

+            // DEBUG: Log more detailed information about the agent tools context content
+            log.info(`Agent tools context content structure: ${this.summarizeContextStructure(agentContext)}`);
+            if (agentContext.length < 1000) {
+                log.info(`Agent tools context full content (short): ${agentContext}`);
+            } else {
+                log.info(`Agent tools context first 500 chars: ${agentContext.substring(0, 500)}...`);
+                log.info(`Agent tools context last 500 chars: ${agentContext.substring(agentContext.length - 500)}`);
+            }
+
            return agentContext;
        } catch (error) {
            log.error(`Error getting agent tools context: ${error}`);
@ -422,6 +495,31 @@ export class ContextService {
        }
    }

+    /**
+     * Summarize the structure of a context string for debugging (counts "## " and "### " header lines)
+     * @param context - The context string to summarize
+     * @returns "Empty context" for falsy input, otherwise header counts plus up to three note titles
+     */
+    private summarizeContextStructure(context: string): string {
+        if (!context) return "Empty context";
+
+        // Match headers at line start only: "## " cannot match a "### " line, unlike split('##')
+        const sections = (context.match(/^## /gm) || []).length;
+        const subSections = (context.match(/^### /gm) || []).length;
+
+        // Each "### <title>" header line counts as one referenced note
+        const noteMatches: string[] = context.match(/^### [^\n]+/gm) || [];
+        const noteCount = noteMatches.length;
+
+        // List at most three titles (header minus its "### " prefix) to keep the log line short
+        let noteTitles = "";
+        if (noteCount > 0) {
+            noteTitles = ` Note titles: ${noteMatches.slice(0, 3).map(m => m.substring(4)).join(', ')}${noteCount > 3 ? '...' : ''}`;
+        }
+
+        return `${sections} main sections, ${subSections} subsections, ${noteCount} notes referenced.${noteTitles}`;
+    }
+
    /**
     * Get semantic context for a note and query
     *
@ -586,6 +684,104 @@ export class ContextService {
    clearCaches(): void {
        cacheManager.clearAllCaches();
    }
+
+    /**
+     * Extract the most relevant portions from a note's content
+     * Splits the content into overlapping chunks, embeds each chunk and keeps the best-scoring ones.
+     * @param content - The full note content
+     * @param query - The user's query
+     * @param maxChars - Maximum characters of chunk text to include (separators are not counted)
+     * @returns The selected chunks in relevance order, or a plain truncation if extraction fails
+     */
+    private async extractRelevantContent(content: string, query: string, maxChars: number = 2000): Promise<string> {
+        if (!content || content.length <= maxChars) {
+            return content; // Return full content if it's already short enough
+        }
+
+        try {
+            // Split content into 400-char chunks, each overlapping the previous one by 100 chars
+            const chunkSize = 400;
+            const overlap = 100;
+            const chunks: string[] = [];
+
+            for (let i = 0; i < content.length; i += (chunkSize - overlap)) {
+                const end = Math.min(i + chunkSize, content.length);
+                chunks.push(content.substring(i, end));
+                if (end === content.length) break;
+            }
+
+            log.info(`Split note content into ${chunks.length} chunks for relevance extraction`);
+
+            // Get embedding provider from service
+            const provider = await providerManager.getPreferredEmbeddingProvider();
+            if (!provider) {
+                throw new Error("No embedding provider available");
+            }
+
+            // Get embeddings for the query and all chunks
+            const queryEmbedding = await provider.createEmbedding(query);
+
+            // Process chunks in smaller batches to avoid overwhelming the provider
+            const batchSize = 5;
+            const chunkEmbeddings = [];
+
+            for (let i = 0; i < chunks.length; i += batchSize) {
+                const batch = chunks.slice(i, i + batchSize);
+                const batchEmbeddings = await Promise.all(
+                    batch.map(chunk => provider.createEmbedding(chunk))
+                );
+                chunkEmbeddings.push(...batchEmbeddings);
+            }
+
+            // Calculate similarity between query and each chunk
+            const similarities: Array<{index: number, similarity: number, content: string}> =
+                chunkEmbeddings.map((embedding, index) => {
+                    const similarity = provider.calculateSimilarity(queryEmbedding, embedding);
+                    return { index, similarity, content: chunks[index] };
+                });
+
+            // Sort chunks by similarity (most relevant first)
+            similarities.sort((a, b) => b.similarity - a.similarity);
+
+            // DEBUG: Log some info about the top chunks
+            log.info(`Top 3 most relevant chunks for query "${query.substring(0, 30)}..." (out of ${chunks.length} total):`);
+            similarities.slice(0, 3).forEach((chunk, idx) => {
+                log.info(`  Chunk ${idx+1}: Similarity ${Math.round(chunk.similarity * 100)}%, Content: "${chunk.content.substring(0, 50)}..."`);
+            });
+
+            // Take the most relevant chunks up to maxChars
+            let result = '';
+            let totalChars = 0;
+            let chunksIncluded = 0;
+
+            for (const chunk of similarities) {
+                if (totalChars + chunk.content.length > maxChars) {
+                    // If adding full chunk would exceed limit, add as much as possible
+                    const remainingSpace = maxChars - totalChars;
+                    if (remainingSpace > 100) { // Only add if we can include something meaningful
+                        if (result.length > 0) result += '\n...\n'; // No leading separator on empty output
+                        result += `${chunk.content.substring(0, remainingSpace)}...`;
+                        totalChars += remainingSpace; // Count the partial chunk so the stats below are accurate
+                        chunksIncluded++;
+                        log.info(`  Added partial chunk with similarity ${Math.round(chunk.similarity * 100)}% (${remainingSpace} chars)`);
+                    }
+                    break;
+                }
+
+                if (result.length > 0) result += '\n...\n';
+                result += chunk.content;
+                totalChars += chunk.content.length;
+                chunksIncluded++;
+            }
+
+            log.info(`Extracted ${totalChars} chars of relevant content from ${content.length} chars total (${chunksIncluded} chunks included)`);
+            return result;
+        } catch (error) {
+            log.error(`Error extracting relevant content: ${error}`);
+            // Fallback to simple truncation if extraction fails
+            return content.substring(0, maxChars) + '...';
+        }
+    }
 }

 // Export singleton instance