diff --git a/src/services/llm/context/modules/context_formatter.ts b/src/services/llm/context/modules/context_formatter.ts index 995fbbffd..0e89952dd 100644 --- a/src/services/llm/context/modules/context_formatter.ts +++ b/src/services/llm/context/modules/context_formatter.ts @@ -75,11 +75,14 @@ export class ContextFormatter { content = this.sanitizeNoteContent(source.content, source.type, source.mime); } else { sourcesSkipped++; + log.info(`Skipping note with no content: ${source.title || 'Untitled'}`); continue; // Skip invalid sources } - if (!content || content.trim().length === 0) { + // Skip if content is empty or just whitespace/minimal + if (!content || content.trim().length <= 10) { sourcesSkipped++; + log.info(`Skipping note with minimal content: ${source.title || 'Untitled'}`); continue; } diff --git a/src/services/llm/context/modules/context_service.ts b/src/services/llm/context/modules/context_service.ts index 5774bdf6a..dde3cd49c 100644 --- a/src/services/llm/context/modules/context_service.ts +++ b/src/services/llm/context/modules/context_service.ts @@ -133,8 +133,18 @@ export class ContextService { // Convert map to array and limit to top results relevantNotes = Array.from(allResults.values()) + .filter(note => { + // Filter out notes with no content or very minimal content (less than 10 chars) + const hasContent = note.content && note.content.trim().length > 10; + if (!hasContent) { + log.info(`Filtering out empty/minimal note: "${note.title}" (${note.noteId})`); + } + return hasContent; + }) .sort((a, b) => b.similarity - a.similarity) .slice(0, 20); // Increased from 8 to 20 notes + + log.info(`After filtering out empty notes, ${relevantNotes.length} relevant notes remain`); } catch (error) { log.error(`Error finding relevant notes: ${error}`); // Continue with empty notes list @@ -406,9 +416,17 @@ export class ContextService { // Convert the combined Map to an array and sort by similarity const combinedNotes = Array.from(allNotes.values()) + 
.filter(note => { + // Filter out notes with no content or very minimal content + const hasContent = note.content && note.content.trim().length > 10; + if (!hasContent) { + log.info(`Filtering out empty/minimal note from combined results: "${note.title}" (${note.noteId})`); + } + return hasContent; + }) .sort((a, b) => b.similarity - a.similarity); - log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`); + log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes after filtering out empty notes`); // Filter for Qu-related notes const quNotes = combinedNotes.filter(result => diff --git a/src/services/llm/context/modules/semantic_search.ts b/src/services/llm/context/modules/semantic_search.ts index e95c07cff..88a4e5e4d 100644 --- a/src/services/llm/context/modules/semantic_search.ts +++ b/src/services/llm/context/modules/semantic_search.ts @@ -165,23 +165,48 @@ export class SemanticSearch { // Get note content const content = await this.contextExtractor.getNoteContent(result.noteId); + // Adjust similarity score based on content quality + let adjustedSimilarity = result.similarity; + + // Penalize notes with empty or minimal content + if (!content || content.trim().length <= 10) { + // Reduce similarity by 80% for empty/minimal notes + adjustedSimilarity *= 0.2; + log.info(`Adjusting similarity for empty/minimal note "${note.title}" from ${Math.round(result.similarity * 100)}% to ${Math.round(adjustedSimilarity * 100)}%`); + } + // Slightly boost notes with substantial content + else if (content.length > 100) { + // Small boost of 10% for notes with substantial content + adjustedSimilarity = Math.min(1.0, adjustedSimilarity * 1.1); + } + return { noteId: result.noteId, title: note.title, content, - similarity: 
result.similarity + similarity: adjustedSimilarity }; }) ); // Filter out null results - const filteredResults = enrichedResults.filter(Boolean) as { + const filteredResults = enrichedResults.filter(result => { + // Filter out null results and notes with empty or minimal content + if (!result) return false; + + // Instead of hard filtering by content length, now we use an adjusted + // similarity score, but we can still filter extremely low scores + return result.similarity > 0.2; + }) as { noteId: string, title: string, content: string | null, similarity: number }[]; + // Sort results by adjusted similarity + filteredResults.sort((a, b) => b.similarity - a.similarity); + // Cache results cacheManager.storeQueryResults(cacheKey, filteredResults); @@ -224,48 +249,17 @@ export class SemanticSearch { const model = provider.getConfig().model || ''; const providerName = provider.name; - // Check if vectorStore has the findSimilarNotesInSet method - if (typeof vectorStore.findSimilarNotesInSet === 'function') { - // Use the dedicated method if available - return await vectorStore.findSimilarNotesInSet( - embedding, - noteIds, - providerName, - model, - limit - ); - } - - // Fallback: Manually search through the notes in the subtree - const similarities: {noteId: string, similarity: number}[] = []; - - for (const noteId of noteIds) { - try { - const noteEmbedding = await vectorStore.getEmbeddingForNote( - noteId, - providerName, - model - ); - - if (noteEmbedding && noteEmbedding.embedding) { - const similarity = cosineSimilarity(embedding, noteEmbedding.embedding); - if (similarity > 0.5) { // Apply a similarity threshold - similarities.push({ - noteId, - similarity - }); - } - } - } catch (error) { - // Skip notes that don't have embeddings - continue; - } - } - - // Sort by similarity and return top results - return similarities - .sort((a, b) => b.similarity - a.similarity) - .slice(0, limit); + // Use vectorStore to find similar notes within this subset + // Ideally 
we'd have a method to find within a specific set, but we'll use the general findSimilarNotes + return await vectorStore.findSimilarNotes( + embedding, + providerName, + model, + limit + ).then(results => { + // Filter to only include notes within our noteIds set + return results.filter(result => noteIds.includes(result.noteId)); + }); } catch (error) { log.error(`Error finding notes in branch: ${error}`); return []; diff --git a/src/services/llm/prompts/base_system_prompt.md b/src/services/llm/prompts/base_system_prompt.md index 6559620c6..f509d8154 100644 --- a/src/services/llm/prompts/base_system_prompt.md +++ b/src/services/llm/prompts/base_system_prompt.md @@ -10,7 +10,7 @@ You are an AI assistant integrated into TriliumNext Notes, a powerful note-takin - Relation maps for visualizing connections between notes - Synchronization between devices -Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features. +Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features. Be sure to summarize the notes and include the title of the notes when providing a summary. When responding to queries: 1. For complex queries, decompose them into simpler parts and address each one @@ -29,4 +29,5 @@ When responding to queries: 14. When suggesting improvements to a user's note organization or structure, present these as optional enhancements rather than corrections 15. Maintain a helpful, knowledgeable tone focused on enhancing the user's knowledge management experience 16. Frame responses as collaborative assistance rather than authoritative instruction -17. Instead of telling a user on what Notes they have, provide them an overview of the notes and include the title of the notes when providing the overview. \ No newline at end of file +17. 
Instead of telling users which notes they have, summarize the notes and include each note's title in the summary. +18. \ No newline at end of file