more aggressively filter notes out that don't work for us

This commit is contained in:
perf3ct 2025-03-20 19:42:38 +00:00
parent 1be70f1163
commit 915c95f7cb
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 64 additions and 48 deletions

View File

@ -75,11 +75,14 @@ export class ContextFormatter {
content = this.sanitizeNoteContent(source.content, source.type, source.mime); content = this.sanitizeNoteContent(source.content, source.type, source.mime);
} else { } else {
sourcesSkipped++; sourcesSkipped++;
log.info(`Skipping note with no content: ${source.title || 'Untitled'}`);
continue; // Skip invalid sources continue; // Skip invalid sources
} }
if (!content || content.trim().length === 0) { // Skip if content is empty or just whitespace/minimal
if (!content || content.trim().length <= 10) {
sourcesSkipped++; sourcesSkipped++;
log.info(`Skipping note with minimal content: ${source.title || 'Untitled'}`);
continue; continue;
} }

View File

@ -133,8 +133,18 @@ export class ContextService {
// Convert map to array and limit to top results // Convert map to array and limit to top results
relevantNotes = Array.from(allResults.values()) relevantNotes = Array.from(allResults.values())
.filter(note => {
// Filter out notes with no content or very minimal content (10 characters or fewer after trimming)
const hasContent = note.content && note.content.trim().length > 10;
if (!hasContent) {
log.info(`Filtering out empty/minimal note: "${note.title}" (${note.noteId})`);
}
return hasContent;
})
.sort((a, b) => b.similarity - a.similarity) .sort((a, b) => b.similarity - a.similarity)
.slice(0, 20); // Increased from 8 to 20 notes .slice(0, 20); // Increased from 8 to 20 notes
log.info(`After filtering out empty notes, ${relevantNotes.length} relevant notes remain`);
} catch (error) { } catch (error) {
log.error(`Error finding relevant notes: ${error}`); log.error(`Error finding relevant notes: ${error}`);
// Continue with empty notes list // Continue with empty notes list
@ -406,9 +416,17 @@ export class ContextService {
// Convert the combined Map to an array and sort by similarity // Convert the combined Map to an array and sort by similarity
const combinedNotes = Array.from(allNotes.values()) const combinedNotes = Array.from(allNotes.values())
.filter(note => {
// Filter out notes with no content or very minimal content
const hasContent = note.content && note.content.trim().length > 10;
if (!hasContent) {
log.info(`Filtering out empty/minimal note from combined results: "${note.title}" (${note.noteId})`);
}
return hasContent;
})
.sort((a, b) => b.similarity - a.similarity); .sort((a, b) => b.similarity - a.similarity);
log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`); log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes after filtering out empty notes`);
// Filter for Qu-related notes // Filter for Qu-related notes
const quNotes = combinedNotes.filter(result => const quNotes = combinedNotes.filter(result =>

View File

@ -165,23 +165,48 @@ export class SemanticSearch {
// Get note content // Get note content
const content = await this.contextExtractor.getNoteContent(result.noteId); const content = await this.contextExtractor.getNoteContent(result.noteId);
// Adjust similarity score based on content quality
let adjustedSimilarity = result.similarity;
// Penalize notes with empty or minimal content
if (!content || content.trim().length <= 10) {
// Reduce similarity by 80% for empty/minimal notes
adjustedSimilarity *= 0.2;
log.info(`Adjusting similarity for empty/minimal note "${note.title}" from ${Math.round(result.similarity * 100)}% to ${Math.round(adjustedSimilarity * 100)}%`);
}
// Slightly boost notes with substantial content
else if (content.length > 100) {
// Small boost of 10% for notes with substantial content
adjustedSimilarity = Math.min(1.0, adjustedSimilarity * 1.1);
}
return { return {
noteId: result.noteId, noteId: result.noteId,
title: note.title, title: note.title,
content, content,
similarity: result.similarity similarity: adjustedSimilarity
}; };
}) })
); );
// Filter out null results // Filter out null results
const filteredResults = enrichedResults.filter(Boolean) as { const filteredResults = enrichedResults.filter(result => {
// Filter out null results and results whose adjusted similarity score is too low
if (!result) return false;
// Instead of hard filtering by content length, now we use an adjusted
// similarity score, but we can still filter extremely low scores
return result.similarity > 0.2;
}) as {
noteId: string, noteId: string,
title: string, title: string,
content: string | null, content: string | null,
similarity: number similarity: number
}[]; }[];
// Sort results by adjusted similarity
filteredResults.sort((a, b) => b.similarity - a.similarity);
// Cache results // Cache results
cacheManager.storeQueryResults(cacheKey, filteredResults); cacheManager.storeQueryResults(cacheKey, filteredResults);
@ -224,48 +249,17 @@ export class SemanticSearch {
const model = provider.getConfig().model || ''; const model = provider.getConfig().model || '';
const providerName = provider.name; const providerName = provider.name;
// Check if vectorStore has the findSimilarNotesInSet method // Use vectorStore to find similar notes within this subset
if (typeof vectorStore.findSimilarNotesInSet === 'function') { // Ideally we'd have a method to find within a specific set, but we'll use the general findSimilarNotes
// Use the dedicated method if available return await vectorStore.findSimilarNotes(
return await vectorStore.findSimilarNotesInSet(
embedding, embedding,
noteIds,
providerName, providerName,
model, model,
limit limit
); ).then(results => {
} // Filter to only include notes within our noteIds set
return results.filter(result => noteIds.includes(result.noteId));
// Fallback: Manually search through the notes in the subtree
const similarities: {noteId: string, similarity: number}[] = [];
for (const noteId of noteIds) {
try {
const noteEmbedding = await vectorStore.getEmbeddingForNote(
noteId,
providerName,
model
);
if (noteEmbedding && noteEmbedding.embedding) {
const similarity = cosineSimilarity(embedding, noteEmbedding.embedding);
if (similarity > 0.5) { // Apply a similarity threshold
similarities.push({
noteId,
similarity
}); });
}
}
} catch (error) {
// Skip notes that don't have embeddings
continue;
}
}
// Sort by similarity and return top results
return similarities
.sort((a, b) => b.similarity - a.similarity)
.slice(0, limit);
} catch (error) { } catch (error) {
log.error(`Error finding notes in branch: ${error}`); log.error(`Error finding notes in branch: ${error}`);
return []; return [];

View File

@ -10,7 +10,7 @@ You are an AI assistant integrated into TriliumNext Notes, a powerful note-takin
- Relation maps for visualizing connections between notes - Relation maps for visualizing connections between notes
- Synchronization between devices - Synchronization between devices
Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features. Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features. Be sure to summarize the notes and include the title of the notes when providing a summary.
When responding to queries: When responding to queries:
1. For complex queries, decompose them into simpler parts and address each one 1. For complex queries, decompose them into simpler parts and address each one
@ -29,4 +29,5 @@ When responding to queries:
14. When suggesting improvements to a user's note organization or structure, present these as optional enhancements rather than corrections 14. When suggesting improvements to a user's note organization or structure, present these as optional enhancements rather than corrections
15. Maintain a helpful, knowledgeable tone focused on enhancing the user's knowledge management experience 15. Maintain a helpful, knowledgeable tone focused on enhancing the user's knowledge management experience
16. Frame responses as collaborative assistance rather than authoritative instruction 16. Frame responses as collaborative assistance rather than authoritative instruction
17. Instead of telling a user on what Notes they have, provide them an overview of the notes and include the title of the notes when providing the overview. 17. Instead of simply telling a user which notes they have, summarize the notes and include each note's title in the summary.
18.