mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-08-10 10:22:29 +08:00
more aggressively filter notes out that don't work for us
This commit is contained in:
parent
1be70f1163
commit
915c95f7cb
@ -75,11 +75,14 @@ export class ContextFormatter {
|
||||
content = this.sanitizeNoteContent(source.content, source.type, source.mime);
|
||||
} else {
|
||||
sourcesSkipped++;
|
||||
log.info(`Skipping note with no content: ${source.title || 'Untitled'}`);
|
||||
continue; // Skip invalid sources
|
||||
}
|
||||
|
||||
if (!content || content.trim().length === 0) {
|
||||
// Skip if content is empty or just whitespace/minimal
|
||||
if (!content || content.trim().length <= 10) {
|
||||
sourcesSkipped++;
|
||||
log.info(`Skipping note with minimal content: ${source.title || 'Untitled'}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -133,8 +133,18 @@ export class ContextService {
|
||||
|
||||
// Convert map to array and limit to top results
|
||||
relevantNotes = Array.from(allResults.values())
|
||||
.filter(note => {
|
||||
// Filter out notes with no content or very minimal content (10 chars or fewer after trimming)
|
||||
const hasContent = note.content && note.content.trim().length > 10;
|
||||
if (!hasContent) {
|
||||
log.info(`Filtering out empty/minimal note: "${note.title}" (${note.noteId})`);
|
||||
}
|
||||
return hasContent;
|
||||
})
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, 20); // Increased from 8 to 20 notes
|
||||
|
||||
log.info(`After filtering out empty notes, ${relevantNotes.length} relevant notes remain`);
|
||||
} catch (error) {
|
||||
log.error(`Error finding relevant notes: ${error}`);
|
||||
// Continue with empty notes list
|
||||
@ -406,9 +416,17 @@ export class ContextService {
|
||||
|
||||
// Convert the combined Map to an array and sort by similarity
|
||||
const combinedNotes = Array.from(allNotes.values())
|
||||
.filter(note => {
|
||||
// Filter out notes with no content or very minimal content
|
||||
const hasContent = note.content && note.content.trim().length > 10;
|
||||
if (!hasContent) {
|
||||
log.info(`Filtering out empty/minimal note from combined results: "${note.title}" (${note.noteId})`);
|
||||
}
|
||||
return hasContent;
|
||||
})
|
||||
.sort((a, b) => b.similarity - a.similarity);
|
||||
|
||||
log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes`);
|
||||
log.info(`Combined ${relevantNotes.length} notes from initial search with ${vectorSearchNotes.length} notes from vector search, resulting in ${combinedNotes.length} unique notes after filtering out empty notes`);
|
||||
|
||||
// Filter for Qu-related notes
|
||||
const quNotes = combinedNotes.filter(result =>
|
||||
|
@ -165,23 +165,48 @@ export class SemanticSearch {
|
||||
// Get note content
|
||||
const content = await this.contextExtractor.getNoteContent(result.noteId);
|
||||
|
||||
// Adjust similarity score based on content quality
|
||||
let adjustedSimilarity = result.similarity;
|
||||
|
||||
// Penalize notes with empty or minimal content
|
||||
if (!content || content.trim().length <= 10) {
|
||||
// Reduce similarity by 80% for empty/minimal notes
|
||||
adjustedSimilarity *= 0.2;
|
||||
log.info(`Adjusting similarity for empty/minimal note "${note.title}" from ${Math.round(result.similarity * 100)}% to ${Math.round(adjustedSimilarity * 100)}%`);
|
||||
}
|
||||
// Slightly boost notes with substantial content
|
||||
else if (content.length > 100) {
|
||||
// Small boost of 10% for notes with substantial content
|
||||
adjustedSimilarity = Math.min(1.0, adjustedSimilarity * 1.1);
|
||||
}
|
||||
|
||||
return {
|
||||
noteId: result.noteId,
|
||||
title: note.title,
|
||||
content,
|
||||
similarity: result.similarity
|
||||
similarity: adjustedSimilarity
|
||||
};
|
||||
})
|
||||
);
|
||||
|
||||
// Filter out null results
|
||||
const filteredResults = enrichedResults.filter(Boolean) as {
|
||||
const filteredResults = enrichedResults.filter(result => {
|
||||
// Filter out null results and notes with empty or minimal content
|
||||
if (!result) return false;
|
||||
|
||||
// Instead of hard filtering by content length, now we use an adjusted
|
||||
// similarity score, but we can still filter extremely low scores
|
||||
return result.similarity > 0.2;
|
||||
}) as {
|
||||
noteId: string,
|
||||
title: string,
|
||||
content: string | null,
|
||||
similarity: number
|
||||
}[];
|
||||
|
||||
// Sort results by adjusted similarity
|
||||
filteredResults.sort((a, b) => b.similarity - a.similarity);
|
||||
|
||||
// Cache results
|
||||
cacheManager.storeQueryResults(cacheKey, filteredResults);
|
||||
|
||||
@ -224,48 +249,17 @@ export class SemanticSearch {
|
||||
const model = provider.getConfig().model || '';
|
||||
const providerName = provider.name;
|
||||
|
||||
// Check if vectorStore has the findSimilarNotesInSet method
|
||||
if (typeof vectorStore.findSimilarNotesInSet === 'function') {
|
||||
// Use the dedicated method if available
|
||||
return await vectorStore.findSimilarNotesInSet(
|
||||
embedding,
|
||||
noteIds,
|
||||
providerName,
|
||||
model,
|
||||
limit
|
||||
);
|
||||
}
|
||||
|
||||
// Fallback: Manually search through the notes in the subtree
|
||||
const similarities: {noteId: string, similarity: number}[] = [];
|
||||
|
||||
for (const noteId of noteIds) {
|
||||
try {
|
||||
const noteEmbedding = await vectorStore.getEmbeddingForNote(
|
||||
noteId,
|
||||
providerName,
|
||||
model
|
||||
);
|
||||
|
||||
if (noteEmbedding && noteEmbedding.embedding) {
|
||||
const similarity = cosineSimilarity(embedding, noteEmbedding.embedding);
|
||||
if (similarity > 0.5) { // Apply a similarity threshold
|
||||
similarities.push({
|
||||
noteId,
|
||||
similarity
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Skip notes that don't have embeddings
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by similarity and return top results
|
||||
return similarities
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, limit);
|
||||
// Use vectorStore to find similar notes within this subset
|
||||
// Ideally we'd have a method to find within a specific set, but we'll use the general findSimilarNotes
|
||||
return await vectorStore.findSimilarNotes(
|
||||
embedding,
|
||||
providerName,
|
||||
model,
|
||||
limit
|
||||
).then(results => {
|
||||
// Filter to only include notes within our noteIds set
|
||||
return results.filter(result => noteIds.includes(result.noteId));
|
||||
});
|
||||
} catch (error) {
|
||||
log.error(`Error finding notes in branch: ${error}`);
|
||||
return [];
|
||||
|
@ -10,7 +10,7 @@ You are an AI assistant integrated into TriliumNext Notes, a powerful note-takin
|
||||
- Relation maps for visualizing connections between notes
|
||||
- Synchronization between devices
|
||||
|
||||
Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features.
|
||||
Your primary goal is to help users find information in their notes, answer questions based on their knowledge base, and provide assistance with using TriliumNext Notes features. Be sure to summarize the notes and include the title of the notes when providing a summary.
|
||||
|
||||
When responding to queries:
|
||||
1. For complex queries, decompose them into simpler parts and address each one
|
||||
@ -29,4 +29,5 @@ When responding to queries:
|
||||
14. When suggesting improvements to a user's note organization or structure, present these as optional enhancements rather than corrections
|
||||
15. Maintain a helpful, knowledgeable tone focused on enhancing the user's knowledge management experience
|
||||
16. Frame responses as collaborative assistance rather than authoritative instruction
|
||||
17. Instead of telling a user what notes they have, provide an overview of the notes and include each note's title in the overview.
|
||||
17. Instead of telling a user what notes they have, summarize the notes and include each note's title in the summary.
|
||||
18.
|
Loading…
x
Reference in New Issue
Block a user