mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-10-15 02:31:36 +08:00
Weigh notes with title matches more heavily when giving context to the LLM
This commit is contained in:
parent
35fbc731a7
commit
a7cafceac9
@ -87,7 +87,22 @@ export class ProviderManager {
|
||||
log.error('No embedding provider available');
|
||||
return null;
|
||||
}
|
||||
return await provider.generateEmbeddings(query);
|
||||
|
||||
// Generate the embedding
|
||||
const embedding = await provider.generateEmbeddings(query);
|
||||
|
||||
if (embedding) {
|
||||
// Add the original query as a property to the embedding
|
||||
// This is used for title matching in the vector search
|
||||
Object.defineProperty(embedding, 'originalQuery', {
|
||||
value: query,
|
||||
writable: false,
|
||||
enumerable: true,
|
||||
configurable: false
|
||||
});
|
||||
}
|
||||
|
||||
return embedding;
|
||||
} catch (error) {
|
||||
log.error(`Error generating query embedding: ${error}`);
|
||||
return null;
|
||||
|
@ -333,6 +333,12 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
|
||||
const similarities = [];
|
||||
|
||||
try {
|
||||
// Try to extract the original query text if it was attached to the embedding as an `originalQuery` property
|
||||
// This will help us determine title matches
|
||||
const queryText = queryEmbedding.hasOwnProperty('originalQuery')
|
||||
? (queryEmbedding as any).originalQuery
|
||||
: '';
|
||||
|
||||
for (const e of embeddings) {
|
||||
const embVector = bufferToEmbedding(e.embedding, e.dimension);
|
||||
|
||||
@ -351,7 +357,7 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
|
||||
const isCrossModel = e.providerId !== e.queryProviderId || e.modelId !== e.queryModelId;
|
||||
|
||||
// Calculate similarity with content-aware parameters
|
||||
const similarity = enhancedCosineSimilarity(
|
||||
let similarity = enhancedCosineSimilarity(
|
||||
queryEmbedding,
|
||||
embVector,
|
||||
true, // normalize vectors to ensure consistent comparison
|
||||
@ -361,6 +367,51 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
|
||||
performanceProfile
|
||||
);
|
||||
|
||||
// Apply title match bonus if we have both a query and title
|
||||
if (queryText && e.title) {
|
||||
const titleLower = e.title.toLowerCase();
|
||||
const queryLower = queryText.toLowerCase();
|
||||
|
||||
// Check for exact title match (case insensitive)
|
||||
if (titleLower === queryLower) {
|
||||
// Add a large bonus for exact title match
|
||||
similarity += 0.3;
|
||||
log.info(`Added 0.3 exact title match bonus for note "${e.title}" (${e.noteId})`);
|
||||
}
|
||||
// Check for title containing the entire query as a substring
|
||||
else if (titleLower.includes(queryLower)) {
|
||||
// Add a significant bonus for title containing the whole query
|
||||
similarity += 0.2;
|
||||
log.info(`Added 0.2 title contains query bonus for note "${e.title}" (${e.noteId})`);
|
||||
}
|
||||
// Check for query terms appearing in the title
|
||||
else {
|
||||
// Split query into terms and check if title contains them
|
||||
const queryTerms = queryLower.split(/\s+/).filter((term: string) => term.length > 2);
|
||||
let matchCount = 0;
|
||||
|
||||
for (const term of queryTerms) {
|
||||
if (titleLower.includes(term)) {
|
||||
matchCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (matchCount > 0 && queryTerms.length > 0) {
|
||||
// Calculate proportion of matching terms and apply a scaled bonus
|
||||
const matchProportion = matchCount / queryTerms.length;
|
||||
const bonus = 0.1 * matchProportion;
|
||||
similarity += bonus;
|
||||
|
||||
if (bonus >= 0.05) {
|
||||
log.info(`Added ${bonus.toFixed(2)} partial title match bonus for note "${e.title}" (${e.noteId})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cap similarity at 1.0 to maintain expected range
|
||||
similarity = Math.min(similarity, 1.0);
|
||||
}
|
||||
|
||||
if (similarity >= threshold) {
|
||||
similarities.push({
|
||||
noteId: e.noteId,
|
||||
|
@ -544,6 +544,15 @@ class IndexService {
|
||||
const embedding = await provider.generateEmbeddings(query);
|
||||
log.info(`Generated embedding for query: "${query}" (${embedding.length} dimensions)`);
|
||||
|
||||
// Add the original query as a property to the embedding
|
||||
// This is used for title matching in the vector search
|
||||
Object.defineProperty(embedding, 'originalQuery', {
|
||||
value: query,
|
||||
writable: false,
|
||||
enumerable: true,
|
||||
configurable: false
|
||||
});
|
||||
|
||||
// Store query text in a global cache for possible regeneration with different providers
|
||||
// Use a type declaration to avoid TypeScript errors
|
||||
interface CustomGlobal {
|
||||
|
Loading…
x
Reference in New Issue
Block a user