Weight notes with title matches more heavily when giving context to the LLM

2025-10-15 18:56:07 +08:00 · 2025-03-26 23:05:16 +00:00 · 2025-03-26 23:05:16 +00:00 · a7cafceac9
commit a7cafceac9
parent 35fbc731a7
3 changed files with 77 additions and 2 deletions
--- a/src/services/llm/context/modules/provider_manager.ts
+++ b/src/services/llm/context/modules/provider_manager.ts
@ -87,7 +87,22 @@ export class ProviderManager {
                log.error('No embedding provider available');
                return null;
            }
-            return await provider.generateEmbeddings(query);
+
            // Generate the embedding
            const embedding = await provider.generateEmbeddings(query);
            if (embedding) {
                // Add the original query as a property to the embedding
                // This is used for title matching in the vector search
                Object.defineProperty(embedding, 'originalQuery', {
                    value: query,
                    writable: false,
                    enumerable: true,
                    configurable: false
                });
            }
            return embedding;
        } catch (error) {
            log.error(`Error generating query embedding: ${error}`);
            return null;
--- a/src/services/llm/embeddings/storage.ts
+++ b/src/services/llm/embeddings/storage.ts
@ -333,6 +333,12 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
    const similarities = [];
    try {
        // Recover the original query text if it was attached to the query embedding
        // as an `originalQuery` property; it is used below to detect title matches
        const queryText = queryEmbedding.hasOwnProperty('originalQuery')
            ? (queryEmbedding as any).originalQuery
            : '';
        for (const e of embeddings) {
            const embVector = bufferToEmbedding(e.embedding, e.dimension);
@ -351,7 +357,7 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
            const isCrossModel = e.providerId !== e.queryProviderId || e.modelId !== e.queryModelId;
            // Calculate similarity with content-aware parameters
-            const similarity = enhancedCosineSimilarity(
+            let similarity = enhancedCosineSimilarity(
                queryEmbedding,
                embVector,
                true, // normalize vectors to ensure consistent comparison
@ -361,6 +367,51 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
                performanceProfile
            );
            // Apply title match bonus if we have both a query and title
            if (queryText && e.title) {
                const titleLower = e.title.toLowerCase();
                const queryLower = queryText.toLowerCase();
                // Check for exact title match (case insensitive)
                if (titleLower === queryLower) {
                    // Add a large bonus for exact title match
                    similarity += 0.3;
                    log.info(`Added 0.3 exact title match bonus for note "${e.title}" (${e.noteId})`);
                }
                // Check for title containing the entire query as a substring
                else if (titleLower.includes(queryLower)) {
                    // Add a significant bonus for title containing the whole query
                    similarity += 0.2;
                    log.info(`Added 0.2 title contains query bonus for note "${e.title}" (${e.noteId})`);
                }
                // Check for query terms appearing in the title
                else {
                    // Split query into terms and check if title contains them
                    const queryTerms = queryLower.split(/\s+/).filter((term: string) => term.length > 2);
                    let matchCount = 0;
                    for (const term of queryTerms) {
                        if (titleLower.includes(term)) {
                            matchCount++;
                        }
                    }
                    if (matchCount > 0 && queryTerms.length > 0) {
                        // Calculate proportion of matching terms and apply a scaled bonus
                        const matchProportion = matchCount / queryTerms.length;
                        const bonus = 0.1 * matchProportion;
                        similarity += bonus;
                        if (bonus >= 0.05) {
                            log.info(`Added ${bonus.toFixed(2)} partial title match bonus for note "${e.title}" (${e.noteId})`);
                        }
                    }
                }
                // Cap similarity at 1.0 to maintain expected range
                similarity = Math.min(similarity, 1.0);
            }
            if (similarity >= threshold) {
                similarities.push({
                    noteId: e.noteId,
--- a/src/services/llm/index_service.ts
+++ b/src/services/llm/index_service.ts
@ -544,6 +544,15 @@ class IndexService {
            const embedding = await provider.generateEmbeddings(query);
            log.info(`Generated embedding for query: "${query}" (${embedding.length} dimensions)`);
            // Add the original query as a property to the embedding
            // This is used for title matching in the vector search
            Object.defineProperty(embedding, 'originalQuery', {
                value: query,
                writable: false,
                enumerable: true,
                configurable: false
            });
            // Store query text in a global cache for possible regeneration with different providers
            // Use a type declaration to avoid TypeScript errors
            interface CustomGlobal {