From 84a8473beb95c36a7266df9348d9b03ca19932c2 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Mon, 17 Mar 2025 21:47:11 +0000
Subject: [PATCH] adapt or regenerate embeddings - allows users to decide

---
 .../type_widgets/options/ai_settings.ts     |  17 ++-
 src/routes/api/options.ts                   |   3 +-
 src/services/llm/embeddings/storage.ts      | 103 +++++++++++-------
 src/services/llm/embeddings/vector_utils.ts |  50 ++++++++-
 src/services/llm/index_service.ts           |  21 ++++
 src/services/options_init.ts                |   3 +-
 src/services/options_interface.ts           |   1 +
 7 files changed, 154 insertions(+), 44 deletions(-)

diff --git a/src/public/app/widgets/type_widgets/options/ai_settings.ts b/src/public/app/widgets/type_widgets/options/ai_settings.ts
index 57049dcc6..b48dc28a1 100644
--- a/src/public/app/widgets/type_widgets/options/ai_settings.ts
+++ b/src/public/app/widgets/type_widgets/options/ai_settings.ts
@@ -340,6 +340,15 @@ export default class AiSettingsWidget extends OptionsWidget {
${t("ai_llm.embedding_default_provider_description")}
+
+ + +
${t("ai_llm.embedding_dimension_strategy_description") || "Choose how to handle different embedding dimensions between providers. 'Adapt' is faster but less accurate, 'Regenerate' is more accurate but requires API calls."}
+
+
@@ -812,6 +821,11 @@ export default class AiSettingsWidget extends OptionsWidget { await this.displayValidationWarnings(); }); + const $embeddingDimensionStrategy = this.$widget.find('.embedding-dimension-strategy'); + $embeddingDimensionStrategy.on('change', async () => { + await this.updateOption('embeddingDimensionStrategy', $embeddingDimensionStrategy.val() as string); + }); + const $embeddingProviderPrecedence = this.$widget.find('.embedding-provider-precedence'); $embeddingProviderPrecedence.on('change', async () => { await this.updateOption('embeddingProviderPrecedence', $embeddingProviderPrecedence.val() as string); @@ -1151,7 +1165,8 @@ export default class AiSettingsWidget extends OptionsWidget { this.$widget.find('.embedding-similarity-threshold').val(options.embeddingSimilarityThreshold || '0.65'); this.$widget.find('.max-notes-per-llm-query').val(options.maxNotesPerLlmQuery || '10'); this.$widget.find('.embedding-default-provider').val(options.embeddingsDefaultProvider || 'openai'); - this.$widget.find('.embedding-provider-precedence').val(options.embeddingProviderPrecedence || 'openai,ollama,anthropic'); + this.$widget.find('.embedding-provider-precedence').val(options.embeddingProviderPrecedence || 'openai,ollama'); + this.$widget.find('.embedding-dimension-strategy').val(options.embeddingDimensionStrategy || 'adapt'); this.$widget.find('.embedding-generation-location').val(options.embeddingGenerationLocation || 'client'); this.$widget.find('.embedding-batch-size').val(options.embeddingBatchSize || '10'); this.$widget.find('.embedding-update-interval').val(options.embeddingUpdateInterval || '5000'); diff --git a/src/routes/api/options.ts b/src/routes/api/options.ts index 9ecee8ce1..0a061c857 100644 --- a/src/routes/api/options.ts +++ b/src/routes/api/options.ts @@ -106,7 +106,8 @@ const ALLOWED_OPTIONS = new Set([ "embeddingSimilarityThreshold", "maxNotesPerLlmQuery", "enableAutomaticIndexing", - "embeddingGenerationLocation" + 
"embeddingGenerationLocation", + "embeddingDimensionStrategy" ]); function getOptions() { diff --git a/src/services/llm/embeddings/storage.ts b/src/services/llm/embeddings/storage.ts index db939990c..a154dfa13 100644 --- a/src/services/llm/embeddings/storage.ts +++ b/src/services/llm/embeddings/storage.ts @@ -165,18 +165,25 @@ export async function findSimilarNotes( log.info(`Available embeddings: ${JSON.stringify(availableEmbeddings.map(e => ({ providerId: e.providerId, modelId: e.modelId, - count: e.count + count: e.count, + dimension: e.dimension })))}`); // Import the AIServiceManager to get provider precedence const { default: aiManager } = await import('../ai_service_manager.js'); + // Import vector utils for dimension adaptation + const { adaptEmbeddingDimensions } = await import('./vector_utils.js'); + + // Get user dimension strategy preference + const options = (await import('../../options.js')).default; + const dimensionStrategy = await options.getOption('embeddingDimensionStrategy') || 'adapt'; + log.info(`Using embedding dimension strategy: ${dimensionStrategy}`); + // Get providers in user-defined precedence order - // This uses the internal providerOrder property that's set from user preferences const availableProviderIds = availableEmbeddings.map(e => e.providerId); // Get dedicated embedding provider precedence from options - const options = (await import('../../options.js')).default; let preferredProviders: string[] = []; const embeddingPrecedence = await options.getOption('embeddingProviderPrecedence'); @@ -215,53 +222,54 @@ export async function findSimilarNotes( const providerEmbeddings = availableEmbeddings.filter(e => e.providerId === provider); if (providerEmbeddings.length > 0) { - // Find models that match the current embedding's dimensions - const dimensionMatchingModels = providerEmbeddings.filter(e => e.dimension === embedding.length); + // Use the model with the most embeddings + const bestModel = providerEmbeddings.sort((a, b) => 
b.count - a.count)[0]; + log.info(`Found fallback provider: ${provider}, model: ${bestModel.modelId}, dimension: ${bestModel.dimension}`); - // If we have models with matching dimensions, use the one with most embeddings - if (dimensionMatchingModels.length > 0) { - const bestModel = dimensionMatchingModels.sort((a, b) => b.count - a.count)[0]; - log.info(`Found fallback provider with matching dimensions (${embedding.length}): ${provider}, model: ${bestModel.modelId}`); + if (dimensionStrategy === 'adapt') { + // Dimension adaptation strategy (simple truncation/padding) + const adaptedEmbedding = adaptEmbeddingDimensions(embedding, bestModel.dimension); + log.info(`Adapted query embedding from dimension ${embedding.length} to ${adaptedEmbedding.length}`); - // Recursive call with the new provider/model, but disable further fallbacks + // Use the adapted embedding with the fallback provider return findSimilarNotes( - embedding, + adaptedEmbedding, provider, bestModel.modelId, limit, threshold, false // Prevent infinite recursion ); - } else { - // We need to regenerate embeddings with the new provider - log.info(`No models with matching dimensions found for ${provider}. 
Available models: ${JSON.stringify( - providerEmbeddings.map(e => ({ model: e.modelId, dimension: e.dimension })) - )}`); - + } + else if (dimensionStrategy === 'regenerate') { + // Regeneration strategy (regenerate embedding with fallback provider) try { // Import provider manager to get a provider instance const { default: providerManager } = await import('./providers.js'); const providerInstance = providerManager.getEmbeddingProvider(provider); if (providerInstance) { - // Use the model with the most embeddings - const bestModel = providerEmbeddings.sort((a, b) => b.count - a.count)[0]; - // Configure the model by setting it in the config - try { - // Access the config safely through the getConfig method + // Try to get the original query text + // This is a challenge - ideally we would have the original query + // For now, we'll use a global cache to store recent queries + interface CustomGlobal { + recentEmbeddingQueries?: Record; + } + const globalWithCache = global as unknown as CustomGlobal; + const recentQueries = globalWithCache.recentEmbeddingQueries || {}; + const embeddingKey = embedding.toString().substring(0, 100); + const originalQuery = recentQueries[embeddingKey]; + + if (originalQuery) { + log.info(`Found original query "${originalQuery}" for regeneration with ${provider}`); + + // Configure the model const config = providerInstance.getConfig(); config.model = bestModel.modelId; - log.info(`Trying to convert query to ${provider}/${bestModel.modelId} embedding format (dimension: ${bestModel.dimension})`); - - // Get the original query from the embedding cache if possible, or use a placeholder - // This is a hack - ideally we'd pass the query text through the whole chain - const originalQuery = "query"; // This is a placeholder, we'd need the original query text - // Generate a new embedding with the fallback provider const newEmbedding = await providerInstance.generateEmbeddings(originalQuery); - - log.info(`Successfully generated new embedding 
with provider ${provider}/${bestModel.modelId} (dimension: ${newEmbedding.length})`); + log.info(`Successfully regenerated embedding with provider ${provider}/${bestModel.modelId} (dimension: ${newEmbedding.length})`); // Now try finding similar notes with the new embedding return findSimilarNotes( @@ -272,18 +280,38 @@ export async function findSimilarNotes( threshold, false // Prevent infinite recursion ); - } catch (configErr: any) { - log.error(`Error configuring provider ${provider}: ${configErr.message}`); + } else { + log.info(`Original query not found for regeneration, falling back to adaptation`); + // Fall back to adaptation if we can't find the original query + const adaptedEmbedding = adaptEmbeddingDimensions(embedding, bestModel.dimension); + return findSimilarNotes( + adaptedEmbedding, + provider, + bestModel.modelId, + limit, + threshold, + false + ); } } } catch (err: any) { - log.error(`Error converting embedding format: ${err.message}`); + log.error(`Error regenerating embedding: ${err.message}`); + // Fall back to adaptation on error + const adaptedEmbedding = adaptEmbeddingDimensions(embedding, bestModel.dimension); + return findSimilarNotes( + adaptedEmbedding, + provider, + bestModel.modelId, + limit, + threshold, + false + ); } } } } - log.error(`No suitable fallback providers found with compatible dimensions. Current embedding dimension: ${embedding.length}`); + log.error(`No suitable fallback providers found. 
Current embedding dimension: ${embedding.length}`); log.info(`Available embeddings: ${JSON.stringify(availableEmbeddings.map(e => ({ providerId: e.providerId, modelId: e.modelId, @@ -307,13 +335,8 @@ export async function findSimilarNotes( const rowData = row as any; const rowEmbedding = bufferToEmbedding(rowData.embedding, rowData.dimension); - // Check if dimensions match before calculating similarity - if (rowEmbedding.length !== embedding.length) { - log.info(`Skipping embedding ${rowData.embedId} - dimension mismatch: ${rowEmbedding.length} vs ${embedding.length}`); - continue; - } - try { + // cosineSimilarity will automatically adapt dimensions if needed const similarity = cosineSimilarity(embedding, rowEmbedding); similarities.push({ noteId: rowData.noteId, diff --git a/src/services/llm/embeddings/vector_utils.ts b/src/services/llm/embeddings/vector_utils.ts index a1f3e7bd7..40c56c7dc 100644 --- a/src/services/llm/embeddings/vector_utils.ts +++ b/src/services/llm/embeddings/vector_utils.ts @@ -1,9 +1,11 @@ /** * Computes the cosine similarity between two vectors + * If dimensions don't match, automatically adapts the first vector to match the second */ export function cosineSimilarity(a: Float32Array, b: Float32Array): number { + // If dimensions don't match, adapt 'a' to match 'b' if (a.length !== b.length) { - throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`); + a = adaptEmbeddingDimensions(a, b.length); } let dotProduct = 0; @@ -26,6 +28,52 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number { return dotProduct / (aMagnitude * bMagnitude); } +/** + * Adapts an embedding to match target dimensions + * Uses a simple truncation (if source is larger) or zero-padding (if source is smaller) + * + * @param sourceEmbedding The original embedding + * @param targetDimension The desired dimension + * @returns A new embedding with the target dimensions + */ +export function 
adaptEmbeddingDimensions(sourceEmbedding: Float32Array, targetDimension: number): Float32Array { + const sourceDimension = sourceEmbedding.length; + + // If dimensions already match, return the original + if (sourceDimension === targetDimension) { + return sourceEmbedding; + } + + // Create a new embedding with target dimensions + const adaptedEmbedding = new Float32Array(targetDimension); + + if (sourceDimension < targetDimension) { + // If source is smaller, copy all values and pad with zeros + adaptedEmbedding.set(sourceEmbedding); + // Rest of the array is already initialized to zeros + } else { + // If source is larger, truncate to target dimension + for (let i = 0; i < targetDimension; i++) { + adaptedEmbedding[i] = sourceEmbedding[i]; + } + } + + // Normalize the adapted embedding to maintain unit length + let magnitude = 0; + for (let i = 0; i < targetDimension; i++) { + magnitude += adaptedEmbedding[i] * adaptedEmbedding[i]; + } + + magnitude = Math.sqrt(magnitude); + if (magnitude > 0) { + for (let i = 0; i < targetDimension; i++) { + adaptedEmbedding[i] /= magnitude; + } + } + + return adaptedEmbedding; +} + /** * Converts embedding Float32Array to Buffer for storage in SQLite */ diff --git a/src/services/llm/index_service.ts b/src/services/llm/index_service.ts index 34c055fdd..ef6a4f64e 100644 --- a/src/services/llm/index_service.ts +++ b/src/services/llm/index_service.ts @@ -543,6 +543,27 @@ class IndexService { const embedding = await provider.generateEmbeddings(query); log.info(`Generated embedding for query: "${query}" (${embedding.length} dimensions)`); + // Store query text in a global cache for possible regeneration with different providers + // Use a type declaration to avoid TypeScript errors + interface CustomGlobal { + recentEmbeddingQueries?: Record; + } + const globalWithCache = global as unknown as CustomGlobal; + + if (!globalWithCache.recentEmbeddingQueries) { + globalWithCache.recentEmbeddingQueries = {}; + } + + // Use a substring of 
the embedding as a key (full embedding is too large) + const embeddingKey = embedding.toString().substring(0, 100); + globalWithCache.recentEmbeddingQueries[embeddingKey] = query; + + // Limit cache size to prevent memory leaks (keep max 50 recent queries) + const keys = Object.keys(globalWithCache.recentEmbeddingQueries); + if (keys.length > 50) { + delete globalWithCache.recentEmbeddingQueries[keys[0]]; + } + // Get Note IDs to search, optionally filtered by branch let similarNotes = []; diff --git a/src/services/options_init.ts b/src/services/options_init.ts index 0f11c9a90..ccc529bae 100644 --- a/src/services/options_init.ts +++ b/src/services/options_init.ts @@ -189,7 +189,8 @@ const defaultOptions: DefaultOption[] = [ { name: "aiSystemPrompt", value: "", isSynced: true }, { name: "aiProviderPrecedence", value: "openai,anthropic,ollama", isSynced: true }, { name: "embeddingsDefaultProvider", value: "openai", isSynced: true }, - { name: "embeddingProviderPrecedence", value: "openai,ollama,anthropic", isSynced: true }, + { name: "embeddingProviderPrecedence", value: "openai,ollama", isSynced: true }, + { name: "embeddingDimensionStrategy", value: "adapt", isSynced: true }, { name: "enableAutomaticIndexing", value: "true", isSynced: true }, { name: "embeddingSimilarityThreshold", value: "0.65", isSynced: true }, { name: "maxNotesPerLlmQuery", value: "10", isSynced: true }, diff --git a/src/services/options_interface.ts b/src/services/options_interface.ts index 857d4b765..38f0778d9 100644 --- a/src/services/options_interface.ts +++ b/src/services/options_interface.ts @@ -77,6 +77,7 @@ export interface OptionDefinitions extends KeyboardShortcutsOptions