diff --git a/src/services/llm/base_ai_service.ts b/src/services/llm/base_ai_service.ts index 080dd1fa5..3a02d0fc7 100644 --- a/src/services/llm/base_ai_service.ts +++ b/src/services/llm/base_ai_service.ts @@ -11,7 +11,7 @@ export abstract class BaseAIService implements AIService { abstract generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise; isAvailable(): boolean { - return options.getOption('aiEnabled') === 'true'; // Base check if AI is enabled globally + return options.getOptionBool('aiEnabled'); // Base check if AI is enabled globally } getName(): string { diff --git a/src/services/llm/embeddings/providers.ts b/src/services/llm/embeddings/providers.ts new file mode 100644 index 000000000..34d8c8754 --- /dev/null +++ b/src/services/llm/embeddings/providers.ts @@ -0,0 +1,299 @@ +import options from "../../options.js"; +import log from "../../log.js"; +import sql from "../../sql.js"; +import dateUtils from "../../date_utils.js"; +import { randomString } from "../../utils.js"; +import type { EmbeddingProvider, EmbeddingConfig } from "./embeddings_interface.js"; +import { OpenAIEmbeddingProvider } from "./providers/openai.js"; +import { OllamaEmbeddingProvider } from "./providers/ollama.js"; +import { AnthropicEmbeddingProvider } from "./providers/anthropic.js"; +import { LocalEmbeddingProvider } from "./providers/local.js"; + +const providers = new Map(); + +/** + * Register a new embedding provider + */ +export function registerEmbeddingProvider(provider: EmbeddingProvider) { + providers.set(provider.name, provider); + log.info(`Registered embedding provider: ${provider.name}`); +} + +/** + * Get all registered embedding providers + */ +export function getEmbeddingProviders(): EmbeddingProvider[] { + return Array.from(providers.values()); +} + +/** + * Get a specific embedding provider by name + */ +export function getEmbeddingProvider(name: string): EmbeddingProvider | undefined { + return providers.get(name); +} + +/** + * Get all enabled embedding providers + */ +export async function getEnabledEmbeddingProviders(): Promise { + if (!(await options.getOptionBool('aiEnabled'))) { + return []; + } + + // Get enabled providers from database + const enabledProviders = await sql.getRows(` + SELECT providerId, name, config + FROM embedding_providers + WHERE isEnabled = 1 + ORDER BY priority DESC` + ); + + const result: EmbeddingProvider[] = []; + + for (const row of enabledProviders) { + const rowData = row as any; + const provider = providers.get(rowData.name); + + if (provider) { + result.push(provider); + } else { + // Use error instead of warn if warn is not available + log.error(`Enabled embedding provider ${rowData.name} not found in registered providers`); + } + } + + return result; +} + +/** + * Create a new embedding provider configuration in the database + */ +export async function createEmbeddingProviderConfig( + name: string, + config: EmbeddingConfig, + isEnabled = false, + priority = 0 +): Promise { + const providerId = randomString(16); + const now = dateUtils.localNowDateTime(); + const utcNow = dateUtils.utcNowDateTime(); + + await sql.execute(` + INSERT INTO embedding_providers + (providerId, name, isEnabled, priority, config, + dateCreated, utcDateCreated, dateModified, utcDateModified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [providerId, name, isEnabled ? 1 : 0, priority, JSON.stringify(config), + now, utcNow, now, utcNow] + ); + + return providerId; +} + +/** + * Update an existing embedding provider configuration + */ +export async function updateEmbeddingProviderConfig( + providerId: string, + isEnabled?: boolean, + priority?: number, + config?: EmbeddingConfig +): Promise { + const now = dateUtils.localNowDateTime(); + const utcNow = dateUtils.utcNowDateTime(); + + // Get existing provider + const provider = await sql.getRow( + "SELECT * FROM embedding_providers WHERE providerId = ?", + [providerId] + ); + + if (!provider) { + return false; + } + + // Build update query parts + const updates = []; + const params: any[] = []; + + if (isEnabled !== undefined) { + updates.push("isEnabled = ?"); + params.push(isEnabled ? 1 : 0); + } + + if (priority !== undefined) { + updates.push("priority = ?"); + params.push(priority); + } + + if (config) { + updates.push("config = ?"); + params.push(JSON.stringify(config)); + } + + if (updates.length === 0) { + return true; // Nothing to update + } + + updates.push("dateModified = ?"); + updates.push("utcDateModified = ?"); + params.push(now, utcNow); + + params.push(providerId); + + // Execute update + await sql.execute( + `UPDATE embedding_providers SET ${updates.join(", ")} WHERE providerId = ?`, + params + ); + + return true; +} + +/** + * Delete an embedding provider configuration + */ +export async function deleteEmbeddingProviderConfig(providerId: string): Promise { + const result = await sql.execute( + "DELETE FROM embedding_providers WHERE providerId = ?", + [providerId] + ); + + return result.changes > 0; +} + +/** + * Get all embedding provider configurations from the database + */ +export async function getEmbeddingProviderConfigs() { + return await sql.getRows("SELECT * FROM embedding_providers ORDER BY priority DESC"); +} + +/** + * Initialize the default embedding providers + */ +export async function initializeDefaultProviders() { + // Register built-in providers + try { + // Register OpenAI provider if API key is configured + const openaiApiKey = await options.getOption('openaiApiKey'); + if (openaiApiKey) { + const openaiModel = await options.getOption('openaiDefaultModel') || 'text-embedding-3-small'; + const openaiBaseUrl = await options.getOption('openaiBaseUrl') || 'https://api.openai.com/v1'; + + registerEmbeddingProvider(new OpenAIEmbeddingProvider({ + model: openaiModel, + dimension: 1536, // OpenAI's typical dimension + type: 'float32', + apiKey: openaiApiKey, + baseUrl: openaiBaseUrl + })); + + // Create OpenAI provider config if it doesn't exist + const existingOpenAI = await sql.getRow( + "SELECT * FROM embedding_providers WHERE name = ?", + ['openai'] + ); + + if (!existingOpenAI) { + await createEmbeddingProviderConfig('openai', { + model: openaiModel, + dimension: 1536, + type: 'float32' + }, true, 100); + } + } + + // Register Anthropic provider if API key is configured + const anthropicApiKey = await options.getOption('anthropicApiKey'); + if (anthropicApiKey) { + const anthropicModel = await options.getOption('anthropicDefaultModel') || 'claude-3-haiku-20240307'; + const anthropicBaseUrl = await options.getOption('anthropicBaseUrl') || 'https://api.anthropic.com/v1'; + + registerEmbeddingProvider(new AnthropicEmbeddingProvider({ + model: anthropicModel, + dimension: 1024, // Anthropic's embedding dimension + type: 'float32', + apiKey: anthropicApiKey, + baseUrl: anthropicBaseUrl + })); + + // Create Anthropic provider config if it doesn't exist + const existingAnthropic = await sql.getRow( + "SELECT * FROM embedding_providers WHERE name = ?", + ['anthropic'] + ); + + if (!existingAnthropic) { + await createEmbeddingProviderConfig('anthropic', { + model: anthropicModel, + dimension: 1024, + type: 'float32' + }, true, 75); + } + } + + // Register Ollama provider if enabled + if (await options.getOptionBool('ollamaEnabled')) { + const ollamaModel = await options.getOption('ollamaDefaultModel') || 'llama3'; + const ollamaBaseUrl = await options.getOption('ollamaBaseUrl') || 'http://localhost:11434'; + + registerEmbeddingProvider(new OllamaEmbeddingProvider({ + model: ollamaModel, + dimension: 4096, // Typical for Ollama models + type: 'float32', + baseUrl: ollamaBaseUrl + })); + + // Create Ollama provider config if it doesn't exist + const existingOllama = await sql.getRow( + "SELECT * FROM embedding_providers WHERE name = ?", + ['ollama'] + ); + + if (!existingOllama) { + await createEmbeddingProviderConfig('ollama', { + model: ollamaModel, + dimension: 4096, + type: 'float32' + }, true, 50); + } + } + + // Always register local provider as fallback + registerEmbeddingProvider(new LocalEmbeddingProvider({ + model: 'local', + dimension: 384, + type: 'float32' + })); + + // Create local provider config if it doesn't exist + const existingLocal = await sql.getRow( + "SELECT * FROM embedding_providers WHERE name = ?", + ['local'] + ); + + if (!existingLocal) { + await createEmbeddingProviderConfig('local', { + model: 'local', + dimension: 384, + type: 'float32' + }, true, 10); + } + } catch (error: any) { + log.error(`Error initializing default embedding providers: ${error.message || 'Unknown error'}`); + } +} + +export default { + registerEmbeddingProvider, + getEmbeddingProviders, + getEmbeddingProvider, + getEnabledEmbeddingProviders, + createEmbeddingProviderConfig, + updateEmbeddingProviderConfig, + deleteEmbeddingProviderConfig, + getEmbeddingProviderConfigs, + initializeDefaultProviders +}; diff --git a/src/services/llm/embeddings/providers/anthropic.ts b/src/services/llm/embeddings/providers/anthropic.ts new file mode 100644 index 000000000..3c5156f54 --- /dev/null +++ b/src/services/llm/embeddings/providers/anthropic.ts @@ -0,0 +1,78 @@ +import { BaseEmbeddingProvider } from "../base_embeddings.js"; +import type { EmbeddingConfig } from "../embeddings_interface.js"; +import axios from "axios"; +import log from "../../../log.js"; + +interface AnthropicEmbeddingConfig extends EmbeddingConfig { + apiKey: string; + baseUrl: string; +} + +/** + * Anthropic (Claude) embedding provider implementation + */ +export class AnthropicEmbeddingProvider extends BaseEmbeddingProvider { + name = "anthropic"; + private apiKey: string; + private baseUrl: string; + + constructor(config: AnthropicEmbeddingConfig) { + super(config); + this.apiKey = config.apiKey; + this.baseUrl = config.baseUrl; + } + + /** + * Generate embeddings for a single text + */ + async generateEmbeddings(text: string): Promise { + try { + const response = await axios.post( + `${this.baseUrl}/embeddings`, + { + model: this.config.model || "claude-3-haiku-20240307", + input: text, + encoding_format: "float" + }, + { + headers: { + "Content-Type": "application/json", + "x-api-key": this.apiKey, + "anthropic-version": "2023-06-01" + } + } + ); + + if (response.data && response.data.embedding) { + const embedding = response.data.embedding; + return new Float32Array(embedding); + } else { + throw new Error("Unexpected response structure from Anthropic API"); + } + } catch (error: any) { + const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; + log.error(`Anthropic embedding error: ${errorMessage}`); + throw new Error(`Anthropic embedding error: ${errorMessage}`); + } + } + + /** + * Generate embeddings for multiple texts in a single batch + * + * Note: Anthropic doesn't currently support batch embedding, so we process each text individually + */ + async generateBatchEmbeddings(texts: string[]): Promise { + if (texts.length === 0) { + return []; + } + + const results: Float32Array[] = []; + + for (const text of texts) { + const embedding = await this.generateEmbeddings(text); + results.push(embedding); + } + + return results; + } +} diff --git a/src/services/llm/embeddings/providers/local.ts b/src/services/llm/embeddings/providers/local.ts new file mode 100644 index 000000000..1477af40e --- /dev/null +++ b/src/services/llm/embeddings/providers/local.ts @@ -0,0 +1,73 @@ +import { BaseEmbeddingProvider } from "../base_embeddings.js"; +import type { EmbeddingConfig } from "../embeddings_interface.js"; +import crypto from "crypto"; + +/** + * Local embedding provider implementation + * + * This is a fallback provider that generates simple deterministic embeddings + * using cryptographic hashing. These are not semantic vectors but can be used + * for exact matches when no other providers are available. + */ +export class LocalEmbeddingProvider extends BaseEmbeddingProvider { + name = "local"; + + constructor(config: EmbeddingConfig) { + super(config); + } + + /** + * Generate a simple embedding by hashing the text + */ + async generateEmbeddings(text: string): Promise { + const dimension = this.config.dimension || 384; + const result = new Float32Array(dimension); + + // Generate a hash of the input text + const hash = crypto.createHash('sha256').update(text).digest(); + + // Use the hash to seed a deterministic PRNG + let seed = 0; + for (let i = 0; i < hash.length; i += 4) { + seed = (seed * 65536 + hash.readUInt32LE(i % (hash.length - 3))) >>> 0; + } + + // Generate pseudo-random but deterministic values for the embedding + for (let i = 0; i < dimension; i++) { + // Generate next pseudo-random number + seed = (seed * 1664525 + 1013904223) >>> 0; + + // Convert to a float between -1 and 1 + result[i] = (seed / 2147483648) - 1; + } + + // Normalize the vector + let magnitude = 0; + for (let i = 0; i < dimension; i++) { + magnitude += result[i] * result[i]; + } + + magnitude = Math.sqrt(magnitude); + if (magnitude > 0) { + for (let i = 0; i < dimension; i++) { + result[i] /= magnitude; + } + } + + return result; + } + + /** + * Generate embeddings for multiple texts + */ + async generateBatchEmbeddings(texts: string[]): Promise { + const results: Float32Array[] = []; + + for (const text of texts) { + const embedding = await this.generateEmbeddings(text); + results.push(embedding); + } + + return results; + } +} diff --git a/src/services/llm/embeddings/providers/ollama.ts b/src/services/llm/embeddings/providers/ollama.ts new file mode 100644 index 000000000..41a583a5d --- /dev/null +++ b/src/services/llm/embeddings/providers/ollama.ts @@ -0,0 +1,77 @@ +import { BaseEmbeddingProvider } from "../base_embeddings.js"; +import type { EmbeddingConfig } from "../embeddings_interface.js"; +import axios from "axios"; +import log from "../../../log.js"; + +interface OllamaEmbeddingConfig extends EmbeddingConfig { + baseUrl: string; +} + +/** + * Ollama embedding provider implementation + */ +export class OllamaEmbeddingProvider extends BaseEmbeddingProvider { + name = "ollama"; + private baseUrl: string; + + constructor(config: OllamaEmbeddingConfig) { + super(config); + this.baseUrl = config.baseUrl; + } + + /** + * Generate embeddings for a single text + */ + async generateEmbeddings(text: string): Promise { + try { + const response = await axios.post( + `${this.baseUrl}/api/embeddings`, + { + model: this.config.model || "llama3", + prompt: text + }, + { + headers: { + "Content-Type": "application/json" + } + } + ); + + if (response.data && Array.isArray(response.data.embedding)) { + return new Float32Array(response.data.embedding); + } else { + throw new Error("Unexpected response structure from Ollama API"); + } + } catch (error: any) { + const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; + log.error(`Ollama embedding error: ${errorMessage}`); + throw new Error(`Ollama embedding error: ${errorMessage}`); + } + } + + /** + * Generate embeddings for multiple texts + * + * Note: Ollama API doesn't support batch embedding, so we process them sequentially + */ + async generateBatchEmbeddings(texts: string[]): Promise { + if (texts.length === 0) { + return []; + } + + const results: Float32Array[] = []; + + for (const text of texts) { + try { + const embedding = await this.generateEmbeddings(text); + results.push(embedding); + } catch (error: any) { + const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; + log.error(`Ollama batch embedding error: ${errorMessage}`); + throw new Error(`Ollama batch embedding error: ${errorMessage}`); + } + } + + return results; + } +} diff --git a/src/services/llm/embeddings/providers/openai.ts b/src/services/llm/embeddings/providers/openai.ts new file mode 100644 index 000000000..0ad8ca51b --- /dev/null +++ b/src/services/llm/embeddings/providers/openai.ts @@ -0,0 +1,107 @@ +import { BaseEmbeddingProvider } from "../base_embeddings.js"; +import type { EmbeddingConfig } from "../embeddings_interface.js"; +import axios from "axios"; +import log from "../../../log.js"; + +interface OpenAIEmbeddingConfig extends EmbeddingConfig { + apiKey: string; + baseUrl: string; +} + +/** + * OpenAI embedding provider implementation + */ +export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider { + name = "openai"; + private apiKey: string; + private baseUrl: string; + + constructor(config: OpenAIEmbeddingConfig) { + super(config); + this.apiKey = config.apiKey; + this.baseUrl = config.baseUrl; + } + + /** + * Generate embeddings for a single text + */ + async generateEmbeddings(text: string): Promise { + try { + const response = await axios.post( + `${this.baseUrl}/embeddings`, + { + input: text, + model: this.config.model || "text-embedding-3-small", + encoding_format: "float" + }, + { + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${this.apiKey}` + } + } + ); + + if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) { + const embedding = response.data.data[0].embedding; + return new Float32Array(embedding); + } else { + throw new Error("Unexpected response structure from OpenAI API"); + } + } catch (error: any) { + const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; + log.error(`OpenAI embedding error: ${errorMessage}`); + throw new Error(`OpenAI embedding error: ${errorMessage}`); + } + } + + /** + * Generate embeddings for multiple texts in a single batch + */ + async generateBatchEmbeddings(texts: string[]): Promise { + if (texts.length === 0) { + return []; + } + + const batchSize = this.config.batchSize || 10; + const results: Float32Array[] = []; + + // Process in batches to avoid API limits + for (let i = 0; i < texts.length; i += batchSize) { + const batch = texts.slice(i, i + batchSize); + try { + const response = await axios.post( + `${this.baseUrl}/embeddings`, + { + input: batch, + model: this.config.model || "text-embedding-3-small", + encoding_format: "float" + }, + { + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${this.apiKey}` + } + } + ); + + if (response.data && response.data.data) { + // Sort the embeddings by index to ensure they match the input order + const sortedEmbeddings = response.data.data + .sort((a: any, b: any) => a.index - b.index) + .map((item: any) => new Float32Array(item.embedding)); + + results.push(...sortedEmbeddings); + } else { + throw new Error("Unexpected response structure from OpenAI API"); + } + } catch (error: any) { + const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error"; + log.error(`OpenAI batch embedding error: ${errorMessage}`); + throw new Error(`OpenAI batch embedding error: ${errorMessage}`); + } + } + + return results; + } +} diff --git a/src/services/llm/embeddings/vector_store.ts b/src/services/llm/embeddings/vector_store.ts new file mode 100644 index 000000000..067ff13f1 --- /dev/null +++ b/src/services/llm/embeddings/vector_store.ts @@ -0,0 +1,483 @@ +import sql from "../../sql.js"; +import { randomString } from "../../utils.js"; +import options from "../../options.js"; +import dateUtils from "../../date_utils.js"; +import log from "../../log.js"; +import becca from "../../../becca/becca.js"; +import type { NoteEmbeddingContext } from "./embeddings_interface.js"; +import { getEmbeddingProviders, getEnabledEmbeddingProviders } from "./providers.js"; + +// Type definition for embedding result +interface EmbeddingResult { + embedId: string; + noteId: string; + providerId: string; + modelId: string; + dimension: number; + embedding: Float32Array; + version: number; + dateCreated: string; + utcDateCreated: string; + dateModified: string; + utcDateModified: string; +} + +// Type for queue item +interface QueueItem { + noteId: string; + operation: string; + attempts: number; +} + +/** + * Computes the cosine similarity between two vectors + */ +export function cosineSimilarity(a: Float32Array, b: Float32Array): number { + if (a.length !== b.length) { + throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`); + } + + let dotProduct = 0; + let aMagnitude = 0; + let bMagnitude = 0; + + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i]; + aMagnitude += a[i] * a[i]; + bMagnitude += b[i] * b[i]; + } + + aMagnitude = Math.sqrt(aMagnitude); + bMagnitude = Math.sqrt(bMagnitude); + + if (aMagnitude === 0 || bMagnitude === 0) { + return 0; + } + + return dotProduct / (aMagnitude * bMagnitude); +} + +/** + * Converts embedding Float32Array to Buffer for storage in SQLite + */ +export function embeddingToBuffer(embedding: Float32Array): Buffer { + return Buffer.from(embedding.buffer); +} + +/** + * Converts Buffer from SQLite back to Float32Array + */ +export function bufferToEmbedding(buffer: Buffer, dimension: number): Float32Array { + return new Float32Array(buffer.buffer, buffer.byteOffset, dimension); +} + +/** + * Creates or updates an embedding for a note + */ +export async function storeNoteEmbedding( + noteId: string, + providerId: string, + modelId: string, + embedding: Float32Array +): Promise { + const dimension = embedding.length; + const embeddingBlob = embeddingToBuffer(embedding); + const now = dateUtils.localNowDateTime(); + const utcNow = dateUtils.utcNowDateTime(); + + // Check if an embedding already exists for this note and provider/model + const existingEmbed = await getEmbeddingForNote(noteId, providerId, modelId); + + if (existingEmbed) { + // Update existing embedding + await sql.execute(` + UPDATE note_embeddings + SET embedding = ?, dimension = ?, version = version + 1, + dateModified = ?, utcDateModified = ? + WHERE embedId = ?`, + [embeddingBlob, dimension, now, utcNow, existingEmbed.embedId] + ); + return existingEmbed.embedId; + } else { + // Create new embedding + const embedId = randomString(16); + await sql.execute(` + INSERT INTO note_embeddings + (embedId, noteId, providerId, modelId, dimension, embedding, + dateCreated, utcDateCreated, dateModified, utcDateModified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [embedId, noteId, providerId, modelId, dimension, embeddingBlob, + now, utcNow, now, utcNow] + ); + return embedId; + } +} + +/** + * Retrieves embedding for a specific note + */ +export async function getEmbeddingForNote(noteId: string, providerId: string, modelId: string): Promise { + const row = await sql.getRow(` + SELECT embedId, noteId, providerId, modelId, dimension, embedding, version, + dateCreated, utcDateCreated, dateModified, utcDateModified + FROM note_embeddings + WHERE noteId = ? AND providerId = ? AND modelId = ?`, + [noteId, providerId, modelId] + ); + + if (!row) { + return null; + } + + // Need to cast row to any as it doesn't have type information + const rowData = row as any; + + return { + ...rowData, + embedding: bufferToEmbedding(rowData.embedding, rowData.dimension) + }; +} + +/** + * Finds similar notes based on vector similarity + */ +export async function findSimilarNotes( + embedding: Float32Array, + providerId: string, + modelId: string, + limit = 10, + threshold = 0.7 +): Promise<{noteId: string, similarity: number}[]> { + // Get all embeddings for the given provider and model + const rows = await sql.getRows(` + SELECT embedId, noteId, providerId, modelId, dimension, embedding + FROM note_embeddings + WHERE providerId = ? AND modelId = ?`, + [providerId, modelId] + ); + + if (!rows.length) { + return []; + } + + // Calculate similarity for each embedding + const similarities = rows.map(row => { + const rowData = row as any; + const rowEmbedding = bufferToEmbedding(rowData.embedding, rowData.dimension); + return { + noteId: rowData.noteId, + similarity: cosineSimilarity(embedding, rowEmbedding) + }; + }); + + // Filter by threshold and sort by similarity (descending) + return similarities + .filter(item => item.similarity >= threshold) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, limit); +} + +/** + * Gets context for a note to be embedded + */ +export async function getNoteEmbeddingContext(noteId: string): Promise { + const note = becca.getNote(noteId); + + if (!note) { + throw new Error(`Note ${noteId} not found`); + } + + // Get parent note titles + const parentNotes = note.getParentNotes(); + const parentTitles = parentNotes.map(note => note.title); + + // Get child note titles + const childNotes = note.getChildNotes(); + const childTitles = childNotes.map(note => note.title); + + // Get attributes + const attributes = note.getOwnedAttributes().map(attr => ({ + type: attr.type, + name: attr.name, + value: attr.value + })); + + // Get attachments + const attachments = note.getAttachments().map(att => ({ + title: att.title, + mime: att.mime + })); + + // Get content + let content = ""; + if (note.type === 'text') { + content = String(await note.getContent()); + } else if (note.type === 'code') { + content = String(await note.getContent()); + } else if (note.type === 'image' || note.type === 'file') { + content = `[${note.type} attachment: ${note.mime}]`; + } + + return { + noteId: note.noteId, + title: note.title, + content: content, + type: note.type, + mime: note.mime, + dateCreated: note.dateCreated || "", + dateModified: note.dateModified || "", + attributes, + parentTitles, + childTitles, + attachments + }; +} + +/** + * Queues a note for embedding update + */ +export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE') { + const now = dateUtils.localNowDateTime(); + const utcNow = dateUtils.utcNowDateTime(); + + // Check if note is already in queue + const existing = await sql.getValue( + "SELECT 1 FROM embedding_queue WHERE noteId = ?", + [noteId] + ); + + if (existing) { + // Update existing queue entry + await sql.execute(` + UPDATE embedding_queue + SET operation = ?, dateQueued = ?, utcDateQueued = ?, attempts = 0, error = NULL + WHERE noteId = ?`, + [operation, now, utcNow, noteId] + ); + } else { + // Add new queue entry + await sql.execute(` + INSERT INTO embedding_queue + (noteId, operation, dateQueued, utcDateQueued) + VALUES (?, ?, ?, ?)`, + [noteId, operation, now, utcNow] + ); + } +} + +/** + * Deletes all embeddings for a note + */ +export async function deleteNoteEmbeddings(noteId: string) { + await sql.execute( + "DELETE FROM note_embeddings WHERE noteId = ?", + [noteId] + ); + + // Remove from queue if present + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteId] + ); +} + +/** + * Process the embedding queue + */ +export async function processEmbeddingQueue() { + if (!(await options.getOptionBool('aiEnabled'))) { + return; + } + + const batchSize = parseInt(await options.getOption('embeddingBatchSize') || '10', 10); + const enabledProviders = await getEnabledEmbeddingProviders(); + + if (enabledProviders.length === 0) { + return; + } + + // Get notes from queue + const notes = await sql.getRows(` + SELECT noteId, operation, attempts + FROM embedding_queue + ORDER BY priority DESC, utcDateQueued ASC + LIMIT ?`, + [batchSize] + ); + + if (notes.length === 0) { + return; + } + + for (const note of notes) { + try { + const noteData = note as unknown as QueueItem; + + // Skip if note no longer exists + if (!becca.getNote(noteData.noteId)) { + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteData.noteId] + ); + await deleteNoteEmbeddings(noteData.noteId); + continue; + } + + if (noteData.operation === 'DELETE') { + await deleteNoteEmbeddings(noteData.noteId); + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteData.noteId] + ); + continue; + } + + // Get note context for embedding + const context = await getNoteEmbeddingContext(noteData.noteId); + + // Process with each enabled provider + for (const provider of enabledProviders) { + try { + // Generate embedding + const embedding = await provider.generateNoteEmbeddings(context); + + // Store embedding + const config = provider.getConfig(); + await storeNoteEmbedding(noteData.noteId, provider.name, config.model, embedding); + } catch (providerError: any) { + log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`); + } + } + + // Remove from queue on success + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteData.noteId] + ); + } catch (error: any) { + const noteData = note as unknown as QueueItem; + + // Update attempt count and log error + await sql.execute(` + UPDATE embedding_queue + SET attempts = attempts + 1, + lastAttempt = ?, + error = ? + WHERE noteId = ?`, + [dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteData.noteId] + ); + + log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`); + + // Remove from queue if too many attempts + if (noteData.attempts + 1 >= 3) { + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteData.noteId] + ); + log.error(`Removed note ${noteData.noteId} from embedding queue after multiple failures`); + } + } + } +} + +/** + * Setup note event listeners to keep embeddings up to date + */ +export function setupEmbeddingEventListeners() { + require("../../../becca/entity_events.js").subscribe({ + entityName: "notes", + eventType: "created", + handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'CREATE') + }); + + require("../../../becca/entity_events.js").subscribe({ + entityName: "notes", + eventType: "updated", + handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE') + }); + + require("../../../becca/entity_events.js").subscribe({ + entityName: "notes", + eventType: "deleted", + handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'DELETE') + }); + + require("../../../becca/entity_events.js").subscribe({ + entityName: "attributes", + eventType: ["created", "updated", "deleted"], + handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE') + }); + + require("../../../becca/entity_events.js").subscribe({ + entityName: "branches", + eventType: ["created", "updated", "deleted"], + handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE') + }); +} + +/** + * Setup background processing of the embedding queue + */ +export async function setupEmbeddingBackgroundProcessing() { + const interval = parseInt(await options.getOption('embeddingUpdateInterval') || '5000', 10); + + setInterval(async () => { + try { + await processEmbeddingQueue(); + } catch (error: any) { + log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`); + } + }, interval); +} + +/** + * Initialize embeddings system + */ +export async function initEmbeddings() { + if (await options.getOptionBool('aiEnabled')) { + setupEmbeddingEventListeners(); + await setupEmbeddingBackgroundProcessing(); + log.info("Embeddings system initialized"); + } else { + log.info("Embeddings system disabled"); + } +} + +/** + * Reprocess all notes to update embeddings + */ +export async function reprocessAllNotes() { + if (!(await options.getOptionBool('aiEnabled'))) { + return; + } + + log.info("Queueing all notes for embedding updates"); + + const noteIds = await sql.getColumn( + "SELECT noteId FROM notes WHERE isDeleted = 0" + ); + + log.info(`Adding ${noteIds.length} notes to embedding queue`); + + for (const noteId of noteIds) { + await queueNoteForEmbedding(noteId as string, 'UPDATE'); + } +} + +export default { + cosineSimilarity, + embeddingToBuffer, + bufferToEmbedding, + storeNoteEmbedding, + getEmbeddingForNote, + findSimilarNotes, + getNoteEmbeddingContext, + queueNoteForEmbedding, + deleteNoteEmbeddings, + processEmbeddingQueue, + setupEmbeddingEventListeners, + setupEmbeddingBackgroundProcessing, + initEmbeddings, + reprocessAllNotes +};