"lock" notes that are having their embeddings created

perf3ct 2025-03-16 20:36:47 +00:00
parent 781a2506f0
commit d2072c2a6f
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
6 changed files with 160 additions and 76 deletions

View File

@@ -29,7 +29,8 @@ CREATE TABLE IF NOT EXISTS "embedding_queue" (
     "attempts" INTEGER NOT NULL DEFAULT 0,
     "lastAttempt" TEXT NULL,
     "error" TEXT NULL,
-    "failed" INTEGER NOT NULL DEFAULT 0
+    "failed" INTEGER NOT NULL DEFAULT 0,
+    "isProcessing" INTEGER NOT NULL DEFAULT 0
 );

 -- Table to store embedding provider configurations

View File

@@ -159,7 +159,8 @@ CREATE TABLE IF NOT EXISTS "embedding_queue" (
     "attempts" INTEGER NOT NULL DEFAULT 0,
     "lastAttempt" TEXT NULL,
     "error" TEXT NULL,
-    "failed" INTEGER NOT NULL DEFAULT 0
+    "failed" INTEGER NOT NULL DEFAULT 0,
+    "isProcessing" INTEGER NOT NULL DEFAULT 0
 );

 CREATE TABLE IF NOT EXISTS "embedding_providers" (
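
Note: both schema files above only touch the CREATE TABLE IF NOT EXISTS statements, so the new column appears only in freshly created databases. A minimal sketch of the upgrade step an existing installation would need, assuming the same sql service this codebase already uses (this migration is not part of the commit):

    // Hypothetical migration: CREATE TABLE IF NOT EXISTS does not alter
    // existing tables, so older databases need the column added explicitly.
    await sql.execute(`ALTER TABLE embedding_queue ADD COLUMN isProcessing INTEGER NOT NULL DEFAULT 0`);
    // Clearing stale locks at startup guards against rows left locked by a crashed run.
    await sql.execute(`UPDATE embedding_queue SET isProcessing = 0`);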

View File

@@ -34,7 +34,8 @@ const MAX_TOTAL_PROCESSING_TIME = 5 * 60 * 1000; // 5 minutes
 const MAX_CHUNK_RETRY_ATTEMPTS = 2;

 // Maximum time per chunk processing (to prevent individual chunks from hanging)
-const MAX_CHUNK_PROCESSING_TIME = 60 * 1000; // 1 minute
+const DEFAULT_MAX_CHUNK_PROCESSING_TIME = 60 * 1000; // 1 minute
+const OLLAMA_MAX_CHUNK_PROCESSING_TIME = 120 * 1000; // 2 minutes

 /**
  * Categorize an error as temporary or permanent based on its message
@@ -166,6 +167,11 @@ export async function processNoteWithChunking(
     log.info(`Processing ${chunks.length} chunks for note ${noteId} (${note.title})`);

+    // Get the current time to prevent duplicate processing from timeouts
+    const processingStartTime = Date.now();
+    const processingId = `${noteId}-${processingStartTime}`;
+    log.info(`Starting processing run ${processingId}`);
+
     // Process each chunk with a delay based on provider to avoid rate limits
     for (let i = 0; i < chunks.length; i++) {
         // Check if we've exceeded the overall time limit
@@ -194,7 +200,7 @@ export async function processNoteWithChunking(
                 const embedding = await processChunkWithTimeout(
                     provider,
                     chunk,
-                    MAX_CHUNK_PROCESSING_TIME
+                    provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
                 );

                 // Store with chunk information in a unique ID format
@@ -212,7 +218,7 @@ export async function processNoteWithChunking(
                 // Small delay between chunks to avoid rate limits - longer for Ollama
                 if (i < chunks.length - 1) {
                     await new Promise(resolve => setTimeout(resolve,
-                        provider.name === 'ollama' ? 500 : 100));
+                        provider.name === 'ollama' ? 2000 : 100));
                 }
             } catch (error: any) {
                 const errorMessage = error.message || 'Unknown error';
@@ -274,7 +280,7 @@ export async function processNoteWithChunking(
                 const embedding = await processChunkWithTimeout(
                     provider,
                     item.chunk,
-                    MAX_CHUNK_PROCESSING_TIME
+                    provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
                 );

                 // Store with unique ID that indicates it was a retry
@@ -335,7 +341,7 @@ export async function processNoteWithChunking(
     // Log information about the processed chunks
     if (successfulChunks > 0) {
-        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
+        log.info(`[${processingId}] Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     if (failedChunks > 0) {
@@ -344,7 +350,7 @@ export async function processNoteWithChunking(
         const temporaryErrors = failedChunkDetails.filter(d => d.category === 'temporary').length;
         const unknownErrors = failedChunkDetails.filter(d => d.category === 'unknown').length;

-        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title}). ` +
+        log.info(`[${processingId}] Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title}). ` +
             `Permanent: ${permanentErrors}, Temporary: ${temporaryErrors}, Unknown: ${unknownErrors}`);
     }
@@ -394,7 +400,7 @@ export async function processNoteWithChunking(
         // Track total processing time
         const totalTime = Date.now() - startTime;
-        log.info(`Total processing time for note ${noteId}: ${totalTime}ms`);
+        log.info(`[${processingId}] Total processing time for note ${noteId}: ${totalTime}ms`);
     } catch (error: any) {
         log.error(`Error in chunked embedding process for note ${noteId}: ${error.message || 'Unknown error'}`);
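
Note: the hunks above route a provider-specific time budget into processChunkWithTimeout, whose implementation is not part of this diff. A minimal sketch of how such a helper is commonly shaped, assuming it races the embedding call against a timer (the real signature and types may differ):

    // Sketch only - the actual processChunkWithTimeout is not shown in this commit.
    async function processChunkWithTimeout(
        provider: { generateEmbeddings(text: string): Promise<Float32Array> },
        chunk: { content: string },
        timeoutMs: number
    ): Promise<Float32Array> {
        let timer: ReturnType<typeof setTimeout> | undefined;
        const timeout = new Promise<never>((_, reject) => {
            timer = setTimeout(
                () => reject(new Error(`Chunk processing timed out after ${timeoutMs}ms`)),
                timeoutMs
            );
        });
        try {
            // Whichever settles first wins: the embedding call or the timer.
            return await Promise.race([provider.generateEmbeddings(chunk.content), timeout]);
        } finally {
            clearTimeout(timer); // avoid leaking the timer once the race is decided
        }
    }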

View File

@@ -6,6 +6,9 @@ import { processEmbeddingQueue, queueNoteForEmbedding } from "./queue.js";
 import eventService from "../../../services/events.js";
 import becca from "../../../becca/becca.js";

+// Add mutex to prevent concurrent processing
+let isProcessingEmbeddings = false;
+
 /**
  * Setup event listeners for embedding-related events
  */
@@ -54,12 +57,23 @@ export async function setupEmbeddingBackgroundProcessing() {
     setInterval(async () => {
         try {
+            // Skip if already processing
+            if (isProcessingEmbeddings) {
+                return;
+            }
+
+            // Set mutex
+            isProcessingEmbeddings = true;
+
             // Wrap in cls.init to ensure proper context
             cls.init(async () => {
                 await processEmbeddingQueue();
             });
         } catch (error: any) {
             log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
+        } finally {
+            // Always release the mutex
+            isProcessingEmbeddings = false;
         }
     }, interval);
 }
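
Note: the guard above is a plain boolean rather than a real mutex, which is enough on Node's single-threaded event loop, but its reach depends on cls.init: if cls.init does not return a promise that resolves when its async callback finishes, the finally block releases the flag before processEmbeddingQueue() actually completes. A distilled sketch of the pattern with the work awaited (illustration only, not code from this commit):

    // Minimal busy-flag guard around a polling interval, assuming the
    // async work is awaited so the flag spans the whole run.
    let busy = false;

    function startPolling(work: () => Promise<void>, intervalMs: number) {
        setInterval(async () => {
            if (busy) {
                return; // previous tick still running - skip this one
            }
            busy = true;
            try {
                await work();
            } catch (err: any) {
                console.error(`Background run failed: ${err.message || 'Unknown error'}`);
            } finally {
                busy = false; // always release, even on error
            }
        }, intervalMs);
    }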

View File

@@ -173,46 +173,77 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
      * Generate embeddings for a single text
      */
     async generateEmbeddings(text: string): Promise<Float32Array> {
-        try {
-            if (!text.trim()) {
-                return new Float32Array(this.config.dimension);
-            }
+        // Handle empty text
+        if (!text.trim()) {
+            return new Float32Array(this.config.dimension);
+        }

-            const modelName = this.config.model || "llama3";
-
-            // Ensure we have model info
-            const modelInfo = await this.getModelInfo(modelName);
-
-            // Trim text if it might exceed context window (rough character estimate)
-            // This is a simplistic approach - ideally we'd count tokens properly
-            const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
-            const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
-
-            const response = await axios.post(
-                `${this.baseUrl}/api/embeddings`,
-                {
-                    model: modelName,
-                    prompt: trimmedText,
-                    format: "json"
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json"
-                    },
-                    timeout: 30000 // Longer timeout for larger texts
-                }
-            );
-
-            if (response.data && Array.isArray(response.data.embedding)) {
-                return new Float32Array(response.data.embedding);
-            } else {
-                throw new Error("Unexpected response structure from Ollama API");
-            }
-        } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
-            log.error(`Ollama embedding error: ${errorMessage}`);
-            throw new Error(`Ollama embedding error: ${errorMessage}`);
-        }
+        // Configuration for retries
+        const maxRetries = 3;
+        let retryCount = 0;
+        let lastError: any = null;
+
+        while (retryCount <= maxRetries) {
+            try {
+                const modelName = this.config.model || "llama3";
+
+                // Ensure we have model info
+                const modelInfo = await this.getModelInfo(modelName);
+
+                // Trim text if it might exceed context window (rough character estimate)
+                // This is a simplistic approach - ideally we'd count tokens properly
+                const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
+                const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
+
+                const response = await axios.post(
+                    `${this.baseUrl}/api/embeddings`,
+                    {
+                        model: modelName,
+                        prompt: trimmedText,
+                        format: "json"
+                    },
+                    {
+                        headers: {
+                            "Content-Type": "application/json"
+                        },
+                        timeout: 60000 // Increased timeout for larger texts (60 seconds)
+                    }
+                );
+
+                if (response.data && Array.isArray(response.data.embedding)) {
+                    // Success! Return the embedding
+                    return new Float32Array(response.data.embedding);
+                } else {
+                    throw new Error("Unexpected response structure from Ollama API");
+                }
+            } catch (error: any) {
+                lastError = error;
+
+                // Only retry on timeout or connection errors
+                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                const isTimeoutError = errorMessage.includes('timeout') ||
+                    errorMessage.includes('socket hang up') ||
+                    errorMessage.includes('ECONNREFUSED') ||
+                    errorMessage.includes('ECONNRESET');
+
+                if (isTimeoutError && retryCount < maxRetries) {
+                    // Exponential backoff with jitter
+                    const delay = Math.min(Math.pow(2, retryCount) * 1000 + Math.random() * 1000, 15000);
+                    log.info(`Ollama embedding timeout, retrying in ${Math.round(delay/1000)}s (attempt ${retryCount + 1}/${maxRetries})`);
+                    await new Promise(resolve => setTimeout(resolve, delay));
+                    retryCount++;
+                } else {
+                    // Non-retryable error or max retries exceeded
+                    log.error(`Ollama embedding error: ${errorMessage}`);
+                    throw new Error(`Ollama embedding error: ${errorMessage}`);
+                }
+            }
+        }
+
+        // If we get here, we've exceeded our retry limit
+        const errorMessage = lastError.response?.data?.error?.message || lastError.message || "Unknown error";
+        log.error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
+        throw new Error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
     }

     /**
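
Note: the retried attempts above wait min(2^n * 1000 ms + jitter, 15 s). A worked example of the delays this formula produces, matching the expression in the hunk:

    // Backoff formula as committed: exponential base delay, up to 1s of
    // random jitter, capped at 15 seconds.
    function backoffDelay(retryCount: number): number {
        return Math.min(Math.pow(2, retryCount) * 1000 + Math.random() * 1000, 15000);
    }

    // With maxRetries = 3, the retried attempts wait roughly:
    //   retry 0: 1-2s, retry 1: 2-3s, retry 2: 4-5s
    for (let retry = 0; retry < 3; retry++) {
        console.log(`retry ${retry}: ~${Math.round(backoffDelay(retry) / 1000)}s`);
    }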

View File

@@ -10,6 +10,9 @@ import type { QueueItem } from "./types.js";
 import { getChunkingOperations } from "./chunking/chunking_interface.js";
 import indexService from '../index_service.js';

+// Track which notes are currently being processed
+const notesInProcess = new Set<string>();
+
 /**
  * Queues a note for embedding update
  */
@@ -19,11 +22,17 @@ export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE'
     // Check if note is already in queue and whether it's marked as permanently failed
     const queueInfo = await sql.getRow(
-        "SELECT 1 as exists_flag, failed FROM embedding_queue WHERE noteId = ?",
+        "SELECT 1 as exists_flag, failed, isProcessing FROM embedding_queue WHERE noteId = ?",
         [noteId]
-    ) as {exists_flag: number, failed: number} | null;
+    ) as {exists_flag: number, failed: number, isProcessing: number} | null;

     if (queueInfo) {
+        // If the note is currently being processed, don't change its status
+        if (queueInfo.isProcessing === 1) {
+            log.info(`Note ${noteId} is currently being processed, skipping queue update`);
+            return;
+        }
+
         // Only update if not permanently failed
         if (queueInfo.failed !== 1) {
             // Update existing queue entry but preserve the failed status
@@ -41,8 +50,8 @@ export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE'
         // Add new queue entry
         await sql.execute(`
             INSERT INTO embedding_queue
-            (noteId, operation, dateQueued, utcDateQueued, failed)
-            VALUES (?, ?, ?, ?, 0)`,
+            (noteId, operation, dateQueued, utcDateQueued, failed, isProcessing)
+            VALUES (?, ?, ?, ?, 0, 0)`,
             [noteId, operation, now, utcNow]
         );
     }
@@ -180,11 +189,11 @@ export async function processEmbeddingQueue() {
         return;
     }

-    // Get notes from queue (excluding failed ones)
+    // Get notes from queue (excluding failed ones and those being processed)
     const notes = await sql.getRows(`
         SELECT noteId, operation, attempts
         FROM embedding_queue
-        WHERE failed = 0
+        WHERE failed = 0 AND isProcessing = 0
         ORDER BY priority DESC, utcDateQueued ASC
         LIMIT ?`,
         [batchSize]
@@ -198,30 +207,47 @@ export async function processEmbeddingQueue() {
     let processedCount = 0;

     for (const note of notes) {
+        const noteData = note as unknown as QueueItem;
+        const noteId = noteData.noteId;
+
+        // Double-check that this note isn't already being processed
+        if (notesInProcess.has(noteId)) {
+            log.info(`Note ${noteId} is already being processed by another thread, skipping`);
+            continue;
+        }
+
         try {
-            const noteData = note as unknown as QueueItem;
+            // Mark the note as being processed
+            notesInProcess.add(noteId);
+            await sql.execute(
+                "UPDATE embedding_queue SET isProcessing = 1 WHERE noteId = ?",
+                [noteId]
+            );

             // Skip if note no longer exists
-            if (!becca.getNote(noteData.noteId)) {
+            if (!becca.getNote(noteId)) {
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
-                await deleteNoteEmbeddings(noteData.noteId);
+                await deleteNoteEmbeddings(noteId);
                 continue;
             }

             if (noteData.operation === 'DELETE') {
-                await deleteNoteEmbeddings(noteData.noteId);
+                await deleteNoteEmbeddings(noteId);
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
                 continue;
             }

+            // Log that we're starting to process this note
+            log.info(`Starting embedding generation for note ${noteId}`);
+
             // Get note context for embedding
-            const context = await getNoteEmbeddingContext(noteData.noteId);
+            const context = await getNoteEmbeddingContext(noteId);

             // Check if we should use chunking for large content
             const useChunking = context.content.length > 5000;
@@ -236,7 +262,7 @@ export async function processEmbeddingQueue() {
                     if (useChunking) {
                         // Process large notes using chunking
                         const chunkingOps = await getChunkingOperations();
-                        await chunkingOps.processNoteWithChunking(noteData.noteId, provider, context);
+                        await chunkingOps.processNoteWithChunking(noteId, provider, context);
                         allProvidersFailed = false;
                     } else {
                         // Standard approach: Generate a single embedding for the whole note
@@ -246,7 +272,7 @@ export async function processEmbeddingQueue() {
                         const config = provider.getConfig();
                         await import('./storage.js').then(storage => {
                             return storage.storeNoteEmbedding(
-                                noteData.noteId,
+                                noteId,
                                 provider.name,
                                 config.model,
                                 embedding
@@ -259,7 +285,7 @@ export async function processEmbeddingQueue() {
                 } catch (providerError: any) {
                     // This provider failed
                     allProvidersSucceeded = false;
-                    log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
+                    log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${providerError.message || 'Unknown error'}`);
                 }
             }
@@ -267,8 +293,10 @@ export async function processEmbeddingQueue() {
                 // At least one provider succeeded, remove from queue
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
+                log.info(`Successfully completed embedding processing for note ${noteId}`);
+
                 // Count as successfully processed
                 processedCount++;
             } else {
@@ -277,49 +305,52 @@ export async function processEmbeddingQueue() {
                     UPDATE embedding_queue
                     SET attempts = attempts + 1,
                         lastAttempt = ?,
-                        error = ?
+                        error = ?,
+                        isProcessing = 0
                     WHERE noteId = ?`,
-                    [dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteData.noteId]
+                    [dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteId]
                 );

                 // Mark as permanently failed if too many attempts
                 if (noteData.attempts + 1 >= 3) {
-                    log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
+                    log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`);

                     // Set the failed flag but keep the actual attempts count
                     await sql.execute(`
                         UPDATE embedding_queue
                         SET failed = 1
                         WHERE noteId = ?
-                    `, [noteData.noteId]);
+                    `, [noteId]);
                 }
             }
         } catch (error: any) {
-            const noteData = note as unknown as QueueItem;
-
             // Update attempt count and log error
             await sql.execute(`
                 UPDATE embedding_queue
                 SET attempts = attempts + 1,
                     lastAttempt = ?,
-                    error = ?
+                    error = ?,
+                    isProcessing = 0
                 WHERE noteId = ?`,
-                [dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteData.noteId]
+                [dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteId]
             );

-            log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
+            log.error(`Error processing embedding for note ${noteId}: ${error.message || 'Unknown error'}`);

             // Mark as permanently failed if too many attempts
             if (noteData.attempts + 1 >= 3) {
-                log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
+                log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`);

                 // Set the failed flag but keep the actual attempts count
                 await sql.execute(`
                     UPDATE embedding_queue
                     SET failed = 1
                     WHERE noteId = ?
-                `, [noteData.noteId]);
+                `, [noteId]);
             }
+        } finally {
+            // Always clean up the processing status in the in-memory set
+            notesInProcess.delete(noteId);
         }
     }
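
Note: each queue item is now protected twice: the in-memory notesInProcess Set rejects re-entry within this process, while the persistent isProcessing column hides the row from the SELECT in processEmbeddingQueue and from queueNoteForEmbedding. A sketch of that acquire/release sequence factored into a helper, assuming the sql service this file already imports (the commit inlines the logic rather than using a helper like this):

    // Two-level lock: fast in-memory check first, then the durable DB flag.
    const inProcess = new Set<string>();

    async function withNoteLock(noteId: string, fn: () => Promise<void>): Promise<void> {
        if (inProcess.has(noteId)) {
            return; // this process is already working on the note
        }
        inProcess.add(noteId);
        await sql.execute("UPDATE embedding_queue SET isProcessing = 1 WHERE noteId = ?", [noteId]);
        try {
            await fn();
        } catch (err) {
            // Release the row-level flag on failure so the note can be retried later.
            await sql.execute("UPDATE embedding_queue SET isProcessing = 0 WHERE noteId = ?", [noteId]);
            throw err;
        } finally {
            inProcess.delete(noteId); // the in-memory lock never outlives the attempt
        }
    }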