Mirror of https://github.com/TriliumNext/Notes.git (synced 2025-08-10 10:22:29 +08:00)

Commit d2072c2a6f (parent 781a2506f0): "lock" notes that are having their embeddings created
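
Summary of the change: the embedding_queue table gains an isProcessing column, and the queue worker pairs it with an in-memory notesInProcess set, so a note whose embedding is currently being generated is neither re-queued nor picked up by a second pass. The background processing loop additionally gets a boolean mutex so timer ticks cannot overlap, chunking runs are tagged with a processingId for log correlation, and the Ollama provider gets a longer request timeout, a longer per-chunk budget, slower inter-chunk pacing, and retries with exponential backoff.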
@@ -29,7 +29,8 @@ CREATE TABLE IF NOT EXISTS "embedding_queue" (
     "attempts" INTEGER NOT NULL DEFAULT 0,
     "lastAttempt" TEXT NULL,
     "error" TEXT NULL,
-    "failed" INTEGER NOT NULL DEFAULT 0
+    "failed" INTEGER NOT NULL DEFAULT 0,
+    "isProcessing" INTEGER NOT NULL DEFAULT 0
 );

 -- Table to store embedding provider configurations
@@ -159,7 +159,8 @@ CREATE TABLE IF NOT EXISTS "embedding_queue" (
     "attempts" INTEGER NOT NULL DEFAULT 0,
     "lastAttempt" TEXT NULL,
     "error" TEXT NULL,
-    "failed" INTEGER NOT NULL DEFAULT 0
+    "failed" INTEGER NOT NULL DEFAULT 0,
+    "isProcessing" INTEGER NOT NULL DEFAULT 0
 );

 CREATE TABLE IF NOT EXISTS "embedding_providers" (
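
Both hunks above only amend CREATE TABLE statements, so the new column applies to freshly created databases. An existing installation would presumably need a one-off migration along these lines; the statement below is a hypothetical sketch inferred from the schema change, not something this diff contains:

    // Hypothetical migration sketch (not part of this commit): add the new
    // column to an already-created embedding_queue table. Column name and
    // default are taken from the CREATE TABLE hunks above.
    await sql.execute(`ALTER TABLE embedding_queue
        ADD COLUMN "isProcessing" INTEGER NOT NULL DEFAULT 0`);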
@@ -34,7 +34,8 @@ const MAX_TOTAL_PROCESSING_TIME = 5 * 60 * 1000; // 5 minutes
 const MAX_CHUNK_RETRY_ATTEMPTS = 2;

 // Maximum time per chunk processing (to prevent individual chunks from hanging)
-const MAX_CHUNK_PROCESSING_TIME = 60 * 1000; // 1 minute
+const DEFAULT_MAX_CHUNK_PROCESSING_TIME = 60 * 1000; // 1 minute
+const OLLAMA_MAX_CHUNK_PROCESSING_TIME = 120 * 1000; // 2 minutes

 /**
  * Categorize an error as temporary or permanent based on its message
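
The split constant appears twice below as an inline ternary (once in the main chunk loop, once in the retry path). A small helper, hypothetical and not part of the diff, expresses the same choice; Ollama gets the larger budget presumably because local inference tends to be slower than hosted embedding APIs:

    // Hypothetical helper (not in the diff) capturing the per-provider timeout
    // choice that the hunks below write inline as a ternary.
    function maxChunkProcessingTime(providerName: string): number {
        return providerName === 'ollama'
            ? OLLAMA_MAX_CHUNK_PROCESSING_TIME
            : DEFAULT_MAX_CHUNK_PROCESSING_TIME;
    }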
@@ -166,6 +167,11 @@ export async function processNoteWithChunking(

     log.info(`Processing ${chunks.length} chunks for note ${noteId} (${note.title})`);

+    // Get the current time to prevent duplicate processing from timeouts
+    const processingStartTime = Date.now();
+    const processingId = `${noteId}-${processingStartTime}`;
+    log.info(`Starting processing run ${processingId}`);
+
     // Process each chunk with a delay based on provider to avoid rate limits
     for (let i = 0; i < chunks.length; i++) {
         // Check if we've exceeded the overall time limit
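
Tagging each run with `${noteId}-${processingStartTime}` gives every chunking pass a unique identifier, and later hunks thread that processingId into the summary log lines, so when a timed-out run overlaps with its replacement the interleaved log output can still be attributed to the right run.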
@@ -194,7 +200,7 @@ export async function processNoteWithChunking(
             const embedding = await processChunkWithTimeout(
                 provider,
                 chunk,
-                MAX_CHUNK_PROCESSING_TIME
+                provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
             );

             // Store with chunk information in a unique ID format
@@ -212,7 +218,7 @@ export async function processNoteWithChunking(
             // Small delay between chunks to avoid rate limits - longer for Ollama
             if (i < chunks.length - 1) {
                 await new Promise(resolve => setTimeout(resolve,
-                    provider.name === 'ollama' ? 500 : 100));
+                    provider.name === 'ollama' ? 2000 : 100));
             }
         } catch (error: any) {
             const errorMessage = error.message || 'Unknown error';
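
The Ollama inter-chunk pause grows from 0.5 s to 2 s. The pacing is per gap, so a note split into N chunks now waits (N - 1) × 2 s between requests: 20 chunks, for instance, add 38 s of deliberate delay on top of embedding time, versus 9.5 s before this change.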
@@ -274,7 +280,7 @@ export async function processNoteWithChunking(
                 const embedding = await processChunkWithTimeout(
                     provider,
                     item.chunk,
-                    MAX_CHUNK_PROCESSING_TIME
+                    provider.name === 'ollama' ? OLLAMA_MAX_CHUNK_PROCESSING_TIME : DEFAULT_MAX_CHUNK_PROCESSING_TIME
                 );

                 // Store with unique ID that indicates it was a retry
@@ -335,7 +341,7 @@ export async function processNoteWithChunking(

     // Log information about the processed chunks
     if (successfulChunks > 0) {
-        log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
+        log.info(`[${processingId}] Generated ${successfulChunks} chunk embeddings for note ${noteId} (${note.title})`);
     }

     if (failedChunks > 0) {
@@ -344,7 +350,7 @@ export async function processNoteWithChunking(
         const temporaryErrors = failedChunkDetails.filter(d => d.category === 'temporary').length;
         const unknownErrors = failedChunkDetails.filter(d => d.category === 'unknown').length;

-        log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title}). ` +
+        log.info(`[${processingId}] Failed to generate ${failedChunks} chunk embeddings for note ${noteId} (${note.title}). ` +
             `Permanent: ${permanentErrors}, Temporary: ${temporaryErrors}, Unknown: ${unknownErrors}`);
     }

@@ -394,7 +400,7 @@ export async function processNoteWithChunking(

         // Track total processing time
         const totalTime = Date.now() - startTime;
-        log.info(`Total processing time for note ${noteId}: ${totalTime}ms`);
+        log.info(`[${processingId}] Total processing time for note ${noteId}: ${totalTime}ms`);

     } catch (error: any) {
         log.error(`Error in chunked embedding process for note ${noteId}: ${error.message || 'Unknown error'}`);
@@ -6,6 +6,9 @@ import { processEmbeddingQueue, queueNoteForEmbedding } from "./queue.js";
 import eventService from "../../../services/events.js";
 import becca from "../../../becca/becca.js";

+// Add mutex to prevent concurrent processing
+let isProcessingEmbeddings = false;
+
 /**
  * Setup event listeners for embedding-related events
  */
@@ -54,12 +57,23 @@ export async function setupEmbeddingBackgroundProcessing() {

     setInterval(async () => {
         try {
+            // Skip if already processing
+            if (isProcessingEmbeddings) {
+                return;
+            }
+
+            // Set mutex
+            isProcessingEmbeddings = true;
+
             // Wrap in cls.init to ensure proper context
             cls.init(async () => {
                 await processEmbeddingQueue();
             });
         } catch (error: any) {
             log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
+        } finally {
+            // Always release the mutex
+            isProcessingEmbeddings = false;
         }
     }, interval);
 }
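
The boolean flag keeps timer ticks from stacking up. One caveat, hedged because cls.init's behavior is not visible in this hunk: if `cls.init` does not return (or the caller does not await) the async callback's promise, the `finally` releases `isProcessingEmbeddings` as soon as the callback is scheduled, and the real protection falls to the per-note guards added elsewhere in this commit. A minimal sketch of the fully awaited variant, assuming `cls.init` passes through the callback's promise:

    // Minimal sketch, assuming cls.init returns the callback's promise.
    // Awaiting it keeps the mutex held for the entire queue run.
    setInterval(async () => {
        if (isProcessingEmbeddings) return;   // skip overlapping ticks
        isProcessingEmbeddings = true;
        try {
            await cls.init(async () => {
                await processEmbeddingQueue();
            });
        } catch (error: any) {
            log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
        } finally {
            isProcessingEmbeddings = false;   // always release the mutex
        }
    }, interval);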
@@ -173,46 +173,77 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
      * Generate embeddings for a single text
      */
     async generateEmbeddings(text: string): Promise<Float32Array> {
-        try {
-            if (!text.trim()) {
-                return new Float32Array(this.config.dimension);
-            }
+        // Handle empty text
+        if (!text.trim()) {
+            return new Float32Array(this.config.dimension);
+        }

-            const modelName = this.config.model || "llama3";
-
-            // Ensure we have model info
-            const modelInfo = await this.getModelInfo(modelName);
-
-            // Trim text if it might exceed context window (rough character estimate)
-            // This is a simplistic approach - ideally we'd count tokens properly
-            const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
-            const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
-
-            const response = await axios.post(
-                `${this.baseUrl}/api/embeddings`,
-                {
-                    model: modelName,
-                    prompt: trimmedText,
-                    format: "json"
-                },
-                {
-                    headers: {
-                        "Content-Type": "application/json"
-                    },
-                    timeout: 30000 // Longer timeout for larger texts
-                }
-            );
-
-            if (response.data && Array.isArray(response.data.embedding)) {
-                return new Float32Array(response.data.embedding);
-            } else {
-                throw new Error("Unexpected response structure from Ollama API");
-            }
-        } catch (error: any) {
-            const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
-            log.error(`Ollama embedding error: ${errorMessage}`);
-            throw new Error(`Ollama embedding error: ${errorMessage}`);
-        }
+        // Configuration for retries
+        const maxRetries = 3;
+        let retryCount = 0;
+        let lastError: any = null;
+
+        while (retryCount <= maxRetries) {
+            try {
+                const modelName = this.config.model || "llama3";
+
+                // Ensure we have model info
+                const modelInfo = await this.getModelInfo(modelName);
+
+                // Trim text if it might exceed context window (rough character estimate)
+                // This is a simplistic approach - ideally we'd count tokens properly
+                const charLimit = modelInfo.contextWindow * 4; // Rough estimate: avg 4 chars per token
+                const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
+
+                const response = await axios.post(
+                    `${this.baseUrl}/api/embeddings`,
+                    {
+                        model: modelName,
+                        prompt: trimmedText,
+                        format: "json"
+                    },
+                    {
+                        headers: {
+                            "Content-Type": "application/json"
+                        },
+                        timeout: 60000 // Increased timeout for larger texts (60 seconds)
+                    }
+                );
+
+                if (response.data && Array.isArray(response.data.embedding)) {
+                    // Success! Return the embedding
+                    return new Float32Array(response.data.embedding);
+                } else {
+                    throw new Error("Unexpected response structure from Ollama API");
+                }
+            } catch (error: any) {
+                lastError = error;
+
+                // Only retry on timeout or connection errors
+                const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
+                const isTimeoutError = errorMessage.includes('timeout') ||
+                    errorMessage.includes('socket hang up') ||
+                    errorMessage.includes('ECONNREFUSED') ||
+                    errorMessage.includes('ECONNRESET');
+
+                if (isTimeoutError && retryCount < maxRetries) {
+                    // Exponential backoff with jitter
+                    const delay = Math.min(Math.pow(2, retryCount) * 1000 + Math.random() * 1000, 15000);
+                    log.info(`Ollama embedding timeout, retrying in ${Math.round(delay/1000)}s (attempt ${retryCount + 1}/${maxRetries})`);
+                    await new Promise(resolve => setTimeout(resolve, delay));
+                    retryCount++;
+                } else {
+                    // Non-retryable error or max retries exceeded
+                    log.error(`Ollama embedding error: ${errorMessage}`);
+                    throw new Error(`Ollama embedding error: ${errorMessage}`);
+                }
+            }
+        }
+
+        // If we get here, we've exceeded our retry limit
+        const errorMessage = lastError.response?.data?.error?.message || lastError.message || "Unknown error";
+        log.error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
+        throw new Error(`Ollama embedding error after ${maxRetries} retries: ${errorMessage}`);
     }

     /**
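
For reference, the delay formula in the hunk above, `Math.min(2^retryCount * 1000 + jitter, 15000)`, yields roughly 1-2 s before the first retry, 2-3 s before the second, and 4-5 s before the third; with maxRetries = 3 the 15 s cap never actually binds. A standalone restatement of the formula, for illustration only:

    // Standalone restatement of the backoff used above (illustration only).
    function backoffDelayMs(retryCount: number): number {
        // 2^n seconds, up to 1 s of random jitter, capped at 15 s
        return Math.min(Math.pow(2, retryCount) * 1000 + Math.random() * 1000, 15000);
    }
    // backoffDelayMs(0) -> 1000-2000 ms, backoffDelayMs(1) -> 2000-3000 ms,
    // backoffDelayMs(2) -> 4000-5000 ms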
@@ -10,6 +10,9 @@ import type { QueueItem } from "./types.js";
 import { getChunkingOperations } from "./chunking/chunking_interface.js";
 import indexService from '../index_service.js';

+// Track which notes are currently being processed
+const notesInProcess = new Set<string>();
+
 /**
  * Queues a note for embedding update
  */
@@ -19,11 +22,17 @@ export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE'

     // Check if note is already in queue and whether it's marked as permanently failed
     const queueInfo = await sql.getRow(
-        "SELECT 1 as exists_flag, failed FROM embedding_queue WHERE noteId = ?",
+        "SELECT 1 as exists_flag, failed, isProcessing FROM embedding_queue WHERE noteId = ?",
         [noteId]
-    ) as {exists_flag: number, failed: number} | null;
+    ) as {exists_flag: number, failed: number, isProcessing: number} | null;

     if (queueInfo) {
+        // If the note is currently being processed, don't change its status
+        if (queueInfo.isProcessing === 1) {
+            log.info(`Note ${noteId} is currently being processed, skipping queue update`);
+            return;
+        }
+
         // Only update if not permanently failed
         if (queueInfo.failed !== 1) {
             // Update existing queue entry but preserve the failed status
@@ -41,8 +50,8 @@ export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE'
         // Add new queue entry
         await sql.execute(`
             INSERT INTO embedding_queue
-            (noteId, operation, dateQueued, utcDateQueued, failed)
-            VALUES (?, ?, ?, ?, 0)`,
+            (noteId, operation, dateQueued, utcDateQueued, failed, isProcessing)
+            VALUES (?, ?, ?, ?, 0, 0)`,
             [noteId, operation, now, utcNow]
         );
     }
@@ -180,11 +189,11 @@ export async function processEmbeddingQueue() {
         return;
     }

-    // Get notes from queue (excluding failed ones)
+    // Get notes from queue (excluding failed ones and those being processed)
     const notes = await sql.getRows(`
         SELECT noteId, operation, attempts
         FROM embedding_queue
-        WHERE failed = 0
+        WHERE failed = 0 AND isProcessing = 0
         ORDER BY priority DESC, utcDateQueued ASC
         LIMIT ?`,
         [batchSize]
@@ -198,30 +207,47 @@ export async function processEmbeddingQueue() {
     let processedCount = 0;

     for (const note of notes) {
+        const noteData = note as unknown as QueueItem;
+        const noteId = noteData.noteId;
+
+        // Double-check that this note isn't already being processed
+        if (notesInProcess.has(noteId)) {
+            log.info(`Note ${noteId} is already being processed by another thread, skipping`);
+            continue;
+        }
+
         try {
-            const noteData = note as unknown as QueueItem;
+            // Mark the note as being processed
+            notesInProcess.add(noteId);
+            await sql.execute(
+                "UPDATE embedding_queue SET isProcessing = 1 WHERE noteId = ?",
+                [noteId]
+            );

             // Skip if note no longer exists
-            if (!becca.getNote(noteData.noteId)) {
+            if (!becca.getNote(noteId)) {
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
-                await deleteNoteEmbeddings(noteData.noteId);
+                await deleteNoteEmbeddings(noteId);
                 continue;
             }

             if (noteData.operation === 'DELETE') {
-                await deleteNoteEmbeddings(noteData.noteId);
+                await deleteNoteEmbeddings(noteId);
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
                 continue;
             }

+            // Log that we're starting to process this note
+            log.info(`Starting embedding generation for note ${noteId}`);
+
             // Get note context for embedding
-            const context = await getNoteEmbeddingContext(noteData.noteId);
+            const context = await getNoteEmbeddingContext(noteId);

             // Check if we should use chunking for large content
             const useChunking = context.content.length > 5000;
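
Two guards now cooperate here: the notesInProcess set catches double-processing within one process, while the persisted isProcessing flag hides in-flight rows from the `WHERE failed = 0 AND isProcessing = 0` selection across timer ticks. An observation rather than something this diff handles: a crash after isProcessing is set to 1, but before the row is deleted or reset, would leave that note permanently skipped. A hypothetical startup sweep, assuming it ran before the first queue pass, could clear such stale locks:

    // Hypothetical recovery sweep (not part of this commit): reset locks left
    // behind by a crash so their notes become eligible for processing again.
    await sql.execute("UPDATE embedding_queue SET isProcessing = 0 WHERE isProcessing = 1");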
@@ -236,7 +262,7 @@ export async function processEmbeddingQueue() {
                     if (useChunking) {
                         // Process large notes using chunking
                         const chunkingOps = await getChunkingOperations();
-                        await chunkingOps.processNoteWithChunking(noteData.noteId, provider, context);
+                        await chunkingOps.processNoteWithChunking(noteId, provider, context);
                         allProvidersFailed = false;
                     } else {
                         // Standard approach: Generate a single embedding for the whole note
@@ -246,7 +272,7 @@ export async function processEmbeddingQueue() {
                         const config = provider.getConfig();
                         await import('./storage.js').then(storage => {
                             return storage.storeNoteEmbedding(
-                                noteData.noteId,
+                                noteId,
                                 provider.name,
                                 config.model,
                                 embedding
@@ -259,7 +285,7 @@ export async function processEmbeddingQueue() {
                 } catch (providerError: any) {
                     // This provider failed
                     allProvidersSucceeded = false;
-                    log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
+                    log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${providerError.message || 'Unknown error'}`);
                 }
             }
@@ -267,8 +293,10 @@ export async function processEmbeddingQueue() {
                 // At least one provider succeeded, remove from queue
                 await sql.execute(
                     "DELETE FROM embedding_queue WHERE noteId = ?",
-                    [noteData.noteId]
+                    [noteId]
                 );
+                log.info(`Successfully completed embedding processing for note ${noteId}`);
+
                 // Count as successfully processed
                 processedCount++;
             } else {
|
|||||||
UPDATE embedding_queue
|
UPDATE embedding_queue
|
||||||
SET attempts = attempts + 1,
|
SET attempts = attempts + 1,
|
||||||
lastAttempt = ?,
|
lastAttempt = ?,
|
||||||
error = ?
|
error = ?,
|
||||||
|
isProcessing = 0
|
||||||
WHERE noteId = ?`,
|
WHERE noteId = ?`,
|
||||||
[dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteData.noteId]
|
[dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteId]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Mark as permanently failed if too many attempts
|
// Mark as permanently failed if too many attempts
|
||||||
if (noteData.attempts + 1 >= 3) {
|
if (noteData.attempts + 1 >= 3) {
|
||||||
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
|
log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`);
|
||||||
|
|
||||||
// Set the failed flag but keep the actual attempts count
|
// Set the failed flag but keep the actual attempts count
|
||||||
await sql.execute(`
|
await sql.execute(`
|
||||||
UPDATE embedding_queue
|
UPDATE embedding_queue
|
||||||
SET failed = 1
|
SET failed = 1
|
||||||
WHERE noteId = ?
|
WHERE noteId = ?
|
||||||
`, [noteData.noteId]);
|
`, [noteId]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
const noteData = note as unknown as QueueItem;
|
|
||||||
|
|
||||||
// Update attempt count and log error
|
// Update attempt count and log error
|
||||||
await sql.execute(`
|
await sql.execute(`
|
||||||
UPDATE embedding_queue
|
UPDATE embedding_queue
|
||||||
SET attempts = attempts + 1,
|
SET attempts = attempts + 1,
|
||||||
lastAttempt = ?,
|
lastAttempt = ?,
|
||||||
error = ?
|
error = ?,
|
||||||
|
isProcessing = 0
|
||||||
WHERE noteId = ?`,
|
WHERE noteId = ?`,
|
||||||
[dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteData.noteId]
|
[dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteId]
|
||||||
);
|
);
|
||||||
|
|
||||||
log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
|
log.error(`Error processing embedding for note ${noteId}: ${error.message || 'Unknown error'}`);
|
||||||
|
|
||||||
// Mark as permanently failed if too many attempts
|
// Mark as permanently failed if too many attempts
|
||||||
if (noteData.attempts + 1 >= 3) {
|
if (noteData.attempts + 1 >= 3) {
|
||||||
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
|
log.error(`Marked note ${noteId} as permanently failed after multiple embedding attempts`);
|
||||||
|
|
||||||
// Set the failed flag but keep the actual attempts count
|
// Set the failed flag but keep the actual attempts count
|
||||||
await sql.execute(`
|
await sql.execute(`
|
||||||
UPDATE embedding_queue
|
UPDATE embedding_queue
|
||||||
SET failed = 1
|
SET failed = 1
|
||||||
WHERE noteId = ?
|
WHERE noteId = ?
|
||||||
`, [noteData.noteId]);
|
`, [noteId]);
|
||||||
}
|
}
|
||||||
|
} finally {
|
||||||
|
// Always clean up the processing status in the in-memory set
|
||||||
|
notesInProcess.delete(noteId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
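
The cleanup paths in this last hunk line up with the locking scheme: on success the queue row is deleted outright, so no isProcessing reset is needed; both failure paths set isProcessing back to 0 alongside the attempt bookkeeping; and the finally block always removes the note from the in-memory notesInProcess set, whether the iteration succeeded, failed, or threw.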