I think this works to handle failed embeddings

This commit is contained in:
perf3ct 2025-03-11 20:22:01 +00:00
parent 71b3b04c53
commit f47b070f0f
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
5 changed files with 642 additions and 73 deletions

View File

@ -30,6 +30,21 @@ interface EmbeddingStats {
}
}
/**
 * Response shape of the `embeddings/failed` endpoint.
 *
 * Each entry describes one failed embedding. The server produces two kinds of
 * entries, told apart by `failureType`:
 *  - `'full'`: taken from the embedding queue; carries `operation`, no `chunks`.
 *  - `'chunks'`: aggregated from per-chunk failure data; carries `chunks`, no `operation`.
 */
interface FailedEmbeddingNotes {
    success: boolean;
    failedNotes: Array<{
        noteId: string;
        /** Note title; absent when the note could not be resolved on the server. */
        title?: string;
        /** Queue operation; only present on `'full'` entries. */
        operation?: string;
        attempts: number;
        lastAttempt: string;
        error: string;
        /** `'full'` for a whole-note failure, `'chunks'` for partial (per-chunk) failures. */
        failureType: string;
        /** Number of failed chunks; only present on `'chunks'` entries. */
        chunks?: number;
    }>;
}
export default class AiSettingsWidget extends OptionsWidget {
private statsRefreshInterval: NodeJS.Timeout | null = null;
private readonly STATS_REFRESH_INTERVAL = 5000; // 5 seconds
@ -227,6 +242,16 @@ export default class AiSettingsWidget extends OptionsWidget {
</div>
</div>
</div>
<!-- Failed embeddings section -->
<div class="form-group mt-4">
<label>${t("ai_llm.failed_notes")}</label>
<div class="embedding-failed-notes-container">
<div class="embedding-failed-notes-list">
<div class="text-muted small">${t("ai_llm.no_failed_embeddings")}</div>
</div>
</div>
</div>
</div>
</div>`);
@ -416,6 +441,54 @@ export default class AiSettingsWidget extends OptionsWidget {
return this.$widget;
}
/**
 * Populate every AI/LLM and embedding control from the loaded options.
 *
 * Whenever an option is missing or empty, the hard-coded fallback next to it
 * is shown instead. Finishes by re-evaluating which sections are visible.
 */
optionsLoaded(options: OptionMap) {
    const $root = this.$widget;
    if (!$root) {
        return;
    }

    // Feature toggles
    this.setCheckboxState($root.find('.ai-enabled'), options.aiEnabled || 'false');
    this.setCheckboxState($root.find('.ollama-enabled'), options.ollamaEnabled || 'false');

    // General LLM settings
    $root.find('.ai-provider-precedence').val(options.aiProviderPrecedence || 'openai,anthropic,ollama');
    $root.find('.ai-temperature').val(options.aiTemperature || '0.7');
    $root.find('.ai-system-prompt').val(options.aiSystemPrompt || '');

    // OpenAI
    $root.find('.openai-api-key').val(options.openaiApiKey || '');
    $root.find('.openai-default-model').val(options.openaiDefaultModel || 'gpt-4o');
    $root.find('.openai-base-url').val(options.openaiBaseUrl || 'https://api.openai.com/v1');

    // Anthropic
    $root.find('.anthropic-api-key').val(options.anthropicApiKey || '');
    $root.find('.anthropic-default-model').val(options.anthropicDefaultModel || 'claude-3-opus-20240229');
    $root.find('.anthropic-base-url').val(options.anthropicBaseUrl || 'https://api.anthropic.com/v1');

    // Ollama
    $root.find('.ollama-base-url').val(options.ollamaBaseUrl || 'http://localhost:11434');
    $root.find('.ollama-default-model').val(options.ollamaDefaultModel || 'llama3');
    $root.find('.ollama-embedding-model').val(options.ollamaEmbeddingModel || 'nomic-embed-text');

    // Embeddings
    $root.find('.embedding-default-provider').val(options.embeddingsDefaultProvider || 'openai');
    this.setCheckboxState($root.find('.embedding-auto-update-enabled'), options.embeddingAutoUpdateEnabled || 'true');
    $root.find('.embedding-batch-size').val(options.embeddingBatchSize || '10');
    $root.find('.embedding-update-interval').val(options.embeddingUpdateInterval || '5000');
    $root.find('.embedding-default-dimension').val(options.embeddingDefaultDimension || '1536');

    this.updateAiSectionVisibility();
}
/**
 * Show or hide the AI-related sections according to the "AI enabled" checkbox,
 * and run the stats polling only while the embedding section is visible.
 */
updateAiSectionVisibility() {
    const $root = this.$widget;
    if (!$root) {
        return;
    }

    const aiEnabled = $root.find('.ai-enabled').prop('checked');

    // All three sections follow the same on/off switch
    for (const selector of ['.ai-providers-section', '.ai-provider', '.embedding-section']) {
        $root.find(selector).toggle(aiEnabled);
    }

    // Polling only makes sense while the embedding section is actually shown
    const shouldPoll = aiEnabled && $root.find('.embedding-section').is(':visible');
    if (!shouldPoll) {
        this.stopStatsPolling();
        return;
    }

    this.startStatsPolling();
}
/**
* Start automatic polling for embedding statistics
*/
@ -429,6 +502,9 @@ export default class AiSettingsWidget extends OptionsWidget {
if (this.$widget && this.$widget.is(':visible') &&
this.$widget.find('.embedding-section').is(':visible')) {
await this.refreshEmbeddingStats(true);
// Also update failed embeddings list periodically
await this.updateFailedEmbeddingsList();
}
}, this.STATS_REFRESH_INTERVAL);
}
@ -443,6 +519,62 @@ export default class AiSettingsWidget extends OptionsWidget {
}
}
/**
 * Tear down the widget: the stats polling is stopped first, then the
 * base-class cleanup runs.
 */
cleanup() {
    this.stopStatsPolling();

    super.cleanup();
}
/**
 * Fetch the embedding statistics from `embeddings/stats`.
 *
 * @returns the server response, or `null` when the request throws (the error
 *          is written to the console).
 */
async getEmbeddingStats(): Promise<EmbeddingStats | null> {
    let stats: EmbeddingStats | null = null;

    try {
        stats = await server.get('embeddings/stats') as EmbeddingStats;
    } catch (err) {
        console.error('Error fetching embedding stats:', err);
    }

    return stats;
}
/**
 * Fetch the list of failed embedding notes from `embeddings/failed`.
 *
 * @returns the server response, or `null` when the request throws (the error
 *          is written to the console).
 */
async getFailedEmbeddingNotes(): Promise<FailedEmbeddingNotes | null> {
    let failed: FailedEmbeddingNotes | null = null;

    try {
        failed = await server.get('embeddings/failed') as FailedEmbeddingNotes;
    } catch (err) {
        console.error('Error fetching failed embedding notes:', err);
    }

    return failed;
}
/**
 * Ask the server to retry the embedding of a single failed note.
 *
 * @param noteId - ID of the note to retry
 * @returns the `success` flag of the server response; `false` when the
 *          request throws (the error is written to the console).
 */
async retryFailedEmbedding(noteId: string): Promise<boolean> {
    try {
        const { success } = await server.post(`embeddings/retry/${noteId}`) as {success: boolean};
        return success;
    } catch (err) {
        console.error('Error retrying failed embedding:', err);
    }

    return false;
}
/**
 * Ask the server to retry every failed embedding.
 *
 * @returns the `success` flag of the server response; `false` when the
 *          request throws (the error is written to the console).
 */
async retryAllFailedEmbeddings(): Promise<boolean> {
    try {
        const { success } = await server.post('embeddings/retry-all-failed') as {success: boolean};
        return success;
    } catch (err) {
        console.error('Error retrying all failed embeddings:', err);
    }

    return false;
}
async refreshEmbeddingStats(silent = false) {
if (!this.$widget) return;
@ -455,7 +587,7 @@ export default class AiSettingsWidget extends OptionsWidget {
$refreshButton.text(t("ai_llm.refreshing"));
}
const response = await server.get<EmbeddingStats>('embeddings/stats');
const response = await this.getEmbeddingStats();
if (response && response.success) {
const stats = response.stats;
@ -498,6 +630,11 @@ export default class AiSettingsWidget extends OptionsWidget {
$progressBar.removeClass('progress-bar-striped progress-bar-animated bg-info');
$progressBar.addClass('bg-success');
}
// Update failed embeddings list if there are failures
if (stats.failedNotesCount > 0 && !silent) {
await this.updateFailedEmbeddingsList();
}
}
} catch (error) {
console.error("Error fetching embedding stats:", error);
@ -514,57 +651,85 @@ export default class AiSettingsWidget extends OptionsWidget {
}
}
updateAiSectionVisibility() {
/**
 * Fetch the failed embedding notes and re-render them inside
 * `.embedding-failed-notes-list`, together with per-note "Retry" buttons and
 * a "Retry All Failed" button.
 *
 * Everything that originates from the server (title, note ID, timestamps,
 * error text) is inserted through jQuery `.text()` / `.attr()` instead of
 * being interpolated into an HTML string, so a note title or provider error
 * message containing markup is displayed literally rather than executed.
 */
async updateFailedEmbeddingsList() {
    const $widget = this.$widget;
    if (!$widget) return;

    const failedResult = await this.getFailedEmbeddingNotes();
    // Tolerate a failed request (null) as well as a response without the array
    const failedNotes = failedResult?.failedNotes ?? [];
    const $container = $widget.find('.embedding-failed-notes-list');

    if (failedNotes.length === 0) {
        // Use consistent styling with the rest of the application
        $container.html(
            `<div class="text-muted small">No failed embeddings</div>`
        );
        return;
    }

    const $retryAllButton = $('<button class="btn btn-sm btn-outline-primary retry-all-btn">Retry All Failed</button>');
    const $failedHeader = $('<div class="d-flex justify-content-between align-items-center mb-2">').append(
        $('<h6>').text(`Failed Embeddings (${failedNotes.length})`),
        $retryAllButton
    );

    const $failedList = $('<div class="list-group failed-list mb-3">');

    for (const note of failedNotes) {
        // Determine if this is a full note failure or just failed chunks
        const isFullFailure = note.failureType === 'full';
        const badgeClass = isFullFailure ? 'badge-danger' : 'badge-warning';
        const badgeText = isFullFailure ? 'Full Note' : `${note.chunks} Chunks`;

        const $retryButton = $('<button class="btn btn-sm btn-outline-secondary retry-btn">Retry</button>')
            .attr('data-note-id', note.noteId);

        // The handler closes over this note, so no lookup through the DOM is needed
        $retryButton.on('click', async () => {
            $retryButton.prop('disabled', true).text('Retrying...');

            const success = await this.retryFailedEmbedding(note.noteId);

            if (success) {
                toastService.showMessage("Note queued for retry");
                await this.refreshEmbeddingStats();
            } else {
                toastService.showError("Failed to retry note");
                $retryButton.prop('disabled', false).text('Retry');
            }
        });

        const $item = $('<div class="list-group-item list-group-item-action flex-column align-items-start p-2">').append(
            $('<div class="d-flex justify-content-between">').append(
                $('<div>').append(
                    $('<h6 class="mb-1">').text(note.title || note.noteId),
                    $('<span>').attr('class', `badge ${badgeClass} mb-1`).text(badgeText)
                ),
                $retryButton
            ),
            $('<div class="small text-muted">').append(
                $('<div>').text(`Attempts: ${note.attempts}`),
                $('<div>').text(`Last attempt: ${note.lastAttempt}`),
                $('<div>').text(`Error: ${note.error}`)
            )
        );

        $failedList.append($item);
    }

    $retryAllButton.on('click', async () => {
        $retryAllButton.prop('disabled', true).text('Retrying All...');

        const success = await this.retryAllFailedEmbeddings();

        if (success) {
            toastService.showMessage("All failed notes queued for retry");
            await this.refreshEmbeddingStats();
        } else {
            toastService.showError("Failed to retry notes");
            $retryAllButton.prop('disabled', false).text('Retry All Failed');
        }
    });

    $container.empty().append($failedHeader, $failedList);
}
}

View File

@ -1123,6 +1123,8 @@
},
"ai_llm": {
"title": "AI/LLM Integration",
"enable_ai": "Enable AI/LLM features",
"enable_ai_desc": "Enable AI features like note summarization, content generation, and other LLM capabilities",
"enable_ai_features": "Enable AI/LLM features",
"enable_ai_description": "Enable AI features like note summarization, content generation, and other LLM capabilities",
"provider_configuration": "AI Provider Configuration",
@ -1157,6 +1159,8 @@
"embedding_default_provider_description": "Select the default provider used for generating note embeddings",
"enable_auto_update_embeddings": "Auto-update Embeddings",
"enable_auto_update_embeddings_description": "Automatically update embeddings when notes are modified",
"auto_update_embeddings": "Auto-update Embeddings",
"auto_update_embeddings_desc": "Automatically update embeddings when notes are modified",
"embedding_batch_size": "Batch Size",
"embedding_batch_size_description": "Number of notes to process in a single batch (1-50)",
"embedding_update_interval": "Update Interval (ms)",

View File

@ -203,6 +203,60 @@ async function getEmbeddingStats(req: Request, res: Response) {
};
}
/**
 * Get list of failed embedding notes.
 *
 * Reads the optional `limit` query parameter. When it is absent, not a
 * number, or negative, the default of 100 is used instead, so an invalid
 * value never reaches the vector store.
 *
 * @returns `{ success: true, failedNotes }` with the entries reported by the vector store
 */
async function getFailedNotes(req: Request, res: Response) {
    const DEFAULT_LIMIT = 100;

    const requestedLimit = parseInt(req.query.limit as string || String(DEFAULT_LIMIT), 10);
    // parseInt yields NaN for non-numeric input; a negative value is not a meaningful cap either
    const limit = Number.isNaN(requestedLimit) || requestedLimit < 0 ? DEFAULT_LIMIT : requestedLimit;

    const failedNotes = await vectorStore.getFailedEmbeddingNotes(limit);

    // No need to fetch note titles here anymore as they're already included in the response
    return {
        success: true,
        failedNotes: failedNotes
    };
}
/**
 * Retry the embedding of one failed note, identified by the `noteId` route parameter.
 *
 * @returns a `[400, body]` pair when no note ID was supplied, a `[404, body]`
 *          pair when the vector store reports nothing to retry, otherwise a
 *          success body.
 */
async function retryFailedNote(req: Request, res: Response) {
    const noteId = req.params.noteId;

    if (!noteId) {
        return [400, {
            success: false,
            message: "Note ID is required"
        }];
    }

    const wasQueued = await vectorStore.retryFailedEmbedding(noteId);

    if (wasQueued) {
        return {
            success: true,
            message: "Note queued for retry"
        };
    }

    return [404, {
        success: false,
        message: "Failed note not found or note is not marked as failed"
    }];
}
/**
 * Retry every failed note embedding and report how many were queued.
 *
 * @returns a success body whose message contains the count returned by the vector store
 */
async function retryAllFailedNotes(req: Request, res: Response) {
    const queuedCount = await vectorStore.retryAllFailedEmbeddings();
    const message = `${queuedCount} failed notes queued for retry`;

    return { success: true, message };
}
export default {
findSimilarNotes,
searchByText,
@ -210,5 +264,8 @@ export default {
updateProvider,
reprocessAllNotes,
getQueueStatus,
getEmbeddingStats
getEmbeddingStats,
getFailedNotes,
retryFailedNote,
retryAllFailedNotes
};

View File

@ -380,6 +380,9 @@ function register(app: express.Application) {
apiRoute(PST, "/api/embeddings/reprocess", embeddingsRoute.reprocessAllNotes);
apiRoute(GET, "/api/embeddings/queue-status", embeddingsRoute.getQueueStatus);
apiRoute(GET, "/api/embeddings/stats", embeddingsRoute.getEmbeddingStats);
apiRoute(GET, "/api/embeddings/failed", embeddingsRoute.getFailedNotes);
apiRoute(PST, "/api/embeddings/retry/:noteId", embeddingsRoute.retryFailedNote);
apiRoute(PST, "/api/embeddings/retry-all-failed", embeddingsRoute.retryAllFailedNotes);
apiRoute(PST, "/api/llm/sessions", llmRoute.createSession);
apiRoute(GET, "/api/llm/sessions", llmRoute.listSessions);

View File

@ -566,6 +566,213 @@ export async function deleteNoteEmbeddings(noteId: string, providerId?: string,
}
}
/**
 * Get notes that have failed embedding generation.
 *
 * Two sources are merged into one list:
 *  - rows of `embedding_queue` with `attempts > 0`, reported with `failureType: 'full'`;
 *  - labels whose name ends in `FailedChunks`, whose value is a JSON map of
 *    per-chunk failures, reported with `failureType: 'chunks'`.
 *
 * The merged list is ordered by `lastAttempt` (most recent first; entries
 * without a timestamp go last) and then cut to `limit` entries.
 *
 * @param limit - Maximum number of failed notes to return. A non-finite or
 *                negative value falls back to the default of 100.
 * @returns List of failed notes with their error information
 */
export async function getFailedEmbeddingNotes(limit: number = 100): Promise<any[]> {
    type FailedEntry = {
        noteId: string;
        title?: string;
        failureType: 'full' | 'chunks';
        attempts: number;
        lastAttempt: string;
        error: string;
        operation?: string;
        chunks?: number;
    };
    type ChunkFailure = {attempts: number, lastAttempt: string, error: string};

    // The value is bound into `LIMIT ?` and reused for slice(); NaN or a
    // negative number would not act as a cap in either place.
    const safeLimit = Number.isFinite(limit) && limit >= 0 ? Math.floor(limit) : 100;

    // Get notes with failed embedding attempts
    const failedQueueItems = await sql.getRows(`
        SELECT noteId, operation, attempts, lastAttempt, error
        FROM embedding_queue
        WHERE attempts > 0
        ORDER BY attempts DESC, lastAttempt DESC
        LIMIT ?`,
        [safeLimit]
    ) as {noteId: string, operation: string, attempts: number, lastAttempt: string, error: string}[];

    // Add titles to the failed notes; the title stays undefined when the note is unknown
    const failedNotes: FailedEntry[] = failedQueueItems.map(item => ({
        ...item,
        title: becca.getNote(item.noteId)?.title,
        failureType: 'full' // This indicates a complete embedding failure
    }));

    // Now get notes with failed chunks
    // We need to search for labels that contain failed chunks data
    // NOTE(review): this reads the attributes table directly. If labels are
    // soft-deleted rather than physically removed, rows for labels that a
    // retry already removed would still match here - confirm and filter if so.
    const failedChunkLabels = await sql.getRows(`
        SELECT noteId, name, value
        FROM attributes
        WHERE type = 'label' AND name LIKE '%FailedChunks'
    `) as {noteId: string, name: string, value: string}[];

    // Process notes with failed chunks
    for (const item of failedChunkLabels) {
        try {
            const noteId = item.noteId;
            const note = becca.getNote(noteId);
            if (!note) continue;

            // Parse the failed chunks data and make sure it really is a map
            const parsed: unknown = JSON.parse(item.value);
            if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) continue;

            const chunkFailures = Object.values(parsed as Record<string, ChunkFailure>);
            const chunkCount = chunkFailures.length;
            if (chunkCount === 0) continue;

            // Sum the attempts and remember the most recent failure as the example
            let latestAttempt = '';
            let totalAttempts = 0;
            let errorExample = '';

            for (const chunk of chunkFailures) {
                totalAttempts += chunk.attempts;

                if (!latestAttempt || chunk.lastAttempt > latestAttempt) {
                    latestAttempt = chunk.lastAttempt;
                    errorExample = chunk.error;
                }
            }

            // Add this to our list of failed notes
            failedNotes.push({
                noteId,
                title: note.title,
                failureType: 'chunks',
                chunks: chunkCount,
                attempts: totalAttempts,
                lastAttempt: latestAttempt,
                error: `${chunkCount} chunks failed: ${errorExample}`
            });
        } catch (error) {
            console.error("Error processing note with failed chunks:", error);
        }
    }

    // Sort by latest attempt; a missing timestamp compares as '' and therefore sorts last
    failedNotes.sort((a, b) => (b.lastAttempt || '').localeCompare(a.lastAttempt || ''));

    // Limit to the specified number
    return failedNotes.slice(0, safeLimit);
}
/**
 * Retry embedding generation for a specific failed note.
 *
 * Resets the note's failed row in `embedding_queue` (if there is one) and
 * removes every label whose name ends in `FailedChunks` from the note. When
 * chunk labels were removed but the note had no failed queue row, the note
 * is queued with an 'UPDATE' operation.
 *
 * @param noteId - ID of the note to retry
 * @returns `true` when a queue row was reset or at least one label was removed
 */
export async function retryFailedEmbedding(noteId: string): Promise<boolean> {
    // First, check if the note is in the embedding queue with failed attempts
    const inFailedQueue = await sql.getValue(
        "SELECT 1 FROM embedding_queue WHERE noteId = ? AND attempts > 0",
        [noteId]
    );

    if (inFailedQueue) {
        // Reset the note in the queue
        const localNow = dateUtils.localNowDateTime();
        const utcNow = dateUtils.utcNowDateTime();

        await sql.execute(`
            UPDATE embedding_queue
            SET attempts = 0, error = NULL, dateQueued = ?, utcDateQueued = ?
            WHERE noteId = ?`,
            [localNow, utcNow, noteId]
        );
    }

    const queueWasReset = inFailedQueue ? true : false;

    // Next, check for failed chunks in labels
    const note = becca.getNote(noteId);
    if (!note) {
        return queueWasReset;
    }

    // Look for any provider-specific failed chunks
    const chunkLabels = note.getLabels().filter(candidate => candidate.name.endsWith('FailedChunks'));
    if (chunkLabels.length === 0) {
        return queueWasReset;
    }

    for (const chunkLabel of chunkLabels) {
        // Remove the label - this will cause all chunks to be retried
        await note.removeLabel(chunkLabel.name);
    }

    // If we had chunk failures but no queue entry, we need to add one
    if (!inFailedQueue) {
        await queueNoteForEmbedding(noteId, 'UPDATE');
    }

    return true;
}
/**
 * Retry all failed embeddings.
 *
 * Resets every `embedding_queue` row with `attempts > 0`, then removes the
 * `...FailedChunks` labels from every note that carries them and queues
 * those notes with an 'UPDATE' operation.
 *
 * Note IDs are collected in a set, so a note that is both in the failed
 * queue and has failed-chunk labels is counted once. A note is only
 * re-queued for chunk failures when it actually carries such a label.
 *
 * @returns Number of distinct notes queued for retry
 */
export async function retryAllFailedEmbeddings(): Promise<number> {
    const retriedNoteIds = new Set<string>();

    // Get the failed notes in the queue
    const failedQueueRows = await sql.getRows(
        "SELECT noteId FROM embedding_queue WHERE attempts > 0"
    ) as {noteId: string}[];

    if (failedQueueRows.length > 0) {
        // Reset all failed notes in the queue
        const now = dateUtils.localNowDateTime();
        const utcNow = dateUtils.utcNowDateTime();

        await sql.execute(`
            UPDATE embedding_queue
            SET attempts = 0, error = NULL, dateQueued = ?, utcDateQueued = ?
            WHERE attempts > 0`,
            [now, utcNow]
        );

        for (const row of failedQueueRows) {
            retriedNoteIds.add(row.noteId);
        }
    }

    // Now find notes with failed chunks
    const notesWithFailedChunks = await sql.getRows(`
        SELECT DISTINCT noteId
        FROM attributes
        WHERE type = 'label' AND name LIKE '%FailedChunks'
    `) as {noteId: string}[];

    // Process each note with failed chunks
    for (const { noteId } of notesWithFailedChunks) {
        const note = becca.getNote(noteId);
        if (!note) continue;

        // Get all failed chunks labels that the note currently carries
        const failedChunksLabels = note.getLabels().filter(label => label.name.endsWith('FailedChunks'));

        // Nothing to clear on this note, so there is nothing to retry for it
        if (failedChunksLabels.length === 0) continue;

        for (const label of failedChunksLabels) {
            // Remove the label - this will cause all chunks to be retried
            await note.removeLabel(label.name);
        }

        // Make sure the note is in the queue
        await queueNoteForEmbedding(noteId, 'UPDATE');
        retriedNoteIds.add(noteId);
    }

    return retriedNoteIds.size;
}
/**
* Process the embedding queue
*/
@ -621,7 +828,10 @@ export async function processEmbeddingQueue() {
const context = await getNoteEmbeddingContext(noteData.noteId);
// Check if we should use chunking for large content
const useChunking = context.content.length > 5000; // Use chunking for large notes by default
const useChunking = context.content.length > 5000;
// Track if all providers failed
let allProvidersFailed = true;
// Process with each enabled provider
for (const provider of enabledProviders) {
@ -642,16 +852,35 @@ export async function processEmbeddingQueue() {
embedding
);
}
// At least one provider succeeded
allProvidersFailed = false;
} catch (providerError: any) {
log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
}
}
// Remove from queue on success
await sql.execute(
"DELETE FROM embedding_queue WHERE noteId = ?",
[noteData.noteId]
);
// Only remove from queue on success if at least one provider succeeded
if (!allProvidersFailed) {
await sql.execute(
"DELETE FROM embedding_queue WHERE noteId = ?",
[noteData.noteId]
);
} else {
// If all providers failed, mark as failed but keep in queue
await sql.execute(`
UPDATE embedding_queue
SET attempts = attempts + 1,
lastAttempt = ?,
error = ?
WHERE noteId = ?`,
[dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteData.noteId]
);
// The note stays in the queue so it can be retried manually; only log once it has failed too many times
if (noteData.attempts + 1 >= 3) {
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
}
}
} catch (error: any) {
const noteData = note as unknown as QueueItem;
@ -667,13 +896,10 @@ export async function processEmbeddingQueue() {
log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
// Remove from queue if too many attempts
// Don't remove from queue even after multiple failures, just mark as failed
// This allows manual retries later
if (noteData.attempts + 1 >= 3) {
await sql.execute(
"DELETE FROM embedding_queue WHERE noteId = ?",
[noteData.noteId]
);
log.error(`Removed note ${noteData.noteId} from embedding queue after multiple failures`);
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
}
}
}
@ -857,40 +1083,151 @@ async function processNoteWithChunking(
// Delete existing embeddings first to avoid duplicates
await deleteNoteEmbeddings(noteId, provider.name, config.model);
// Track successful and failed chunks
let successfulChunks = 0;
let failedChunks = 0;
const totalChunks = chunks.length;
// Get existing chunk failure data from the database
// We'll store this in a special attribute on the note to track per-chunk failures
const failedChunksData = await getFailedChunksData(noteId, provider.name);
// Process each chunk with a slight delay to avoid rate limits
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const chunkId = `chunk_${i + 1}_of_${chunks.length}`;
// Create a modified context object with just this chunk's content
const chunkContext: NoteEmbeddingContext = {
...context,
content: chunk
};
// Skip chunks that have failed multiple times
if (failedChunksData[chunkId] && failedChunksData[chunkId].attempts >= 3) {
log.info(`Skipping chunk ${chunkId} for note ${noteId} after ${failedChunksData[chunkId].attempts} failed attempts`);
failedChunks++;
continue;
}
// Generate embedding for this chunk
const embedding = await provider.generateNoteEmbeddings(chunkContext);
try {
// Create a modified context object with just this chunk's content
const chunkContext: NoteEmbeddingContext = {
...context,
content: chunk
};
// Store with chunk information
await storeNoteEmbedding(
noteId,
provider.name,
config.model,
embedding
);
// Generate embedding for this chunk
const embedding = await provider.generateNoteEmbeddings(chunkContext);
// Small delay between chunks to avoid rate limits
if (i < chunks.length - 1) {
await new Promise(resolve => setTimeout(resolve, 100));
// Store with chunk information
await storeNoteEmbedding(
noteId,
provider.name,
config.model,
embedding
);
successfulChunks++;
// Remove this chunk from failed chunks if it was previously failed
if (failedChunksData[chunkId]) {
delete failedChunksData[chunkId];
await updateFailedChunksData(noteId, provider.name, failedChunksData);
}
// Small delay between chunks to avoid rate limits
if (i < chunks.length - 1) {
await new Promise(resolve => setTimeout(resolve, 100));
}
} catch (error: any) {
// Track the failure for this specific chunk
failedChunks++;
if (!failedChunksData[chunkId]) {
failedChunksData[chunkId] = {
attempts: 1,
lastAttempt: dateUtils.utcNowDateTime(),
error: error.message || 'Unknown error'
};
} else {
failedChunksData[chunkId].attempts++;
failedChunksData[chunkId].lastAttempt = dateUtils.utcNowDateTime();
failedChunksData[chunkId].error = error.message || 'Unknown error';
}
// Update the failed chunks data in the database
await updateFailedChunksData(noteId, provider.name, failedChunksData);
log.error(`Error processing chunk ${chunkId} for note ${noteId}: ${error.message || 'Unknown error'}`);
}
}
log.info(`Generated ${chunks.length} chunk embeddings for note ${noteId}`);
// Log information about the processed chunks
if (successfulChunks > 0) {
log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId}`);
}
if (failedChunks > 0) {
log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId}`);
}
// If all chunks failed, throw an error so the note will be marked as failed
if (successfulChunks === 0 && failedChunks > 0) {
throw new Error(`All ${failedChunks} chunks failed for note ${noteId}`);
}
} catch (error: any) {
log.error(`Error in chunked embedding process for note ${noteId}: ${error.message || 'Unknown error'}`);
throw error;
}
}
/**
 * Load the failed chunk data for a note and provider.
 *
 * The data is kept as JSON in a label named `<providerId>FailedChunks` on
 * the note, so per-chunk failures can be tracked between runs.
 *
 * @param noteId - ID of the note
 * @param providerId - Provider name used as the label prefix
 * @returns Map from chunk ID to its failure record. An empty map is returned
 *          when the note or label is missing, when the stored value is not a
 *          JSON object, or when reading fails (the error is logged).
 */
async function getFailedChunksData(noteId: string, providerId: string): Promise<Record<string, {attempts: number, lastAttempt: string, error: string}>> {
    try {
        const attributeName = `${providerId}FailedChunks`;

        const note = becca.getNote(noteId);
        const attr = note?.getLabels().find(label => label.name === attributeName);

        if (!attr || !attr.value) {
            return {};
        }

        // Callers index into the result by chunk ID, so anything that is not a
        // plain object (null, array, primitive) is treated as "no data".
        const parsed: unknown = JSON.parse(attr.value);
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            return {};
        }

        return parsed as Record<string, {attempts: number, lastAttempt: string, error: string}>;
    } catch (e) {
        log.error(`Error reading failed chunks data for note ${noteId}: ${e}`);
        return {};
    }
}
/**
 * Persist the failed chunk data of a note for one provider.
 *
 * A non-empty map is written as JSON to the `<providerId>FailedChunks` label;
 * an empty map removes that label if the note has it. Errors are logged and
 * not propagated.
 */
async function updateFailedChunksData(noteId: string, providerId: string, data: Record<string, {attempts: number, lastAttempt: string, error: string}>): Promise<void> {
    try {
        const labelName = `${providerId}FailedChunks`;

        const note = becca.getNote(noteId);
        if (!note) {
            return;
        }

        const hasFailures = Object.keys(data).length > 0;
        if (hasFailures) {
            // Only store if there are failed chunks
            await note.setLabel(labelName, JSON.stringify(data));
            return;
        }

        // If no failed chunks, remove the attribute if it exists
        const labelExists = note.getLabels().some(candidate => candidate.name === labelName);
        if (labelExists) {
            await note.removeLabel(labelName);
        }
    } catch (e) {
        log.error(`Error updating failed chunks data for note ${noteId}: ${e}`);
    }
}
/**
 * Cleanup hook for the embeddings module.
 *
 * Currently a no-op: the body contains no statements.
 */
export function cleanupEmbeddings() {
    // Intentionally empty
}
@ -910,5 +1247,8 @@ export default {
setupEmbeddingBackgroundProcessing,
initEmbeddings,
reprocessAllNotes,
getEmbeddingStats
getEmbeddingStats,
getFailedEmbeddingNotes,
retryFailedEmbedding,
retryAllFailedEmbeddings
};