mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-08-10 02:02:29 +08:00
I think this works to handle failed embeddings
This commit is contained in:
parent
71b3b04c53
commit
f47b070f0f
@ -30,6 +30,21 @@ interface EmbeddingStats {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Response shape of the `embeddings/failed` endpoint.
 *
 * Each entry describes one note whose embedding generation failed. The server
 * produces two kinds of entries:
 * - `failureType: 'full'`   - built from an embedding queue row; carries
 *   `operation` but no `chunks`.
 * - `failureType: 'chunks'` - built from a `*FailedChunks` label; carries
 *   `chunks` but no `operation`.
 */
interface FailedEmbeddingNotes {
    success: boolean;
    failedNotes: Array<{
        noteId: string;
        /** Note title; omitted when the server could not resolve the note. */
        title?: string;
        /** Queue operation; only sent for full-note failures. */
        operation?: string;
        attempts: number;
        lastAttempt: string;
        error: string;
        failureType: 'full' | 'chunks';
        /** Number of failed chunks; only sent for chunk failures. */
        chunks?: number;
    }>;
}
|
||||
|
||||
export default class AiSettingsWidget extends OptionsWidget {
|
||||
private statsRefreshInterval: NodeJS.Timeout | null = null;
|
||||
private readonly STATS_REFRESH_INTERVAL = 5000; // 5 seconds
|
||||
@ -227,6 +242,16 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Failed embeddings section -->
|
||||
<div class="form-group mt-4">
|
||||
<label>${t("ai_llm.failed_notes")}</label>
|
||||
<div class="embedding-failed-notes-container">
|
||||
<div class="embedding-failed-notes-list">
|
||||
<div class="text-muted small">${t("ai_llm.no_failed_embeddings")}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>`);
|
||||
|
||||
@ -416,6 +441,54 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
return this.$widget;
|
||||
}
|
||||
|
||||
/**
 * Fill every AI/LLM form control from the loaded options, falling back to the
 * built-in default whenever an option is unset or empty, and then refresh the
 * visibility of the AI sections.
 */
optionsLoaded(options: OptionMap) {
    const $root = this.$widget;
    if (!$root) {
        return;
    }

    // Checkbox-backed options: [selector, stored value or default].
    const checkboxStates = [
        ['.ai-enabled', options.aiEnabled || 'false'],
        ['.ollama-enabled', options.ollamaEnabled || 'false'],
        ['.embedding-auto-update-enabled', options.embeddingAutoUpdateEnabled || 'true']
    ] as const;

    for (const [selector, state] of checkboxStates) {
        this.setCheckboxState($root.find(selector), state);
    }

    // Plain value inputs: [selector, stored value or default].
    const inputValues = [
        // General AI settings
        ['.ai-provider-precedence', options.aiProviderPrecedence || 'openai,anthropic,ollama'],
        ['.ai-temperature', options.aiTemperature || '0.7'],
        ['.ai-system-prompt', options.aiSystemPrompt || ''],
        // OpenAI
        ['.openai-api-key', options.openaiApiKey || ''],
        ['.openai-default-model', options.openaiDefaultModel || 'gpt-4o'],
        ['.openai-base-url', options.openaiBaseUrl || 'https://api.openai.com/v1'],
        // Anthropic
        ['.anthropic-api-key', options.anthropicApiKey || ''],
        ['.anthropic-default-model', options.anthropicDefaultModel || 'claude-3-opus-20240229'],
        ['.anthropic-base-url', options.anthropicBaseUrl || 'https://api.anthropic.com/v1'],
        // Ollama
        ['.ollama-base-url', options.ollamaBaseUrl || 'http://localhost:11434'],
        ['.ollama-default-model', options.ollamaDefaultModel || 'llama3'],
        ['.ollama-embedding-model', options.ollamaEmbeddingModel || 'nomic-embed-text'],
        // Embeddings
        ['.embedding-default-provider', options.embeddingsDefaultProvider || 'openai'],
        ['.embedding-batch-size', options.embeddingBatchSize || '10'],
        ['.embedding-update-interval', options.embeddingUpdateInterval || '5000'],
        ['.embedding-default-dimension', options.embeddingDefaultDimension || '1536']
    ] as const;

    for (const [selector, value] of inputValues) {
        $root.find(selector).val(value);
    }

    this.updateAiSectionVisibility();
}
|
||||
|
||||
/**
 * Show or hide the AI-dependent sections according to the "AI enabled"
 * checkbox, then start or stop the stats polling to match.
 */
updateAiSectionVisibility() {
    const $root = this.$widget;
    if (!$root) {
        return;
    }

    const isAiOn = $root.find('.ai-enabled').prop('checked');

    for (const selector of ['.ai-providers-section', '.ai-provider', '.embedding-section']) {
        $root.find(selector).toggle(isAiOn);
    }

    // Poll only while AI is on and the embedding section is actually displayed.
    const shouldPoll = isAiOn && $root.find('.embedding-section').is(':visible');
    if (!shouldPoll) {
        this.stopStatsPolling();
        return;
    }

    this.startStatsPolling();
}
|
||||
|
||||
/**
|
||||
* Start automatic polling for embedding statistics
|
||||
*/
|
||||
@ -429,6 +502,9 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
if (this.$widget && this.$widget.is(':visible') &&
|
||||
this.$widget.find('.embedding-section').is(':visible')) {
|
||||
await this.refreshEmbeddingStats(true);
|
||||
|
||||
// Also update failed embeddings list periodically
|
||||
await this.updateFailedEmbeddingsList();
|
||||
}
|
||||
}, this.STATS_REFRESH_INTERVAL);
|
||||
}
|
||||
@ -443,6 +519,62 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Clean-up hook for when the widget is removed: stops the stats polling and
 * then delegates to the base class cleanup.
 */
cleanup() {
    this.stopStatsPolling();
    super.cleanup();
}
|
||||
|
||||
/**
 * Fetch embedding statistics from the `embeddings/stats` endpoint.
 *
 * @returns the server response, or `null` if the request throws (the error is
 *          logged to the console).
 */
async getEmbeddingStats(): Promise<EmbeddingStats | null> {
    let stats: EmbeddingStats | null = null;

    try {
        stats = await server.get('embeddings/stats') as EmbeddingStats;
    } catch (error) {
        console.error('Error fetching embedding stats:', error);
    }

    return stats;
}
|
||||
|
||||
/**
 * Fetch the failed embedding notes from the `embeddings/failed` endpoint.
 *
 * @returns the server response, or `null` if the request throws (the error is
 *          logged to the console).
 */
async getFailedEmbeddingNotes(): Promise<FailedEmbeddingNotes | null> {
    let failed: FailedEmbeddingNotes | null = null;

    try {
        failed = await server.get('embeddings/failed') as FailedEmbeddingNotes;
    } catch (error) {
        console.error('Error fetching failed embedding notes:', error);
    }

    return failed;
}
|
||||
|
||||
/**
 * Ask the server to retry the embedding of a single failed note.
 *
 * @param noteId - ID of the note to retry
 * @returns the `success` flag of the server response, or `false` if the
 *          request throws (the error is logged to the console).
 */
async retryFailedEmbedding(noteId: string): Promise<boolean> {
    try {
        const { success } = await server.post(`embeddings/retry/${noteId}`) as {success: boolean};
        return success;
    } catch (error) {
        console.error('Error retrying failed embedding:', error);
    }

    return false;
}
|
||||
|
||||
/**
 * Ask the server to retry every failed embedding.
 *
 * @returns the `success` flag of the server response, or `false` if the
 *          request throws (the error is logged to the console).
 */
async retryAllFailedEmbeddings(): Promise<boolean> {
    try {
        const { success } = await server.post('embeddings/retry-all-failed') as {success: boolean};
        return success;
    } catch (error) {
        console.error('Error retrying all failed embeddings:', error);
    }

    return false;
}
|
||||
|
||||
async refreshEmbeddingStats(silent = false) {
|
||||
if (!this.$widget) return;
|
||||
|
||||
@ -455,7 +587,7 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
$refreshButton.text(t("ai_llm.refreshing"));
|
||||
}
|
||||
|
||||
const response = await server.get<EmbeddingStats>('embeddings/stats');
|
||||
const response = await this.getEmbeddingStats();
|
||||
|
||||
if (response && response.success) {
|
||||
const stats = response.stats;
|
||||
@ -498,6 +630,11 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
$progressBar.removeClass('progress-bar-striped progress-bar-animated bg-info');
|
||||
$progressBar.addClass('bg-success');
|
||||
}
|
||||
|
||||
// Update failed embeddings list if there are failures
|
||||
if (stats.failedNotesCount > 0 && !silent) {
|
||||
await this.updateFailedEmbeddingsList();
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error fetching embedding stats:", error);
|
||||
@ -514,57 +651,85 @@ export default class AiSettingsWidget extends OptionsWidget {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Re-render the failed-embeddings list from the server.
 *
 * Fetches the failed notes and replaces the content of
 * `.embedding-failed-notes-list` with a header, one entry per failed note and
 * the retry buttons, or with a placeholder when there is nothing to show.
 *
 * Every server-provided value (note title, error text, timestamps, note id)
 * is inserted through jQuery `.text()` / `.attr()` instead of being
 * interpolated into an HTML string, so markup contained in a note title or a
 * provider error message is displayed literally rather than parsed.
 */
async updateFailedEmbeddingsList() {
    if (!this.$widget) return;

    const failedResult = await this.getFailedEmbeddingNotes();
    const $container = this.$widget.find('.embedding-failed-notes-list');

    // Treat a failed request or a response without a `failedNotes` array as "nothing failed".
    const failedNotes = failedResult && Array.isArray(failedResult.failedNotes)
        ? failedResult.failedNotes
        : [];

    if (failedNotes.length === 0) {
        // Use consistent styling with the rest of the application
        $container.html(
            `<div class="text-muted small">No failed embeddings</div>`
        );
        return;
    }

    const $retryAllButton = $('<button class="btn btn-sm btn-outline-primary retry-all-btn">Retry All Failed</button>');
    const $failedHeader = $('<div class="d-flex justify-content-between align-items-center mb-2">').append(
        $('<h6>').text(`Failed Embeddings (${failedNotes.length})`),
        $retryAllButton
    );

    const $failedList = $('<div class="list-group failed-list mb-3">');

    for (const note of failedNotes) {
        // Determine if this is a full note failure or just failed chunks
        const isFullFailure = note.failureType === 'full';
        const badgeClass = isFullFailure ? 'badge-danger' : 'badge-warning';
        const badgeText = isFullFailure ? 'Full Note' : `${note.chunks} Chunks`;

        const $retryButton = $('<button class="btn btn-sm btn-outline-secondary retry-btn">Retry</button>')
            .attr('data-note-id', note.noteId);

        const $summary = $('<div class="d-flex justify-content-between">').append(
            $('<div>').append(
                $('<h6 class="mb-1">').text(note.title || note.noteId),
                $('<span class="badge mb-1">').addClass(badgeClass).text(badgeText)
            ),
            $retryButton
        );

        const $details = $('<div class="small text-muted">').append(
            $('<div>').text(`Attempts: ${note.attempts}`),
            $('<div>').text(`Last attempt: ${note.lastAttempt}`),
            $('<div>').text(`Error: ${note.error}`)
        );

        // The handler closes over the note and its button, so the id is used exactly
        // as received (no data-attribute type coercion) and no `this` aliasing is needed.
        $retryButton.on('click', async () => {
            $retryButton.prop('disabled', true).text('Retrying...');

            const success = await this.retryFailedEmbedding(note.noteId);

            if (success) {
                toastService.showMessage("Note queued for retry");
                await this.refreshEmbeddingStats();
            } else {
                toastService.showError("Failed to retry note");
                $retryButton.prop('disabled', false).text('Retry');
            }
        });

        $failedList.append(
            $('<div class="list-group-item list-group-item-action flex-column align-items-start p-2">')
                .append($summary, $details)
        );
    }

    $retryAllButton.on('click', async () => {
        $retryAllButton.prop('disabled', true).text('Retrying All...');

        const success = await this.retryAllFailedEmbeddings();

        if (success) {
            toastService.showMessage("All failed notes queued for retry");
            await this.refreshEmbeddingStats();
        } else {
            toastService.showError("Failed to retry notes");
            $retryAllButton.prop('disabled', false).text('Retry All Failed');
        }
    });

    $container.empty().append($failedHeader, $failedList);
}
|
||||
}
|
||||
|
@ -1123,6 +1123,8 @@
|
||||
},
|
||||
"ai_llm": {
|
||||
"title": "AI/LLM Integration",
|
||||
"enable_ai": "Enable AI/LLM features",
|
||||
"enable_ai_desc": "Enable AI features like note summarization, content generation, and other LLM capabilities",
|
||||
"enable_ai_features": "Enable AI/LLM features",
|
||||
"enable_ai_description": "Enable AI features like note summarization, content generation, and other LLM capabilities",
|
||||
"provider_configuration": "AI Provider Configuration",
|
||||
@ -1157,6 +1159,8 @@
|
||||
"embedding_default_provider_description": "Select the default provider used for generating note embeddings",
|
||||
"enable_auto_update_embeddings": "Auto-update Embeddings",
|
||||
"enable_auto_update_embeddings_description": "Automatically update embeddings when notes are modified",
|
||||
"auto_update_embeddings": "Auto-update Embeddings",
|
||||
"auto_update_embeddings_desc": "Automatically update embeddings when notes are modified",
|
||||
"embedding_batch_size": "Batch Size",
|
||||
"embedding_batch_size_description": "Number of notes to process in a single batch (1-50)",
|
||||
"embedding_update_interval": "Update Interval (ms)",
|
||||
|
@ -203,6 +203,60 @@ async function getEmbeddingStats(req: Request, res: Response) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Get list of failed embedding notes.
 *
 * Query parameters:
 * - `limit` (optional): maximum number of entries to return. A missing,
 *   non-numeric or non-positive value falls back to 100, so an invalid value
 *   never reaches the SQL `LIMIT` clause.
 *
 * @returns `{ success: true, failedNotes }`, where `failedNotes` is the result
 *          of `vectorStore.getFailedEmbeddingNotes(limit)` passed through
 *          unchanged.
 */
async function getFailedNotes(req: Request, res: Response) {
    const DEFAULT_LIMIT = 100;

    // A repeated query parameter arrives as an array; use its first value.
    const rawLimit = Array.isArray(req.query.limit) ? req.query.limit[0] : req.query.limit;
    const parsedLimit = typeof rawLimit === 'string' ? parseInt(rawLimit, 10) : NaN;
    const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : DEFAULT_LIMIT;

    const failedNotes = await vectorStore.getFailedEmbeddingNotes(limit);

    return {
        success: true,
        failedNotes: failedNotes
    };
}
|
||||
|
||||
/**
 * Retry a specific failed note embedding.
 *
 * Responds with `[400, ...]` when the `noteId` route parameter is missing,
 * `[404, ...]` when `vectorStore.retryFailedEmbedding` reports that nothing
 * was retried, and a success object otherwise.
 */
async function retryFailedNote(req: Request, res: Response) {
    const noteId = req.params.noteId;

    if (!noteId) {
        return [400, {
            success: false,
            message: "Note ID is required"
        }];
    }

    const wasQueued = await vectorStore.retryFailedEmbedding(noteId);

    if (wasQueued) {
        return {
            success: true,
            message: "Note queued for retry"
        };
    }

    return [404, {
        success: false,
        message: "Failed note not found or note is not marked as failed"
    }];
}
|
||||
|
||||
/**
 * Retry all failed note embeddings.
 *
 * @returns a success object whose message contains the count reported by
 *          `vectorStore.retryAllFailedEmbeddings`.
 */
async function retryAllFailedNotes(req: Request, res: Response) {
    const retriedCount = await vectorStore.retryAllFailedEmbeddings();
    const message = `${retriedCount} failed notes queued for retry`;

    return { success: true, message };
}
|
||||
|
||||
export default {
|
||||
findSimilarNotes,
|
||||
searchByText,
|
||||
@ -210,5 +264,8 @@ export default {
|
||||
updateProvider,
|
||||
reprocessAllNotes,
|
||||
getQueueStatus,
|
||||
getEmbeddingStats
|
||||
getEmbeddingStats,
|
||||
getFailedNotes,
|
||||
retryFailedNote,
|
||||
retryAllFailedNotes
|
||||
};
|
||||
|
@ -380,6 +380,9 @@ function register(app: express.Application) {
|
||||
apiRoute(PST, "/api/embeddings/reprocess", embeddingsRoute.reprocessAllNotes);
|
||||
apiRoute(GET, "/api/embeddings/queue-status", embeddingsRoute.getQueueStatus);
|
||||
apiRoute(GET, "/api/embeddings/stats", embeddingsRoute.getEmbeddingStats);
|
||||
apiRoute(GET, "/api/embeddings/failed", embeddingsRoute.getFailedNotes);
|
||||
apiRoute(PST, "/api/embeddings/retry/:noteId", embeddingsRoute.retryFailedNote);
|
||||
apiRoute(PST, "/api/embeddings/retry-all-failed", embeddingsRoute.retryAllFailedNotes);
|
||||
|
||||
apiRoute(PST, "/api/llm/sessions", llmRoute.createSession);
|
||||
apiRoute(GET, "/api/llm/sessions", llmRoute.listSessions);
|
||||
|
@ -566,6 +566,213 @@ export async function deleteNoteEmbeddings(noteId: string, providerId?: string,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get notes that have failed embedding generation.
 *
 * Two sources are merged:
 * 1. rows of `embedding_queue` with at least one recorded attempt, reported
 *    with `failureType: 'full'`;
 * 2. live labels whose name ends in `FailedChunks`, reported with
 *    `failureType: 'chunks'` and aggregated over all failed chunks of the
 *    label (total attempts, most recent attempt and its error).
 *
 * @param limit - Maximum number of failed notes to return
 * @returns List of failed notes with their error information, most recent
 *          attempt first; entries without a `lastAttempt` are placed last.
 */
export async function getFailedEmbeddingNotes(limit: number = 100): Promise<any[]> {
    type ChunkFailure = {attempts: number, lastAttempt: string, error: string};

    // Get notes with failed embedding attempts
    const failedQueueItems = await sql.getRows(`
        SELECT noteId, operation, attempts, lastAttempt, error
        FROM embedding_queue
        WHERE attempts > 0
        ORDER BY attempts DESC, lastAttempt DESC
        LIMIT ?`,
        [limit]
    ) as {noteId: string, operation: string, attempts: number, lastAttempt: string, error: string}[];

    // Add titles to the failed notes; the title is left out when the note cannot be resolved
    const failedNotesWithTitles: any[] = [];
    for (const item of failedQueueItems) {
        const note = becca.getNote(item.noteId);
        failedNotesWithTitles.push({
            ...item,
            ...(note ? { title: note.title } : {}),
            failureType: 'full' // This indicates a complete embedding failure
        });
    }

    // Now get notes with failed chunks, stored as JSON in "<provider>FailedChunks" labels.
    // NOTE(review): `isDeleted = 0` relies on the attributes table's soft-delete flag, so
    // that labels already removed by a retry are not reported again - confirm against the schema.
    const failedChunkLabels = await sql.getRows(`
        SELECT noteId, name, value
        FROM attributes
        WHERE type = 'label' AND name LIKE '%FailedChunks' AND isDeleted = 0
    `) as {noteId: string, name: string, value: string}[];

    // Process notes with failed chunks
    for (const item of failedChunkLabels) {
        try {
            const noteId = item.noteId;
            const note = becca.getNote(noteId);
            if (!note) continue;

            // Parse the failed chunks data; anything but a plain JSON object is ignored
            const parsed: unknown = JSON.parse(item.value);
            if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) continue;

            const failedChunks = Object.values(parsed as Record<string, ChunkFailure>);
            const chunkCount = failedChunks.length;
            if (chunkCount === 0) continue;

            // Sum the attempts and keep the error of the most recently attempted chunk
            let latestAttempt = '';
            let totalAttempts = 0;
            let errorExample = '';

            for (const chunk of failedChunks) {
                totalAttempts += chunk.attempts;

                if (!latestAttempt || chunk.lastAttempt > latestAttempt) {
                    latestAttempt = chunk.lastAttempt;
                    errorExample = chunk.error;
                }
            }

            // Add this to our list of failed notes
            failedNotesWithTitles.push({
                noteId,
                title: note.title,
                failureType: 'chunks',
                chunks: chunkCount,
                attempts: totalAttempts,
                lastAttempt: latestAttempt,
                error: `${chunkCount} chunks failed: ${errorExample}`
            });
        } catch (error: any) {
            log.error(`Error processing failed chunks label ${item.name} of note ${item.noteId}: ${error.message || 'Unknown error'}`);
        }
    }

    // Sort by latest attempt (descending). Entries without a timestamp go last so the
    // comparator stays consistent instead of calling them equal to everything.
    failedNotesWithTitles.sort((a, b) => {
        if (a.lastAttempt && b.lastAttempt) {
            return b.lastAttempt.localeCompare(a.lastAttempt);
        }
        if (a.lastAttempt) return -1;
        if (b.lastAttempt) return 1;
        return 0;
    });

    // Limit to the specified number
    return failedNotesWithTitles.slice(0, limit);
}
|
||||
|
||||
/**
 * Retry embedding generation for a specific failed note.
 *
 * A queue row with recorded attempts is reset (attempts and error cleared,
 * queue dates refreshed). Independently, every label of the note whose name
 * ends in `FailedChunks` is removed; if such labels existed but the note had
 * no failed queue row, the note is queued with an `UPDATE` operation.
 *
 * @param noteId - ID of the note to retry
 * @returns `true` if a failed queue row was reset or at least one label was
 *          removed, `false` otherwise
 */
export async function retryFailedEmbedding(noteId: string): Promise<boolean> {
    // Step 1: reset the queue row, if the note sits in the queue with failed attempts
    const failedQueueRow = await sql.getValue(
        "SELECT 1 FROM embedding_queue WHERE noteId = ? AND attempts > 0",
        [noteId]
    );

    if (failedQueueRow) {
        const localNow = dateUtils.localNowDateTime();
        const utcNow = dateUtils.utcNowDateTime();

        await sql.execute(`
            UPDATE embedding_queue
            SET attempts = 0, error = NULL, dateQueued = ?, utcDateQueued = ?
            WHERE noteId = ?`,
            [localNow, utcNow, noteId]
        );
    }

    const queueWasReset = Boolean(failedQueueRow);

    // Step 2: clear provider-specific failed-chunk labels
    const note = becca.getNote(noteId);
    if (!note) {
        return queueWasReset;
    }

    const chunkFailureLabels = note.getLabels().filter(label => label.name.endsWith('FailedChunks'));
    if (chunkFailureLabels.length === 0) {
        return queueWasReset;
    }

    for (const label of chunkFailureLabels) {
        // Removing the label causes all chunks to be retried
        await note.removeLabel(label.name);
    }

    // Chunk failures without a failed queue entry still need to be queued
    if (!failedQueueRow) {
        await queueNoteForEmbedding(noteId, 'UPDATE');
    }

    return true;
}
|
||||
|
||||
/**
 * Retry all failed embeddings.
 *
 * Resets every `embedding_queue` row that has recorded attempts, then removes
 * the `*FailedChunks` labels of every note that still carries one and queues
 * that note with an `UPDATE` operation.
 *
 * A note that appears both in the failed queue and among the labelled notes
 * is counted once, and notes without any live `FailedChunks` label are
 * neither re-queued nor counted.
 *
 * @returns Number of distinct notes queued for retry
 */
export async function retryAllFailedEmbeddings(): Promise<number> {
    // Collect the failed queue entries by note so each note is counted only once
    const failedQueueRows = await sql.getRows(
        "SELECT noteId FROM embedding_queue WHERE attempts > 0"
    ) as {noteId: string}[];
    const retriedNoteIds = new Set<string>(failedQueueRows.map(row => row.noteId));

    if (retriedNoteIds.size > 0) {
        // Reset all failed notes in the queue
        const now = dateUtils.localNowDateTime();
        const utcNow = dateUtils.utcNowDateTime();

        await sql.execute(`
            UPDATE embedding_queue
            SET attempts = 0, error = NULL, dateQueued = ?, utcDateQueued = ?
            WHERE attempts > 0`,
            [now, utcNow]
        );
    }

    // Now find notes with failed chunks.
    // NOTE(review): `isDeleted = 0` relies on the attributes table's soft-delete flag to
    // skip labels that were already removed - confirm against the schema.
    const notesWithFailedChunks = await sql.getRows(`
        SELECT DISTINCT noteId
        FROM attributes
        WHERE type = 'label' AND name LIKE '%FailedChunks' AND isDeleted = 0
    `) as {noteId: string}[];

    // Process each note with failed chunks
    for (const { noteId } of notesWithFailedChunks) {
        const note = becca.getNote(noteId);
        if (!note) continue;

        const failedChunksLabels = note.getLabels().filter(label => label.name.endsWith('FailedChunks'));
        if (failedChunksLabels.length === 0) continue; // nothing left to retry for this note

        for (const label of failedChunksLabels) {
            // Remove the label - this will cause all chunks to be retried
            await note.removeLabel(label.name);
        }

        // Make sure the note is in the queue
        await queueNoteForEmbedding(noteId, 'UPDATE');
        retriedNoteIds.add(noteId);
    }

    return retriedNoteIds.size;
}
|
||||
|
||||
/**
|
||||
* Process the embedding queue
|
||||
*/
|
||||
@ -621,7 +828,10 @@ export async function processEmbeddingQueue() {
|
||||
const context = await getNoteEmbeddingContext(noteData.noteId);
|
||||
|
||||
// Check if we should use chunking for large content
|
||||
const useChunking = context.content.length > 5000; // Use chunking for large notes by default
|
||||
const useChunking = context.content.length > 5000;
|
||||
|
||||
// Track if all providers failed
|
||||
let allProvidersFailed = true;
|
||||
|
||||
// Process with each enabled provider
|
||||
for (const provider of enabledProviders) {
|
||||
@ -642,16 +852,35 @@ export async function processEmbeddingQueue() {
|
||||
embedding
|
||||
);
|
||||
}
|
||||
// At least one provider succeeded
|
||||
allProvidersFailed = false;
|
||||
} catch (providerError: any) {
|
||||
log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from queue on success
|
||||
await sql.execute(
|
||||
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||
[noteData.noteId]
|
||||
);
|
||||
// Only remove from queue on success if at least one provider succeeded
|
||||
if (!allProvidersFailed) {
|
||||
await sql.execute(
|
||||
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||
[noteData.noteId]
|
||||
);
|
||||
} else {
|
||||
// If all providers failed, mark as failed but keep in queue
|
||||
await sql.execute(`
|
||||
UPDATE embedding_queue
|
||||
SET attempts = attempts + 1,
|
||||
lastAttempt = ?,
|
||||
error = ?
|
||||
WHERE noteId = ?`,
|
||||
[dateUtils.utcNowDateTime(), "All providers failed to generate embeddings", noteData.noteId]
|
||||
);
|
||||
|
||||
// Remove from queue if too many attempts
|
||||
if (noteData.attempts + 1 >= 3) {
|
||||
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
|
||||
}
|
||||
}
|
||||
} catch (error: any) {
|
||||
const noteData = note as unknown as QueueItem;
|
||||
|
||||
@ -667,13 +896,10 @@ export async function processEmbeddingQueue() {
|
||||
|
||||
log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
|
||||
|
||||
// Remove from queue if too many attempts
|
||||
// Don't remove from queue even after multiple failures, just mark as failed
|
||||
// This allows manual retries later
|
||||
if (noteData.attempts + 1 >= 3) {
|
||||
await sql.execute(
|
||||
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||
[noteData.noteId]
|
||||
);
|
||||
log.error(`Removed note ${noteData.noteId} from embedding queue after multiple failures`);
|
||||
log.error(`Marked note ${noteData.noteId} as permanently failed after multiple embedding attempts`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -857,40 +1083,151 @@ async function processNoteWithChunking(
|
||||
// Delete existing embeddings first to avoid duplicates
|
||||
await deleteNoteEmbeddings(noteId, provider.name, config.model);
|
||||
|
||||
// Track successful and failed chunks
|
||||
let successfulChunks = 0;
|
||||
let failedChunks = 0;
|
||||
const totalChunks = chunks.length;
|
||||
|
||||
// Get existing chunk failure data from the database
|
||||
// We'll store this in a special attribute on the note to track per-chunk failures
|
||||
const failedChunksData = await getFailedChunksData(noteId, provider.name);
|
||||
|
||||
// Process each chunk with a slight delay to avoid rate limits
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunk = chunks[i];
|
||||
const chunkId = `chunk_${i + 1}_of_${chunks.length}`;
|
||||
|
||||
// Create a modified context object with just this chunk's content
|
||||
const chunkContext: NoteEmbeddingContext = {
|
||||
...context,
|
||||
content: chunk
|
||||
};
|
||||
// Skip chunks that have failed multiple times
|
||||
if (failedChunksData[chunkId] && failedChunksData[chunkId].attempts >= 3) {
|
||||
log.info(`Skipping chunk ${chunkId} for note ${noteId} after ${failedChunksData[chunkId].attempts} failed attempts`);
|
||||
failedChunks++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate embedding for this chunk
|
||||
const embedding = await provider.generateNoteEmbeddings(chunkContext);
|
||||
try {
|
||||
// Create a modified context object with just this chunk's content
|
||||
const chunkContext: NoteEmbeddingContext = {
|
||||
...context,
|
||||
content: chunk
|
||||
};
|
||||
|
||||
// Store with chunk information
|
||||
await storeNoteEmbedding(
|
||||
noteId,
|
||||
provider.name,
|
||||
config.model,
|
||||
embedding
|
||||
);
|
||||
// Generate embedding for this chunk
|
||||
const embedding = await provider.generateNoteEmbeddings(chunkContext);
|
||||
|
||||
// Small delay between chunks to avoid rate limits
|
||||
if (i < chunks.length - 1) {
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
// Store with chunk information
|
||||
await storeNoteEmbedding(
|
||||
noteId,
|
||||
provider.name,
|
||||
config.model,
|
||||
embedding
|
||||
);
|
||||
|
||||
successfulChunks++;
|
||||
|
||||
// Remove this chunk from failed chunks if it was previously failed
|
||||
if (failedChunksData[chunkId]) {
|
||||
delete failedChunksData[chunkId];
|
||||
await updateFailedChunksData(noteId, provider.name, failedChunksData);
|
||||
}
|
||||
|
||||
// Small delay between chunks to avoid rate limits
|
||||
if (i < chunks.length - 1) {
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
}
|
||||
} catch (error: any) {
|
||||
// Track the failure for this specific chunk
|
||||
failedChunks++;
|
||||
|
||||
if (!failedChunksData[chunkId]) {
|
||||
failedChunksData[chunkId] = {
|
||||
attempts: 1,
|
||||
lastAttempt: dateUtils.utcNowDateTime(),
|
||||
error: error.message || 'Unknown error'
|
||||
};
|
||||
} else {
|
||||
failedChunksData[chunkId].attempts++;
|
||||
failedChunksData[chunkId].lastAttempt = dateUtils.utcNowDateTime();
|
||||
failedChunksData[chunkId].error = error.message || 'Unknown error';
|
||||
}
|
||||
|
||||
// Update the failed chunks data in the database
|
||||
await updateFailedChunksData(noteId, provider.name, failedChunksData);
|
||||
|
||||
log.error(`Error processing chunk ${chunkId} for note ${noteId}: ${error.message || 'Unknown error'}`);
|
||||
}
|
||||
}
|
||||
|
||||
log.info(`Generated ${chunks.length} chunk embeddings for note ${noteId}`);
|
||||
// Log information about the processed chunks
|
||||
if (successfulChunks > 0) {
|
||||
log.info(`Generated ${successfulChunks} chunk embeddings for note ${noteId}`);
|
||||
}
|
||||
|
||||
if (failedChunks > 0) {
|
||||
log.info(`Failed to generate ${failedChunks} chunk embeddings for note ${noteId}`);
|
||||
}
|
||||
|
||||
// If all chunks failed, throw an error so the note will be marked as failed
|
||||
if (successfulChunks === 0 && failedChunks > 0) {
|
||||
throw new Error(`All ${failedChunks} chunks failed for note ${noteId}`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
log.error(`Error in chunked embedding process for note ${noteId}: ${error.message || 'Unknown error'}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Load the failed chunk data for a note and provider.
 *
 * The data is read from the note's `<providerId>FailedChunks` label, whose
 * value is JSON keyed by chunk id.
 *
 * @param noteId - ID of the note
 * @param providerId - provider name used as the label prefix
 * @returns the parsed map, or `{}` when the note or label is missing, the
 *          label value is empty, or the value is not a JSON object (the last
 *          case and any thrown error are logged)
 */
async function getFailedChunksData(noteId: string, providerId: string): Promise<Record<string, {attempts: number, lastAttempt: string, error: string}>> {
    const attributeName = `${providerId}FailedChunks`;

    try {
        const note = becca.getNote(noteId);
        if (!note) {
            return {};
        }

        const attr = note.getLabels().find(label => label.name === attributeName);
        if (!attr || !attr.value) {
            return {};
        }

        // Callers index the result by chunk id, so only a plain object is usable.
        const parsed: unknown = JSON.parse(attr.value);
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            log.error(`Ignoring malformed ${attributeName} data for note ${noteId}`);
            return {};
        }

        return parsed as Record<string, {attempts: number, lastAttempt: string, error: string}>;
    } catch (e) {
        log.error(`Error reading failed chunks data for note ${noteId}: ${e}`);
        return {};
    }
}
|
||||
|
||||
/**
 * Persist the failed chunk data for a note and provider.
 *
 * Non-empty data is written as JSON to the note's `<providerId>FailedChunks`
 * label; empty data removes that label if the note has one. Does nothing when
 * the note cannot be found. Errors are logged, not rethrown.
 */
async function updateFailedChunksData(noteId: string, providerId: string, data: Record<string, {attempts: number, lastAttempt: string, error: string}>): Promise<void> {
    try {
        const labelName = `${providerId}FailedChunks`;
        const note = becca.getNote(noteId);

        if (!note) {
            return;
        }

        const hasFailures = Object.keys(data).length > 0;
        if (hasFailures) {
            await note.setLabel(labelName, JSON.stringify(data));
            return;
        }

        // Nothing failed any more: drop the label, but only if it is actually there
        const labelPresent = note.getLabels().some(label => label.name === labelName);
        if (labelPresent) {
            await note.removeLabel(labelName);
        }
    } catch (e) {
        log.error(`Error updating failed chunks data for note ${noteId}: ${e}`);
    }
}
|
||||
|
||||
/**
 * Placeholder for embedding cleanup; the body currently performs no work.
 */
export function cleanupEmbeddings(): void {
    // Cleanup function implementation
}
|
||||
@ -910,5 +1247,8 @@ export default {
|
||||
setupEmbeddingBackgroundProcessing,
|
||||
initEmbeddings,
|
||||
reprocessAllNotes,
|
||||
getEmbeddingStats
|
||||
getEmbeddingStats,
|
||||
getFailedEmbeddingNotes,
|
||||
retryFailedEmbedding,
|
||||
retryAllFailedEmbeddings
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user