mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-09-26 06:51:31 +08:00
fix embeddings w/ cls.init()
This commit is contained in:
parent
0081e6f1d0
commit
781a2506f0
@ -148,11 +148,17 @@ async function updateProvider(req: Request, res: Response) {
|
|||||||
* Manually trigger a reprocessing of all notes
|
* Manually trigger a reprocessing of all notes
|
||||||
*/
|
*/
|
||||||
async function reprocessAllNotes(req: Request, res: Response) {
|
async function reprocessAllNotes(req: Request, res: Response) {
|
||||||
|
// Import cls
|
||||||
|
const cls = (await import("../../services/cls.js")).default;
|
||||||
|
|
||||||
// Start the reprocessing operation in the background
|
// Start the reprocessing operation in the background
|
||||||
setTimeout(async () => {
|
setTimeout(async () => {
|
||||||
try {
|
try {
|
||||||
await vectorStore.reprocessAllNotes();
|
// Wrap the operation in cls.init to ensure proper context
|
||||||
log.info("Embedding reprocessing completed successfully");
|
cls.init(async () => {
|
||||||
|
await vectorStore.reprocessAllNotes();
|
||||||
|
log.info("Embedding reprocessing completed successfully");
|
||||||
|
});
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
log.error(`Error during background embedding reprocessing: ${error.message || "Unknown error"}`);
|
log.error(`Error during background embedding reprocessing: ${error.message || "Unknown error"}`);
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ import log from "../../../log.js";
|
|||||||
import dateUtils from "../../../date_utils.js";
|
import dateUtils from "../../../date_utils.js";
|
||||||
import sql from "../../../sql.js";
|
import sql from "../../../sql.js";
|
||||||
import becca from "../../../../becca/becca.js";
|
import becca from "../../../../becca/becca.js";
|
||||||
|
import cls from "../../../../services/cls.js";
|
||||||
import type { NoteEmbeddingContext } from "../types.js";
|
import type { NoteEmbeddingContext } from "../types.js";
|
||||||
// Remove static imports that cause circular dependencies
|
// Remove static imports that cause circular dependencies
|
||||||
// import { storeNoteEmbedding, deleteNoteEmbeddings } from "./storage.js";
|
// import { storeNoteEmbedding, deleteNoteEmbeddings } from "./storage.js";
|
||||||
@ -126,12 +127,14 @@ export async function processNoteWithChunking(
|
|||||||
|
|
||||||
if (!chunks || chunks.length === 0) {
|
if (!chunks || chunks.length === 0) {
|
||||||
// Fall back to single embedding if chunking fails
|
// Fall back to single embedding if chunking fails
|
||||||
const embedding = await provider.generateEmbeddings(context.content);
|
await cls.init(async () => {
|
||||||
const config = provider.getConfig();
|
const embedding = await provider.generateEmbeddings(context.content);
|
||||||
|
const config = provider.getConfig();
|
||||||
|
|
||||||
// Use dynamic import instead of static import
|
// Use dynamic import instead of static import
|
||||||
const storage = await import('../storage.js');
|
const storage = await import('../storage.js');
|
||||||
await storage.storeNoteEmbedding(noteId, provider.name, config.model, embedding);
|
await storage.storeNoteEmbedding(noteId, provider.name, config.model, embedding);
|
||||||
|
});
|
||||||
|
|
||||||
log.info(`Generated single embedding for note ${noteId} (${note.title}) since chunking failed`);
|
log.info(`Generated single embedding for note ${noteId} (${note.title}) since chunking failed`);
|
||||||
return;
|
return;
|
||||||
@ -187,20 +190,22 @@ export async function processNoteWithChunking(
|
|||||||
const chunk = chunks[i];
|
const chunk = chunks[i];
|
||||||
try {
|
try {
|
||||||
// Generate embedding for this chunk's content with a timeout
|
// Generate embedding for this chunk's content with a timeout
|
||||||
const embedding = await processChunkWithTimeout(
|
await cls.init(async () => {
|
||||||
provider,
|
const embedding = await processChunkWithTimeout(
|
||||||
chunk,
|
provider,
|
||||||
MAX_CHUNK_PROCESSING_TIME
|
chunk,
|
||||||
);
|
MAX_CHUNK_PROCESSING_TIME
|
||||||
|
);
|
||||||
|
|
||||||
// Store with chunk information in a unique ID format
|
// Store with chunk information in a unique ID format
|
||||||
const chunkIdSuffix = `${i + 1}_of_${chunks.length}`;
|
const chunkIdSuffix = `${i + 1}_of_${chunks.length}`;
|
||||||
await storage.storeNoteEmbedding(
|
await storage.storeNoteEmbedding(
|
||||||
noteId,
|
noteId,
|
||||||
provider.name,
|
provider.name,
|
||||||
config.model,
|
config.model,
|
||||||
embedding
|
embedding
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
successfulChunks++;
|
successfulChunks++;
|
||||||
|
|
||||||
@ -264,21 +269,24 @@ export async function processNoteWithChunking(
|
|||||||
// Wait longer for retries with exponential backoff
|
// Wait longer for retries with exponential backoff
|
||||||
await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(1.5, j)));
|
await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(1.5, j)));
|
||||||
|
|
||||||
// Retry the embedding with timeout
|
// Retry the embedding with timeout using cls.init
|
||||||
const embedding = await processChunkWithTimeout(
|
await cls.init(async () => {
|
||||||
provider,
|
const embedding = await processChunkWithTimeout(
|
||||||
item.chunk,
|
provider,
|
||||||
MAX_CHUNK_PROCESSING_TIME
|
item.chunk,
|
||||||
);
|
MAX_CHUNK_PROCESSING_TIME
|
||||||
|
);
|
||||||
|
|
||||||
// Store with unique ID that indicates it was a retry
|
// Store with unique ID that indicates it was a retry
|
||||||
const chunkIdSuffix = `${item.index + 1}_of_${chunks.length}`;
|
const chunkIdSuffix = `${item.index + 1}_of_${chunks.length}`;
|
||||||
await storage.storeNoteEmbedding(
|
const storage = await import('../storage.js');
|
||||||
noteId,
|
await storage.storeNoteEmbedding(
|
||||||
provider.name,
|
noteId,
|
||||||
config.model,
|
provider.name,
|
||||||
embedding
|
config.model,
|
||||||
);
|
embedding
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
// Update counters
|
// Update counters
|
||||||
successfulChunks++;
|
successfulChunks++;
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
import eventService from "../../../services/events.js";
|
import sql from "../../../services/sql.js";
|
||||||
import options from "../../../services/options.js";
|
|
||||||
import log from "../../../services/log.js";
|
import log from "../../../services/log.js";
|
||||||
import { queueNoteForEmbedding, processEmbeddingQueue } from "./queue.js";
|
import options from "../../../services/options.js";
|
||||||
|
import cls from "../../../services/cls.js";
|
||||||
|
import { processEmbeddingQueue, queueNoteForEmbedding } from "./queue.js";
|
||||||
|
import eventService from "../../../services/events.js";
|
||||||
|
import becca from "../../../becca/becca.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Setup event listeners for embedding-related events
|
* Setup event listeners for embedding-related events
|
||||||
@ -51,7 +54,10 @@ export async function setupEmbeddingBackgroundProcessing() {
|
|||||||
|
|
||||||
setInterval(async () => {
|
setInterval(async () => {
|
||||||
try {
|
try {
|
||||||
await processEmbeddingQueue();
|
// Wrap in cls.init to ensure proper context
|
||||||
|
cls.init(async () => {
|
||||||
|
await processEmbeddingQueue();
|
||||||
|
});
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
|
log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import sql from "../../../services/sql.js";
|
import sql from "../../../services/sql.js";
|
||||||
import log from "../../../services/log.js";
|
import log from "../../../services/log.js";
|
||||||
|
import cls from "../../../services/cls.js";
|
||||||
import { queueNoteForEmbedding } from "./queue.js";
|
import { queueNoteForEmbedding } from "./queue.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -8,14 +9,19 @@ import { queueNoteForEmbedding } from "./queue.js";
|
|||||||
export async function reprocessAllNotes() {
|
export async function reprocessAllNotes() {
|
||||||
log.info("Queueing all notes for embedding updates");
|
log.info("Queueing all notes for embedding updates");
|
||||||
|
|
||||||
|
// Get all non-deleted note IDs
|
||||||
const noteIds = await sql.getColumn(
|
const noteIds = await sql.getColumn(
|
||||||
"SELECT noteId FROM notes WHERE isDeleted = 0"
|
"SELECT noteId FROM notes WHERE isDeleted = 0"
|
||||||
);
|
);
|
||||||
|
|
||||||
log.info(`Adding ${noteIds.length} notes to embedding queue`);
|
log.info(`Adding ${noteIds.length} notes to embedding queue`);
|
||||||
|
|
||||||
|
// Process each note ID within a cls context
|
||||||
for (const noteId of noteIds) {
|
for (const noteId of noteIds) {
|
||||||
await queueNoteForEmbedding(noteId as string, 'UPDATE');
|
// Use cls.init to ensure proper context for each operation
|
||||||
|
await cls.init(async () => {
|
||||||
|
await queueNoteForEmbedding(noteId as string, 'UPDATE');
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user