Notes/src/routes/api/embeddings.ts

202 lines
5.4 KiB
TypeScript
Raw Normal View History

2025-03-08 22:04:10 +00:00
import options from "../../services/options.js";
import vectorStore from "../../services/llm/embeddings/vector_store.js";
import providerManager from "../../services/llm/embeddings/providers.js";
import becca from "../../becca/becca.js";
import type { Request, Response } from "express";
2025-03-08 23:08:25 +00:00
import log from "../../services/log.js";
import sql from "../../services/sql.js";
2025-03-08 22:04:10 +00:00
/**
 * Find notes whose embeddings are similar to the embedding of a given note.
 *
 * Reads `noteId` from the route params and the optional `providerId`,
 * `modelId`, `limit` and `threshold` values from the query string. If no
 * embedding is stored for the note under the requested provider/model, one is
 * generated with that provider and stored before the similarity search runs.
 *
 * @param req - request carrying `params.noteId` and the optional query values
 * @param res - unused; the result is returned instead of being written here
 * @returns `{ success: true, similarNotes }` on success, otherwise a
 *          `[statusCode, body]` pair: 400 when the note ID is missing or the
 *          provider is unknown, 404 when the note does not exist
 */
async function findSimilarNotes(req: Request, res: Response) {
    const noteId = req.params.noteId;
    const providerId = req.query.providerId as string || 'openai';
    const modelId = req.query.modelId as string || 'text-embedding-3-small';

    // A non-numeric query value parses to NaN; fall back to the defaults
    // rather than handing NaN (or a non-positive limit) to the vector store.
    const parsedLimit = parseInt(req.query.limit as string || '10', 10);
    const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;
    const parsedThreshold = parseFloat(req.query.threshold as string || '0.7');
    const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.7;

    if (!noteId) {
        return [400, {
            success: false,
            message: "Note ID is required"
        }];
    }

    const storedEmbedding = await vectorStore.getEmbeddingForNote(noteId, providerId, modelId);

    let queryEmbedding;
    if (storedEmbedding) {
        queryEmbedding = storedEmbedding.embedding;
    } else {
        // If no embedding exists for this note yet, generate one
        const note = becca.getNote(noteId);
        if (!note) {
            return [404, {
                success: false,
                message: "Note not found"
            }];
        }

        const context = await vectorStore.getNoteEmbeddingContext(noteId);
        const provider = providerManager.getEmbeddingProvider(providerId);
        if (!provider) {
            return [400, {
                success: false,
                message: `Embedding provider '${providerId}' not found`
            }];
        }

        queryEmbedding = await provider.generateNoteEmbeddings(context);
        await vectorStore.storeNoteEmbedding(noteId, providerId, modelId, queryEmbedding);
    }

    // Single search path shared by the stored and the freshly generated embedding.
    const similarNotes = await vectorStore.findSimilarNotes(
        queryEmbedding, providerId, modelId, limit, threshold
    );

    return {
        success: true,
        similarNotes
    };
}
/**
 * Search notes by free text.
 *
 * Generates an embedding for `body.text` with the requested provider and
 * returns the notes whose stored embeddings are similar to it. `providerId`,
 * `modelId`, `limit` and `threshold` are optional query-string values.
 *
 * @param req - request carrying `body.text` and the optional query values
 * @param res - unused; the result is returned instead of being written here
 * @returns `{ success: true, similarNotes }` on success, otherwise a
 *          `[400, body]` pair when the text is missing or the provider is unknown
 */
async function searchByText(req: Request, res: Response) {
    // Optional chaining: a request without a parsed body must yield the 400
    // below instead of throwing while destructuring `undefined`.
    const text = req.body?.text;
    const providerId = req.query.providerId as string || 'openai';
    const modelId = req.query.modelId as string || 'text-embedding-3-small';

    // A non-numeric query value parses to NaN; fall back to the defaults
    // rather than handing NaN (or a non-positive limit) to the vector store.
    const parsedLimit = parseInt(req.query.limit as string || '10', 10);
    const limit = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 10;
    const parsedThreshold = parseFloat(req.query.threshold as string || '0.7');
    const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.7;

    // Only a non-empty string is a usable search text.
    if (typeof text !== 'string' || text.length === 0) {
        return [400, {
            success: false,
            message: "Search text is required"
        }];
    }

    const provider = providerManager.getEmbeddingProvider(providerId);

    if (!provider) {
        return [400, {
            success: false,
            message: `Embedding provider '${providerId}' not found`
        }];
    }

    // Generate embedding for the search text
    const embedding = await provider.generateEmbeddings(text);

    // Find similar notes
    const similarNotes = await vectorStore.findSimilarNotes(
        embedding, providerId, modelId, limit, threshold
    );

    return {
        success: true,
        similarNotes
    };
}
/**
 * List the embedding provider configurations.
 *
 * @param req - unused
 * @param res - unused
 * @returns `{ success: true, providers }`, where `providers` is the value
 *          resolved by `providerManager.getEmbeddingProviderConfigs()`
 */
async function getProviders(req: Request, res: Response) {
    return {
        success: true,
        providers: await providerManager.getEmbeddingProviderConfigs()
    };
}
/**
 * Update the configuration of one embedding provider.
 *
 * Takes `providerId` from the route params and `isEnabled`, `priority` and
 * `config` from the request body, and forwards them to the provider manager.
 *
 * @param req - request carrying `params.providerId` and the body fields above
 * @param res - unused; the result is returned instead of being written here
 * @returns `{ success: true }` when the update call reports success,
 *          otherwise `[404, { success: false, message }]`
 */
async function updateProvider(req: Request, res: Response) {
    const providerId = req.params.providerId;
    const payload = req.body;

    const updated = await providerManager.updateEmbeddingProviderConfig(
        providerId, payload.isEnabled, payload.priority, payload.config
    );

    if (updated) {
        return {
            success: true
        };
    }

    return [404, {
        success: false,
        message: "Provider not found"
    }];
}
/**
 * Manually trigger a reprocessing of all notes.
 *
 * The work is scheduled with a zero-delay timer and is not awaited, so the
 * handler returns before reprocessing has finished. Completion and failure
 * are only reported through the log.
 *
 * @param req - unused
 * @param res - unused
 * @returns `{ success: true, message }` acknowledging that the job was started
 */
async function reprocessAllNotes(req: Request, res: Response) {
    // Start the reprocessing operation in the background
    setTimeout(async () => {
        try {
            await vectorStore.reprocessAllNotes();
            log.info("Embedding reprocessing completed successfully");
        } catch (error: unknown) {
            // Narrow before reading `.message`: a thrown null/undefined would
            // otherwise raise inside this detached callback and surface as an
            // unhandled rejection.
            let detail = "";
            if (typeof error === "string") {
                detail = error;
            } else if (typeof error === "object" && error !== null && "message" in error) {
                detail = String((error as { message: unknown }).message ?? "");
            }
            log.error(`Error during background embedding reprocessing: ${detail || "Unknown error"}`);
        }
    }, 0);

    // Return the response data
    return {
        success: true,
        message: "Embedding reprocessing started in the background"
    };
}
/**
 * Report counters describing the embedding queue.
 *
 * Runs three COUNT(*) queries, one after another:
 *  - all rows in `embedding_queue`
 *  - rows in `embedding_queue` with `attempts > 0`
 *  - all rows in `note_embeddings`
 *
 * @param req - unused
 * @param res - unused
 * @returns `{ success: true, status: { queueCount, failedCount, totalEmbeddingsCount } }`
 */
async function getQueueStatus(req: Request, res: Response) {
    const pendingRows = await sql.getValue("SELECT COUNT(*) FROM embedding_queue");
    const attemptedRows = await sql.getValue("SELECT COUNT(*) FROM embedding_queue WHERE attempts > 0");
    const embeddingRows = await sql.getValue("SELECT COUNT(*) FROM note_embeddings");

    const status = {
        queueCount: pendingRows,
        failedCount: attemptedRows,
        totalEmbeddingsCount: embeddingRows
    };

    return { success: true, status };
}
// Handlers exposed as this module's default export.
const embeddingRouteHandlers = {
    findSimilarNotes: findSimilarNotes,
    searchByText: searchByText,
    getProviders: getProviders,
    updateProvider: updateProvider,
    reprocessAllNotes: reprocessAllNotes,
    getQueueStatus: getQueueStatus
};

export default embeddingRouteHandlers;