feat(llm): remove everything to do with embeddings, part 2

This commit is contained in:
perf3ct 2025-06-07 18:20:06 +00:00
parent 44a45780b7
commit 44a2e7df21
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
7 changed files with 5 additions and 131 deletions

View File

@ -11,7 +11,7 @@ import { TPL, addMessageToChat, showSources, hideSources, showLoadingIndicator,
import { formatMarkdown } from "./utils.js"; import { formatMarkdown } from "./utils.js";
import { createChatSession, checkSessionExists, setupStreamingResponse, getDirectResponse } from "./communication.js"; import { createChatSession, checkSessionExists, setupStreamingResponse, getDirectResponse } from "./communication.js";
import { extractInChatToolSteps } from "./message_processor.js"; import { extractInChatToolSteps } from "./message_processor.js";
import { validateEmbeddingProviders } from "./validation.js"; import { validateProviders } from "./validation.js";
import type { MessageData, ToolExecutionStep, ChatData } from "./types.js"; import type { MessageData, ToolExecutionStep, ChatData } from "./types.js";
import { formatCodeBlocks } from "../../services/syntax_highlight.js"; import { formatCodeBlocks } from "../../services/syntax_highlight.js";
import { ClassicEditor, type CKTextEditor, type MentionFeed } from "@triliumnext/ckeditor5"; import { ClassicEditor, type CKTextEditor, type MentionFeed } from "@triliumnext/ckeditor5";
@ -616,7 +616,7 @@ export default class LlmChatPanel extends BasicWidget {
} }
// Check for any provider validation issues when refreshing // Check for any provider validation issues when refreshing
await validateEmbeddingProviders(this.validationWarning); await validateProviders(this.validationWarning);
// Get current note context if needed // Get current note context if needed
const currentActiveNoteId = appContext.tabManager.getActiveContext()?.note?.noteId || null; const currentActiveNoteId = appContext.tabManager.getActiveContext()?.note?.noteId || null;
@ -767,7 +767,7 @@ export default class LlmChatPanel extends BasicWidget {
*/ */
private async processUserMessage(content: string) { private async processUserMessage(content: string) {
// Check for validation issues first // Check for validation issues first
await validateEmbeddingProviders(this.validationWarning); await validateProviders(this.validationWarning);
// Make sure we have a valid session // Make sure we have a valid session
if (!this.noteId) { if (!this.noteId) {

View File

@ -26,8 +26,6 @@ interface MetricsData {
totalBlobs: number; totalBlobs: number;
totalEtapiTokens: number; totalEtapiTokens: number;
totalRecentNotes: number; totalRecentNotes: number;
totalEmbeddings: number;
totalEmbeddingProviders: number;
}; };
noteTypes: Record<string, number>; noteTypes: Record<string, number>;
attachmentTypes: Record<string, number>; attachmentTypes: Record<string, number>;
@ -88,8 +86,6 @@ function formatPrometheusMetrics(data: MetricsData): string {
addMetric('trilium_blobs_total', data.database.totalBlobs, 'Total number of blob records'); addMetric('trilium_blobs_total', data.database.totalBlobs, 'Total number of blob records');
addMetric('trilium_etapi_tokens_total', data.database.totalEtapiTokens, 'Number of active ETAPI tokens'); addMetric('trilium_etapi_tokens_total', data.database.totalEtapiTokens, 'Number of active ETAPI tokens');
addMetric('trilium_recent_notes_total', data.database.totalRecentNotes, 'Number of recent notes tracked'); addMetric('trilium_recent_notes_total', data.database.totalRecentNotes, 'Number of recent notes tracked');
addMetric('trilium_embeddings_total', data.database.totalEmbeddings, 'Number of note embeddings');
addMetric('trilium_embedding_providers_total', data.database.totalEmbeddingProviders, 'Number of embedding providers');
// Note types // Note types
for (const [type, count] of Object.entries(data.noteTypes)) { for (const [type, count] of Object.entries(data.noteTypes)) {
@ -155,15 +151,6 @@ function collectMetrics(): MetricsData {
const totalEtapiTokens = sql.getValue<number>("SELECT COUNT(*) FROM etapi_tokens WHERE isDeleted = 0"); const totalEtapiTokens = sql.getValue<number>("SELECT COUNT(*) FROM etapi_tokens WHERE isDeleted = 0");
const totalRecentNotes = sql.getValue<number>("SELECT COUNT(*) FROM recent_notes"); const totalRecentNotes = sql.getValue<number>("SELECT COUNT(*) FROM recent_notes");
// Embedding-related metrics (these tables might not exist in older versions)
let totalEmbeddings = 0;
let totalEmbeddingProviders = 0;
try {
totalEmbeddings = sql.getValue<number>("SELECT COUNT(*) FROM note_embeddings");
totalEmbeddingProviders = sql.getValue<number>("SELECT COUNT(*) FROM embedding_providers");
} catch (e) {
// Tables don't exist, keep defaults
}
const database = { const database = {
totalNotes, totalNotes,
@ -179,8 +166,6 @@ function collectMetrics(): MetricsData {
totalBlobs, totalBlobs,
totalEtapiTokens, totalEtapiTokens,
totalRecentNotes, totalRecentNotes,
totalEmbeddings,
totalEmbeddingProviders
}; };
// Note types breakdown // Note types breakdown

View File

@ -99,12 +99,6 @@ type MetricsData = ReturnType<typeof etapiMetrics.collectMetrics>;
* totalRecentNotes: * totalRecentNotes:
* type: integer * type: integer
* example: 50 * example: 50
* totalEmbeddings:
* type: integer
* example: 123
* totalEmbeddingProviders:
* type: integer
* example: 2
* noteTypes: * noteTypes:
* type: object * type: object
* additionalProperties: * additionalProperties:

View File

@ -28,10 +28,6 @@ function eraseNotes(noteIdsToErase: string[]) {
eraseRevisions(revisionIdsToErase); eraseRevisions(revisionIdsToErase);
// Erase embeddings related to the deleted notes
const embeddingIdsToErase = sql.getManyRows<{ embedId: string }>(`SELECT embedId FROM note_embeddings WHERE noteId IN (???)`, noteIdsToErase).map((row) => row.embedId);
eraseEmbeddings(embeddingIdsToErase);
log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`); log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
} }
@ -156,12 +152,6 @@ function eraseNotesWithDeleteId(deleteId: string) {
const attachmentIdsToErase = sql.getColumn<string>("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?", [deleteId]); const attachmentIdsToErase = sql.getColumn<string>("SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
eraseAttachments(attachmentIdsToErase); eraseAttachments(attachmentIdsToErase);
// Find and erase embeddings for deleted notes
const deletedNoteIds = sql.getColumn<string>("SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?", [deleteId]);
if (deletedNoteIds.length > 0) {
const embeddingIdsToErase = sql.getColumn<string>("SELECT embedId FROM note_embeddings WHERE noteId IN (???)", deletedNoteIds);
eraseEmbeddings(embeddingIdsToErase);
}
eraseUnusedBlobs(); eraseUnusedBlobs();
} }
@ -185,16 +175,6 @@ function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds: number |
eraseAttachments(attachmentIdsToErase); eraseAttachments(attachmentIdsToErase);
} }
/**
 * Permanently removes the given rows from the `note_embeddings` table and
 * flags their entity-change records as erased.
 *
 * @param embedIdsToErase - `embedId` values of the embeddings to delete. An
 *   empty array makes the call a no-op (no SQL is issued, nothing is logged).
 */
function eraseEmbeddings(embedIdsToErase: string[]) {
// Nothing to do for an empty list; skip the DELETE, the lookup and the log line.
if (embedIdsToErase.length === 0) {
return;
}
// NOTE(review): `(???)` is presumably expanded by sql.executeMany / sql.getManyRows
// into one placeholder per id — confirm against the sql helper.
sql.executeMany(`DELETE FROM note_embeddings WHERE embedId IN (???)`, embedIdsToErase);
// Look up the entity_changes rows that belong to the deleted embeddings and hand
// them to setEntityChangesAsErased.
setEntityChangesAsErased(sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'note_embeddings' AND entityId IN (???)`, embedIdsToErase));
log.info(`Erased embeddings: ${JSON.stringify(embedIdsToErase)}`);
}
export function startScheduledCleanup() { export function startScheduledCleanup() {
sqlInit.dbReady.then(() => { sqlInit.dbReady.then(() => {

View File

@ -364,14 +364,6 @@ function getEntityChangeRow(entityChange: EntityChange) {
} }
} }
// Special handling for note_embeddings embedding field
if (entityName === "note_embeddings") {
// Cast to any to access the embedding property
const row = entityRow as any;
if (row.embedding && Buffer.isBuffer(row.embedding)) {
row.embedding = row.embedding.toString("base64");
}
}
return entityRow; return entityRow;
} }

View File

@ -54,9 +54,7 @@ function updateEntity(remoteEC: EntityChange, remoteEntityRow: EntityRow | undef
const updated = remoteEC.entityName === "note_reordering" const updated = remoteEC.entityName === "note_reordering"
? updateNoteReordering(remoteEC, remoteEntityRow, instanceId) ? updateNoteReordering(remoteEC, remoteEntityRow, instanceId)
: (remoteEC.entityName === "note_embeddings" : updateNormalEntity(remoteEC, remoteEntityRow, instanceId, updateContext);
? updateNoteEmbedding(remoteEC, remoteEntityRow, instanceId, updateContext)
: updateNormalEntity(remoteEC, remoteEntityRow, instanceId, updateContext));
if (updated) { if (updated) {
if (remoteEntityRow?.isDeleted) { if (remoteEntityRow?.isDeleted) {
@ -145,78 +143,11 @@ function updateNoteReordering(remoteEC: EntityChange, remoteEntityRow: EntityRow
return true; return true;
} }
/**
 * Applies a remote `note_embeddings` entity change to the local database.
 *
 * Flow:
 *  1. If the change is marked erased, erase the entity locally and count it.
 *  2. If no remote row accompanies the change, log an error and give up.
 *  3. Otherwise decode a base64 `embedding` into a Buffer and write the row
 *     unless the local copy's `utcDateModified` is greater than or equal to
 *     the remote one.
 *
 * @param remoteEC - the incoming entity change record.
 * @param remoteEntityRow - the row data sent with the change; may be undefined.
 * @param instanceId - forwarded to `entityChangesService.putEntityChangeWithInstanceId`.
 * @param updateContext - accumulator; `erased` is incremented and
 *   `updated[entityName]` receives the entity id when a row is written.
 * @returns `false` only when `remoteEntityRow` is missing for a non-erased
 *   change; `true` on every other path.
 */
function updateNoteEmbedding(remoteEC: EntityChange, remoteEntityRow: EntityRow | undefined, instanceId: string, updateContext: UpdateContext) {
// Erasure takes precedence: the row payload is not consulted at all.
if (remoteEC.isErased) {
eraseEntity(remoteEC);
updateContext.erased++;
return true;
}
if (!remoteEntityRow) {
log.error(`Entity ${remoteEC.entityName} ${remoteEC.entityId} not found in sync update.`);
return false;
}
// Local description of the columns this function reads from a note_embeddings row.
interface NoteEmbeddingRow {
embedId: string;
noteId: string;
providerId: string;
modelId: string;
dimension: number;
embedding: Buffer;
version: number;
dateCreated: string;
utcDateCreated: string;
dateModified: string;
utcDateModified: string;
}
// Cast remoteEntityRow to include required embedding properties
// (the cast does not copy: typedRemoteEntityRow and remoteEntityRow are the same object)
const typedRemoteEntityRow = remoteEntityRow as unknown as NoteEmbeddingRow;
// Convert embedding from base64 string to Buffer if needed
// Because of the aliasing above, the decoded Buffer is what sql.replace receives below.
if (typedRemoteEntityRow.embedding && typeof typedRemoteEntityRow.embedding === "string") {
typedRemoteEntityRow.embedding = Buffer.from(typedRemoteEntityRow.embedding, "base64");
}
const localEntityRow = sql.getRow<NoteEmbeddingRow>(`SELECT * FROM note_embeddings WHERE embedId = ?`, [remoteEC.entityId]);
if (localEntityRow) {
// We already have this embedding, check if we need to update it
// NOTE(review): utcDateModified values are compared as strings; this presumably relies
// on a timestamp format whose lexicographic order matches chronological order — confirm.
if (localEntityRow.utcDateModified >= typedRemoteEntityRow.utcDateModified) {
// Local is newer or same, no need to update
// The entity change itself is still recorded even though the row is left untouched.
entityChangesService.putEntityChangeWithInstanceId(remoteEC, instanceId);
return true;
} else {
// Remote is newer, update local
sql.replace("note_embeddings", remoteEntityRow);
// Lazily create the per-entity-name list before recording the updated id.
if (!updateContext.updated[remoteEC.entityName]) {
updateContext.updated[remoteEC.entityName] = [];
}
updateContext.updated[remoteEC.entityName].push(remoteEC.entityId);
entityChangesService.putEntityChangeWithInstanceId(remoteEC, instanceId);
return true;
}
} else {
// We don't have this embedding, insert it
// Same statements as the "remote is newer" branch above.
sql.replace("note_embeddings", remoteEntityRow);
if (!updateContext.updated[remoteEC.entityName]) {
updateContext.updated[remoteEC.entityName] = [];
}
updateContext.updated[remoteEC.entityName].push(remoteEC.entityId);
entityChangesService.putEntityChangeWithInstanceId(remoteEC, instanceId);
return true;
}
}
function eraseEntity(entityChange: EntityChange) { function eraseEntity(entityChange: EntityChange) {
const { entityName, entityId } = entityChange; const { entityName, entityId } = entityChange;
const entityNames = ["notes", "branches", "attributes", "revisions", "attachments", "blobs", "note_embeddings"]; const entityNames = ["notes", "branches", "attributes", "revisions", "attachments", "blobs"];
if (!entityNames.includes(entityName)) { if (!entityNames.includes(entityName)) {
log.error(`Cannot erase ${entityName} '${entityId}'.`); log.error(`Cannot erase ${entityName} '${entityId}'.`);

View File

@ -203,13 +203,6 @@ function fillInAdditionalProperties(entityChange: EntityChange) {
WHERE attachmentId = ?`, WHERE attachmentId = ?`,
[entityChange.entityId] [entityChange.entityId]
); );
} else if (entityChange.entityName === "note_embeddings") {
// Note embeddings are backend-only entities for AI/vector search
// Frontend doesn't need the full embedding data (which is large binary data)
// Just ensure entity is marked as handled - actual sync happens at database level
if (!entityChange.isErased) {
entityChange.entity = { embedId: entityChange.entityId };
}
} }
if (entityChange.entity instanceof AbstractBeccaEntity) { if (entityChange.entity instanceof AbstractBeccaEntity) {
@ -228,7 +221,6 @@ const ORDERING: Record<string, number> = {
attachments: 3, attachments: 3,
notes: 1, notes: 1,
options: 0, options: 0,
note_embeddings: 3
}; };
function sendPing(client: WebSocket, entityChangeIds = []) { function sendPing(client: WebSocket, entityChangeIds = []) {