From 3fae664877a7b876b753dc5d54bf6357a29ac602 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sun, 1 Jun 2025 02:51:36 +0000 Subject: [PATCH] feat(llm): add utils for excluding notes from LLM --- .../llm/constants/provider_constants.ts | 5 + .../context/services/vector_search_service.ts | 12 +++ .../src/services/llm/embeddings/queue.ts | 12 +++ .../src/services/llm/embeddings/storage.ts | 8 ++ apps/server/src/services/llm/index_service.ts | 7 ++ .../services/llm/utils/ai_exclusion_utils.ts | 94 +++++++++++++++++++ 6 files changed, 138 insertions(+) create mode 100644 apps/server/src/services/llm/utils/ai_exclusion_utils.ts diff --git a/apps/server/src/services/llm/constants/provider_constants.ts b/apps/server/src/services/llm/constants/provider_constants.ts index e1cccecc6..b54454374 100644 --- a/apps/server/src/services/llm/constants/provider_constants.ts +++ b/apps/server/src/services/llm/constants/provider_constants.ts @@ -211,5 +211,10 @@ export const LLM_CONSTANTS = { CONTENT: { MAX_NOTE_CONTENT_LENGTH: 1500, MAX_TOTAL_CONTENT_LENGTH: 10000 + }, + + // AI Feature Exclusion + AI_EXCLUSION: { + LABEL_NAME: 'aiExclude' // Label used to exclude notes from all AI/LLM features } }; diff --git a/apps/server/src/services/llm/context/services/vector_search_service.ts b/apps/server/src/services/llm/context/services/vector_search_service.ts index aa916ed0a..480ba05bd 100644 --- a/apps/server/src/services/llm/context/services/vector_search_service.ts +++ b/apps/server/src/services/llm/context/services/vector_search_service.ts @@ -18,6 +18,7 @@ import cacheManager from '../modules/cache_manager.js'; import type { NoteSearchResult } from '../../interfaces/context_interfaces.js'; import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js'; import { SEARCH_CONSTANTS } from '../../constants/search_constants.js'; +import { isNoteExcludedFromAI } from '../../utils/ai_exclusion_utils.js'; export interface VectorSearchOptions { maxResults?: number; @@ -118,6 +119,11 @@ export class VectorSearchService { return null; } + // Check if this note is excluded from AI features + if (isNoteExcludedFromAI(note)) { + return null; // Skip this note if it has the AI exclusion label + } + // Get note content - full or summarized based on option let content: string | null = null; @@ -289,6 +295,12 @@ export class VectorSearchService { for (const noteId of noteIds) { try { + // Check if this note is excluded from AI features + const note = becca.getNote(noteId); + if (!note || isNoteExcludedFromAI(note)) { + continue; // Skip this note if it doesn't exist or has the AI exclusion label + } + // Get note embedding const embeddingResult = await vectorStore.getEmbeddingForNote( noteId, diff --git a/apps/server/src/services/llm/embeddings/queue.ts b/apps/server/src/services/llm/embeddings/queue.ts index 6b3b5e415..12f915c81 100644 --- a/apps/server/src/services/llm/embeddings/queue.ts +++ b/apps/server/src/services/llm/embeddings/queue.ts @@ -9,6 +9,7 @@ import { deleteNoteEmbeddings } from "./storage.js"; import type { QueueItem } from "./types.js"; import { getChunkingOperations } from "./chunking/chunking_interface.js"; import indexService from '../index_service.js'; +import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js"; // Track which notes are currently being processed const notesInProcess = new Set(); @@ -261,6 +262,17 @@ export async function processEmbeddingQueue() { continue; } + // Check if this note is excluded from AI features + if (isNoteExcludedFromAIById(noteId)) { + log.info(`Note ${noteId} excluded from AI features, removing from embedding queue`); + await sql.execute( + "DELETE FROM embedding_queue WHERE noteId = ?", + [noteId] + ); + await deleteNoteEmbeddings(noteId); // Also remove any existing embeddings + continue; + } + if (noteData.operation === 'DELETE') { await deleteNoteEmbeddings(noteId); await sql.execute( diff --git a/apps/server/src/services/llm/embeddings/storage.ts b/apps/server/src/services/llm/embeddings/storage.ts index 675047a76..01cc2ac17 100644 --- a/apps/server/src/services/llm/embeddings/storage.ts +++ b/apps/server/src/services/llm/embeddings/storage.ts @@ -8,6 +8,9 @@ import entityChangesService from "../../../services/entity_changes.js"; import type { EntityChange } from "../../../services/entity_changes_interface.js"; import { EMBEDDING_CONSTANTS } from "../constants/embedding_constants.js"; import { SEARCH_CONSTANTS } from '../constants/search_constants.js'; +import type { NoteEmbeddingContext } from "./embeddings_interface.js"; +import becca from "../../../becca/becca.js"; +import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js"; interface Similarity { noteId: string; @@ -452,6 +455,11 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[] : ''; for (const e of embeddings) { + // Check if this note is excluded from AI features + if (isNoteExcludedFromAIById(e.noteId)) { + continue; // Skip this note if it has the AI exclusion label + } + const embVector = bufferToEmbedding(e.embedding, e.dimension); // Detect content type from mime type if available diff --git a/apps/server/src/services/llm/index_service.ts b/apps/server/src/services/llm/index_service.ts index 18eb17b79..f1182495d 100644 --- a/apps/server/src/services/llm/index_service.ts +++ b/apps/server/src/services/llm/index_service.ts @@ -20,6 +20,7 @@ import sql from "../sql.js"; import sqlInit from "../sql_init.js"; import { CONTEXT_PROMPTS } from './constants/llm_prompt_constants.js'; import { SEARCH_CONSTANTS } from './constants/search_constants.js'; +import { isNoteExcludedFromAI } from "./utils/ai_exclusion_utils.js"; export class IndexService { private initialized = false; @@ -803,6 +804,12 @@ export class IndexService { throw new Error(`Note ${noteId} not found`); } + // Check if this note is excluded from AI features + if (isNoteExcludedFromAI(note)) { + log.info(`Note ${noteId} (${note.title}) excluded from AI indexing due to exclusion label`); + return true; // Return true to indicate successful handling (exclusion is intentional) + } + // Check where embedding generation should happen const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client'; diff --git a/apps/server/src/services/llm/utils/ai_exclusion_utils.ts b/apps/server/src/services/llm/utils/ai_exclusion_utils.ts new file mode 100644 index 000000000..a9c6d6f5c --- /dev/null +++ b/apps/server/src/services/llm/utils/ai_exclusion_utils.ts @@ -0,0 +1,94 @@ +import becca from '../../../becca/becca.js'; +import type BNote from '../../../becca/entities/bnote.js'; +import { LLM_CONSTANTS } from '../constants/provider_constants.js'; +import log from '../../log.js'; + +/** + * Check if a note should be excluded from all AI/LLM features + * + * @param note - The note to check (BNote object) + * @returns true if the note should be excluded from AI features + */ +export function isNoteExcludedFromAI(note: BNote): boolean { + if (!note) { + return false; + } + + try { + // Check if the note has the AI exclusion label + const hasExclusionLabel = note.hasLabel(LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME); + + if (hasExclusionLabel) { + log.info(`Note ${note.noteId} (${note.title}) excluded from AI features due to ${LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME} label`); + return true; + } + + return false; + } catch (error) { + log.error(`Error checking AI exclusion for note ${note.noteId}: ${error}`); + return false; // Default to not excluding on error + } +} + +/** + * Check if a note should be excluded from AI features by noteId + * + * @param noteId - The ID of the note to check + * @returns true if the note should be excluded from AI features + */ +export function isNoteExcludedFromAIById(noteId: string): boolean { + if (!noteId) { + return false; + } + + try { + const note = becca.getNote(noteId); + if (!note) { + return false; + } + return isNoteExcludedFromAI(note); + } catch (error) { + log.error(`Error checking AI exclusion for note ID ${noteId}: ${error}`); + return false; // Default to not excluding on error + } +} + +/** + * Filter out notes that are excluded from AI features + * + * @param notes - Array of notes to filter + * @returns Array of notes with AI-excluded notes removed + */ +export function filterAIExcludedNotes(notes: BNote[]): BNote[] { + return notes.filter(note => !isNoteExcludedFromAI(note)); +} + +/** + * Filter out note IDs that are excluded from AI features + * + * @param noteIds - Array of note IDs to filter + * @returns Array of note IDs with AI-excluded notes removed + */ +export function filterAIExcludedNoteIds(noteIds: string[]): string[] { + return noteIds.filter(noteId => !isNoteExcludedFromAIById(noteId)); +} + +/** + * Check if any notes in an array are excluded from AI features + * + * @param notes - Array of notes to check + * @returns true if any note should be excluded from AI features + */ +export function hasAIExcludedNotes(notes: BNote[]): boolean { + return notes.some(note => isNoteExcludedFromAI(note)); +} + +/** + * Get the AI exclusion label name from constants + * This can be used in UI components or other places that need to reference the label + * + * @returns The label name used for AI exclusion + */ +export function getAIExclusionLabelName(): string { + return LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME; +}