feat(llm): add utils for excluding notes from LLM

This commit is contained in:
perf3ct 2025-06-01 02:51:36 +00:00
parent ba98bd9097
commit 3fae664877
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
6 changed files with 138 additions and 0 deletions

View File

@ -211,5 +211,10 @@ export const LLM_CONSTANTS = {
CONTENT: {
MAX_NOTE_CONTENT_LENGTH: 1500,
MAX_TOTAL_CONTENT_LENGTH: 10000
},
// AI Feature Exclusion
AI_EXCLUSION: {
LABEL_NAME: 'aiExclude' // Label used to exclude notes from all AI/LLM features
}
};

View File

@ -18,6 +18,7 @@ import cacheManager from '../modules/cache_manager.js';
import type { NoteSearchResult } from '../../interfaces/context_interfaces.js';
import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js';
import { SEARCH_CONSTANTS } from '../../constants/search_constants.js';
import { isNoteExcludedFromAI } from '../../utils/ai_exclusion_utils.js';
export interface VectorSearchOptions {
maxResults?: number;
@ -118,6 +119,11 @@ export class VectorSearchService {
return null;
}
// Check if this note is excluded from AI features
if (isNoteExcludedFromAI(note)) {
return null; // Skip this note if it has the AI exclusion label
}
// Get note content - full or summarized based on option
let content: string | null = null;
@ -289,6 +295,12 @@ export class VectorSearchService {
for (const noteId of noteIds) {
try {
// Check if this note is excluded from AI features
const note = becca.getNote(noteId);
if (!note || isNoteExcludedFromAI(note)) {
continue; // Skip this note if it doesn't exist or has the AI exclusion label
}
// Get note embedding
const embeddingResult = await vectorStore.getEmbeddingForNote(
noteId,

View File

@ -9,6 +9,7 @@ import { deleteNoteEmbeddings } from "./storage.js";
import type { QueueItem } from "./types.js";
import { getChunkingOperations } from "./chunking/chunking_interface.js";
import indexService from '../index_service.js';
import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js";
// Track which notes are currently being processed
const notesInProcess = new Set<string>();
@ -261,6 +262,17 @@ export async function processEmbeddingQueue() {
continue;
}
// Check if this note is excluded from AI features
if (isNoteExcludedFromAIById(noteId)) {
log.info(`Note ${noteId} excluded from AI features, removing from embedding queue`);
await sql.execute(
"DELETE FROM embedding_queue WHERE noteId = ?",
[noteId]
);
await deleteNoteEmbeddings(noteId); // Also remove any existing embeddings
continue;
}
if (noteData.operation === 'DELETE') {
await deleteNoteEmbeddings(noteId);
await sql.execute(

View File

@ -8,6 +8,9 @@ import entityChangesService from "../../../services/entity_changes.js";
import type { EntityChange } from "../../../services/entity_changes_interface.js";
import { EMBEDDING_CONSTANTS } from "../constants/embedding_constants.js";
import { SEARCH_CONSTANTS } from '../constants/search_constants.js';
import type { NoteEmbeddingContext } from "./embeddings_interface.js";
import becca from "../../../becca/becca.js";
import { isNoteExcludedFromAIById } from "../utils/ai_exclusion_utils.js";
interface Similarity {
noteId: string;
@ -452,6 +455,11 @@ async function processEmbeddings(queryEmbedding: Float32Array, embeddings: any[]
: '';
for (const e of embeddings) {
// Check if this note is excluded from AI features
if (isNoteExcludedFromAIById(e.noteId)) {
continue; // Skip this note if it has the AI exclusion label
}
const embVector = bufferToEmbedding(e.embedding, e.dimension);
// Detect content type from mime type if available

View File

@ -20,6 +20,7 @@ import sql from "../sql.js";
import sqlInit from "../sql_init.js";
import { CONTEXT_PROMPTS } from './constants/llm_prompt_constants.js';
import { SEARCH_CONSTANTS } from './constants/search_constants.js';
import { isNoteExcludedFromAI } from "./utils/ai_exclusion_utils.js";
export class IndexService {
private initialized = false;
@ -803,6 +804,12 @@ export class IndexService {
throw new Error(`Note ${noteId} not found`);
}
// Check if this note is excluded from AI features
if (isNoteExcludedFromAI(note)) {
log.info(`Note ${noteId} (${note.title}) excluded from AI indexing due to exclusion label`);
return true; // Return true to indicate successful handling (exclusion is intentional)
}
// Check where embedding generation should happen
const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client';

View File

@ -0,0 +1,94 @@
import becca from '../../../becca/becca.js';
import type BNote from '../../../becca/entities/bnote.js';
import { LLM_CONSTANTS } from '../constants/provider_constants.js';
import log from '../../log.js';
/**
 * Decide whether a note has opted out of every AI/LLM feature.
 *
 * A note is considered excluded when it carries the label named by
 * `LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME`. Each positive match is written to
 * the info log.
 *
 * @param note - The note entity to inspect; a falsy value is treated as "not excluded".
 * @returns `true` only when the exclusion label is present; `false` for a
 *          missing note, an unlabelled note, or when the lookup throws.
 */
export function isNoteExcludedFromAI(note: BNote): boolean {
    if (note) {
        try {
            const exclusionLabel = LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME;

            if (!note.hasLabel(exclusionLabel)) {
                return false;
            }

            log.info(`Note ${note.noteId} (${note.title}) excluded from AI features due to ${exclusionLabel} label`);
            return true;
        } catch (err) {
            // Fail open: a broken label lookup is logged and the note stays eligible.
            log.error(`Error checking AI exclusion for note ${note.noteId}: ${err}`);
        }
    }

    return false;
}
/**
 * ID-based variant of the AI exclusion check.
 *
 * Resolves the note through `becca.getNote` and delegates the decision to
 * `isNoteExcludedFromAI`.
 *
 * @param noteId - Identifier of the note to look up; an empty value yields `false`.
 * @returns `true` when the resolved note is excluded from AI features;
 *          `false` when the ID is empty, no note is found, or the lookup throws.
 */
export function isNoteExcludedFromAIById(noteId: string): boolean {
    if (noteId) {
        try {
            const resolved = becca.getNote(noteId);
            return resolved ? isNoteExcludedFromAI(resolved) : false;
        } catch (err) {
            // Fail open: the error is logged and the note is treated as not excluded.
            log.error(`Error checking AI exclusion for note ID ${noteId}: ${err}`);
        }
    }

    return false;
}
/**
 * Produce a copy of a note list without the notes that are excluded from AI features.
 *
 * @param notes - Notes to screen; the input array is not modified.
 * @returns A new array holding, in their original order, the notes for which
 *          `isNoteExcludedFromAI` returns a falsy result.
 */
export function filterAIExcludedNotes(notes: BNote[]): BNote[] {
    const isAllowed = (candidate: BNote): boolean => !isNoteExcludedFromAI(candidate);

    return notes.filter(isAllowed);
}
/**
 * Produce a copy of a note-ID list without the IDs whose notes are excluded from AI features.
 *
 * @param noteIds - Note identifiers to screen; the input array is not modified.
 * @returns A new array holding, in their original order, the IDs for which
 *          `isNoteExcludedFromAIById` returns a falsy result.
 */
export function filterAIExcludedNoteIds(noteIds: string[]): string[] {
    return noteIds.filter((candidateId) => {
        const excluded = isNoteExcludedFromAIById(candidateId);
        return !excluded;
    });
}
/**
 * Report whether a list of notes contains at least one note excluded from AI features.
 *
 * @param notes - Notes to scan.
 * @returns `true` as soon as `isNoteExcludedFromAI` accepts one of the notes;
 *          `false` when none match or the list is empty.
 */
export function hasAIExcludedNotes(notes: BNote[]): boolean {
    for (const candidate of notes) {
        if (isNoteExcludedFromAI(candidate)) {
            return true;
        }
    }

    return false;
}
/**
 * Expose the configured AI exclusion label name.
 *
 * Lets callers read the label from one place instead of reaching into
 * `LLM_CONSTANTS` themselves.
 *
 * @returns The value of `LLM_CONSTANTS.AI_EXCLUSION.LABEL_NAME`.
 */
export function getAIExclusionLabelName(): string {
    const { LABEL_NAME: exclusionLabel } = LLM_CONSTANTS.AI_EXCLUSION;

    return exclusionLabel;
}