// Mirror of https://github.com/TriliumNext/Notes.git (synced 2025-07-28 10:32:27 +08:00)
// 930 lines, 38 KiB, TypeScript
/**
 * LLM Index Service
 *
 * Centralized service for managing knowledge base indexing for LLM features.
 * This service coordinates:
 * - Note embedding generation and management
 * - Smart context retrieval for LLM queries
 * - Progressive indexing of the knowledge base
 * - Optimization of the semantic search capabilities
 */
|
|
|
|
import log from "../log.js";
|
|
import options from "../options.js";
|
|
import becca from "../../becca/becca.js";
|
|
import vectorStore from "./embeddings/index.js";
|
|
import providerManager from "./providers/providers.js";
|
|
import { ContextExtractor } from "./context/index.js";
|
|
import eventService from "../events.js";
|
|
import sql from "../sql.js";
|
|
import sqlInit from "../sql_init.js";
|
|
import { CONTEXT_PROMPTS } from './constants/llm_prompt_constants.js';
|
|
import { SEARCH_CONSTANTS } from './constants/search_constants.js';
|
|
import { isNoteExcludedFromAI } from "./utils/ai_exclusion_utils.js";
|
|
|
|
export class IndexService {
|
|
private initialized = false;
|
|
private indexingInProgress = false;
|
|
private contextExtractor = new ContextExtractor();
|
|
private automaticIndexingInterval?: NodeJS.Timeout;
|
|
|
|
// Index rebuilding tracking
|
|
private indexRebuildInProgress = false;
|
|
private indexRebuildProgress = 0;
|
|
private indexRebuildTotal = 0;
|
|
private indexRebuildCurrent = 0;
|
|
|
|
// Configuration
|
|
private defaultQueryDepth = SEARCH_CONSTANTS.HIERARCHY.DEFAULT_QUERY_DEPTH;
|
|
private maxNotesPerQuery = SEARCH_CONSTANTS.HIERARCHY.MAX_NOTES_PER_QUERY;
|
|
private defaultSimilarityThreshold = SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD;
|
|
private indexUpdateInterval = 3600000; // 1 hour in milliseconds
|
|
|
|
/**
 * Bring the index service into a usable state.
 *
 * Does nothing when the service is already initialized. Returns early
 * (leaving the service uninitialized) when the database is not ready yet
 * or when the `aiEnabled` option is not "true". Otherwise it verifies that
 * at least one embedding provider is enabled, optionally schedules
 * automatic indexing, registers the event listeners and marks the service
 * as initialized.
 *
 * @throws Any error raised during setup (including "No embedding providers
 *         available"); the error is logged before being rethrown.
 */
async initialize() {
    if (this.initialized) {
        return;
    }

    try {
        // Without a database there is nothing to read options or notes from.
        const databaseReady = sqlInit.isDbInitialized();
        if (!databaseReady) {
            log.info("Index service: Database not initialized yet, skipping initialization");
            return;
        }

        if (options.getOptionOrNull('aiEnabled') !== "true") {
            log.info("Index service: AI features disabled, skipping initialization");
            return;
        }

        // The embedding system needs at least one enabled provider.
        const embeddingProviders = await providerManager.getEnabledEmbeddingProviders();
        if (!embeddingProviders || embeddingProviders.length < 1) {
            throw new Error("No embedding providers available");
        }

        // Embeddings are processed here either in "client" mode or when this
        // instance is the sync server designated for embedding generation.
        const generationLocation = (await options.getOption('embeddingGenerationLocation')) || 'client';
        const actsAsSyncServer = await this.isSyncServerForEmbeddings();
        const processesEmbeddingsHere = generationLocation === 'client' || actsAsSyncServer;

        const autoUpdateEnabled = await options.getOptionBool('embeddingAutoUpdateEnabled');
        if (autoUpdateEnabled) {
            if (processesEmbeddingsHere) {
                this.setupAutomaticIndexing();
                log.info(`Index service: Automatic indexing enabled, processing embeddings ${actsAsSyncServer ? 'as sync server' : 'as client'}`);
            } else {
                log.info("Index service: Automatic indexing enabled, but this instance is not configured to process embeddings");
            }
        }

        // React to note and option changes from now on.
        this.setupEventListeners();

        this.initialized = true;
        log.info("Index service initialized successfully");
    } catch (error: any) {
        log.error(`Error initializing index service: ${error.message || "Unknown error"}`);
        throw error;
    }
}
|
|
|
|
/**
 * Subscribe to the events that should trigger index maintenance:
 * note content changes, newly created notes and note title changes queue
 * the affected note for indexing; changes to options whose name starts
 * with "ai" or "embedding" reload this service's configuration.
 */
private setupEventListeners() {
    // Content of an existing note changed.
    eventService.subscribe(eventService.NOTE_CONTENT_CHANGE, async ({ entity }) => {
        if (!entity || !entity.noteId) {
            return;
        }

        // Queueing always happens; whether the queue is processed on this
        // instance is decided later, based on configuration.
        await this.queueNoteForIndexing(entity.noteId);
    });

    // A new entity was created; only notes are of interest.
    eventService.subscribe(eventService.ENTITY_CREATED, async ({ entityName, entity }) => {
        if (entityName !== "notes" || !entity || !entity.noteId) {
            return;
        }

        await this.queueNoteForIndexing(entity.noteId);
    });

    // A note was renamed.
    eventService.subscribe(eventService.NOTE_TITLE_CHANGED, async ({ noteId }) => {
        if (!noteId) {
            return;
        }

        await this.queueNoteForIndexing(noteId);
    });

    // An option changed; refresh configuration for AI/embedding options.
    eventService.subscribe(eventService.ENTITY_CHANGED, async ({ entityName, entity }) => {
        if (entityName !== "options" || !entity || !entity.name) {
            return;
        }

        const optionName = entity.name;
        const isAiSetting = optionName.startsWith('ai') || optionName.startsWith('embedding');
        if (isAiSetting) {
            log.info("AI settings changed, updating index service configuration");
            await this.updateConfiguration();
        }
    });
}
|
|
|
|
/**
 * (Re)schedule the periodic automatic indexing job.
 *
 * Any previously scheduled timer is cleared first. On every tick the job
 * is skipped while another indexing run is in progress or when this
 * instance is not the one that processes embeddings; otherwise a batch is
 * run as long as embedding coverage is below 95%. Errors inside a tick are
 * logged and never propagate out of the timer callback.
 */
private setupAutomaticIndexing() {
    // Replace, rather than stack, an existing timer.
    if (this.automaticIndexingInterval) {
        clearInterval(this.automaticIndexingInterval);
    }

    const runScheduledIndexing = async () => {
        try {
            if (this.indexingInProgress) {
                return;
            }

            // Only the instance responsible for embeddings does the work.
            const generationLocation = (await options.getOption('embeddingGenerationLocation')) || 'client';
            const actsAsSyncServer = await this.isSyncServerForEmbeddings();
            const processesEmbeddingsHere = generationLocation === 'client' || actsAsSyncServer;
            if (!processesEmbeddingsHere) {
                return;
            }

            const stats = await vectorStore.getEmbeddingStats();

            // Coverage of 95% or more is treated as complete enough.
            if (stats.percentComplete < 95) {
                log.info(`Starting automatic indexing (current completion: ${stats.percentComplete}%)`);
                await this.runBatchIndexing(50); // Process 50 notes at a time
            }
        } catch (error: any) {
            log.error(`Error in automatic indexing: ${error.message || "Unknown error"}`);
        }
    };

    this.automaticIndexingInterval = setInterval(runScheduledIndexing, this.indexUpdateInterval);

    log.info("Automatic indexing scheduled");
}
|
|
|
|
/**
 * Reload the service configuration from options.
 *
 * Refreshes the indexing interval, the automatic-indexing timer, the
 * default similarity threshold and the maximum number of notes per query.
 *
 * Option values that do not parse to a usable number are rejected and the
 * previous value is kept: a NaN interval would otherwise be handed to
 * `setInterval`, and a NaN threshold or limit would make every similarity
 * comparison / result slice come out empty. When the interval changes while
 * automatic indexing is running, the timer is rescheduled so the new period
 * actually takes effect.
 *
 * Errors are logged and swallowed; this method never throws.
 */
private async updateConfiguration() {
    try {
        // Update indexing interval (only when the option holds a positive number)
        const previousIntervalMs = this.indexUpdateInterval;
        const configuredIntervalMs = parseInt(await options.getOption('embeddingUpdateInterval') || '3600000', 10);
        if (Number.isFinite(configuredIntervalMs) && configuredIntervalMs > 0) {
            this.indexUpdateInterval = configuredIntervalMs;
        } else {
            log.error(`Index service: invalid embeddingUpdateInterval option, keeping ${previousIntervalMs} ms`);
        }
        const intervalChanged = this.indexUpdateInterval !== previousIntervalMs;

        // Check if this instance should process embeddings
        const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client';
        const isSyncServer = await this.isSyncServerForEmbeddings();
        const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer;

        // Update automatic indexing setting
        const autoIndexing = await options.getOptionBool('embeddingAutoUpdateEnabled');
        if (autoIndexing && shouldProcessEmbeddings) {
            if (!this.automaticIndexingInterval) {
                this.setupAutomaticIndexing();
                log.info(`Index service: Automatic indexing enabled, processing embeddings ${isSyncServer ? 'as sync server' : 'as client'}`);
            } else if (intervalChanged) {
                // setupAutomaticIndexing() clears the running timer and
                // creates a new one using the updated interval.
                this.setupAutomaticIndexing();
                log.info(`Index service: Automatic indexing rescheduled with interval ${this.indexUpdateInterval} ms`);
            }
        } else if (this.automaticIndexingInterval) {
            clearInterval(this.automaticIndexingInterval);
            this.automaticIndexingInterval = undefined;

            if (autoIndexing) {
                // Auto-update is still on, but another instance does the work.
                log.info("Index service: Automatic indexing disabled for this instance based on configuration");
            }
        }

        // Update similarity threshold
        const similarityThreshold = parseFloat(await options.getOption('embeddingSimilarityThreshold') || '0.65');
        if (Number.isFinite(similarityThreshold)) {
            this.defaultSimilarityThreshold = similarityThreshold;
        } else {
            log.error(`Index service: invalid embeddingSimilarityThreshold option, keeping ${this.defaultSimilarityThreshold}`);
        }

        // Update max notes per query
        const maxNotesPerQuery = parseInt(await options.getOption('maxNotesPerLlmQuery') || '10', 10);
        if (Number.isFinite(maxNotesPerQuery) && maxNotesPerQuery >= 0) {
            this.maxNotesPerQuery = maxNotesPerQuery;
        } else {
            log.error(`Index service: invalid maxNotesPerLlmQuery option, keeping ${this.maxNotesPerQuery}`);
        }

        log.info("Index service configuration updated");
    } catch (error: any) {
        log.error(`Error updating index service configuration: ${error.message || "Unknown error"}`);
    }
}
|
|
|
|
/**
 * Put a note on the embedding queue with the 'UPDATE' operation.
 *
 * Notes are queued regardless of where embeddings are generated; whether
 * this instance processes the queue is decided when the queue is run.
 *
 * @param noteId - ID of the note to queue
 * @param priority - accepted for API compatibility; not used by this method
 * @returns `true` when the note was queued, `false` when queueing failed
 *          (the failure is logged)
 */
async queueNoteForIndexing(noteId: string, priority = false) {
    if (!this.initialized) {
        await this.initialize();
    }

    let queued = false;

    try {
        await vectorStore.queueNoteForEmbedding(noteId, 'UPDATE');
        queued = true;
    } catch (error: any) {
        const reason = error.message || "Unknown error";
        log.error(`Error queueing note ${noteId} for indexing: ${reason}`);
    }

    return queued;
}
|
|
|
|
/**
 * Start full knowledge base indexing.
 *
 * - With no stored embeddings, all notes are reprocessed.
 * - With `force`, the search index is rebuilt in the background without
 *   regenerating embeddings.
 * - Otherwise, coverage below 90% triggers reprocessing of all notes, and
 *   higher coverage triggers a background index optimization.
 *
 * The background rebuild runs after this method has returned. The
 * `indexingInProgress` guard therefore stays set until that rebuild has
 * finished, instead of being cleared as soon as the rebuild is scheduled;
 * this keeps other indexing runs from starting while it is still working.
 *
 * @param force - Whether to force a rebuild of the search index
 * @returns `true` when indexing was started, `false` when this instance does
 *          not process embeddings or when starting failed (failure is logged)
 * @throws Error "Indexing already in progress" when another run is active
 */
async startFullIndexing(force = false) {
    if (!this.initialized) {
        await this.initialize();
    }

    if (this.indexingInProgress) {
        throw new Error("Indexing already in progress");
    }

    // Set once a background rebuild has taken over responsibility for
    // clearing `indexingInProgress`.
    let rebuildRunsInBackground = false;

    try {
        // Check if this instance should process embeddings
        const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client';
        const isSyncServer = await this.isSyncServerForEmbeddings();
        const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer;

        if (!shouldProcessEmbeddings) {
            log.info("Skipping full indexing as this instance is not configured to process embeddings");
            return false;
        }

        // Reset rebuild tracking for the new run
        this.indexingInProgress = true;
        this.indexRebuildInProgress = true;
        this.indexRebuildProgress = 0;
        this.indexRebuildCurrent = 0;

        const totalEmbeddings = await sql.getValue("SELECT COUNT(*) FROM note_embeddings") as number;

        if (totalEmbeddings === 0) {
            // No embeddings yet: progress is measured against the note count
            const totalNotes = await sql.getValue("SELECT COUNT(*) FROM notes WHERE isDeleted = 0") as number;
            this.indexRebuildTotal = totalNotes;

            log.info("No embeddings found, starting full embedding generation first");
            await vectorStore.reprocessAllNotes();
            log.info("Full embedding generation initiated");
            return true;
        }

        // For index rebuild, use the number of embeddings as the total
        this.indexRebuildTotal = totalEmbeddings;

        if (force) {
            log.info("Starting forced index rebuild without regenerating embeddings");
            this.scheduleSearchIndexRebuild("rebuild");
            rebuildRunsInBackground = true;
            return true;
        }

        const stats = await vectorStore.getEmbeddingStats();

        if (stats.percentComplete < 90) {
            log.info("Embedding coverage below 90%, starting full embedding generation");
            await vectorStore.reprocessAllNotes();
            log.info("Full embedding generation initiated");
        } else {
            log.info(`Embedding coverage at ${stats.percentComplete}%, starting index optimization`);
            this.scheduleSearchIndexRebuild("optimization");
            rebuildRunsInBackground = true;
        }

        return true;
    } catch (error: any) {
        log.error(`Error starting full indexing: ${error.message || "Unknown error"}`);
        this.indexRebuildInProgress = false;
        return false;
    } finally {
        if (!rebuildRunsInBackground) {
            this.indexingInProgress = false;
        }
    }
}

/**
 * Run `vectorStore.rebuildSearchIndex()` on a later tick and update the
 * rebuild/indexing flags when it settles.
 *
 * @param taskName - word used in the log messages for this run
 */
private scheduleSearchIndexRebuild(taskName: "rebuild" | "optimization") {
    setTimeout(async () => {
        try {
            await vectorStore.rebuildSearchIndex();
            this.indexRebuildProgress = 100;
            log.info(`Index ${taskName} completed successfully`);
        } catch (error: any) {
            log.error(`Error during index ${taskName}: ${error.message || "Unknown error"}`);
        } finally {
            // Success or failure, the run is over: release both flags.
            this.indexRebuildInProgress = false;
            this.indexingInProgress = false;
        }
    }, 0);
}
|
|
|
|
/**
 * Record that more notes have been processed by the running index rebuild.
 *
 * Ignored when no rebuild is in progress. The percentage is recomputed
 * (capped at 100) when a positive total is known, and the rebuild is marked
 * finished once the processed count reaches the total.
 *
 * @param processed - Number of notes processed since the last update
 */
updateIndexRebuildProgress(processed: number) {
    if (!this.indexRebuildInProgress) {
        return;
    }

    this.indexRebuildCurrent = this.indexRebuildCurrent + processed;

    const processedSoFar = this.indexRebuildCurrent;
    const expectedTotal = this.indexRebuildTotal;

    if (expectedTotal > 0) {
        const percent = Math.round((processedSoFar / expectedTotal) * 100);
        this.indexRebuildProgress = Math.min(percent, 100);
    }

    if (processedSoFar >= expectedTotal) {
        this.indexRebuildProgress = 100;
        this.indexRebuildInProgress = false;
    }
}
|
|
|
|
/**
 * Snapshot of the index rebuild tracking fields.
 *
 * @returns whether a rebuild is running, its percentage, the expected
 *          total and the number processed so far
 */
getIndexRebuildStatus() {
    const {
        indexRebuildInProgress: inProgress,
        indexRebuildProgress: progress,
        indexRebuildTotal: total,
        indexRebuildCurrent: current
    } = this;

    return { inProgress, progress, total, current };
}
|
|
|
|
/**
 * Process the embedding queue once, unless another indexing run is active
 * or this instance is not the one that processes embeddings.
 *
 * @param batchSize - Maximum number of notes to process (accepted for API
 *                    compatibility; not forwarded by this method)
 * @returns `true` when the queue was processed, `false` when the run was
 *          skipped or failed (failure is logged)
 */
async runBatchIndexing(batchSize = 20) {
    if (!this.initialized) {
        await this.initialize();
    }

    if (this.indexingInProgress) {
        return false;
    }

    this.indexingInProgress = true;

    try {
        // Only the instance responsible for embeddings processes the queue.
        const generationLocation = (await options.getOption('embeddingGenerationLocation')) || 'client';
        const actsAsSyncServer = await this.isSyncServerForEmbeddings();
        const processesEmbeddingsHere = generationLocation === 'client' || actsAsSyncServer;

        if (processesEmbeddingsHere) {
            await vectorStore.processEmbeddingQueue();
            return true;
        }

        log.info("Skipping batch indexing as this instance is not configured to process embeddings");
        return false;
    } catch (error: any) {
        log.error(`Error in batch indexing: ${error.message || "Unknown error"}`);
        return false;
    } finally {
        this.indexingInProgress = false;
    }
}
|
|
|
|
/**
 * Report embedding statistics together with this service's own state.
 *
 * @returns the vector store statistics plus `isIndexing` and
 *          `automaticIndexingEnabled`; when the statistics cannot be read,
 *          zeroed counters with an additional `error` message
 */
async getIndexingStats() {
    if (!this.initialized) {
        await this.initialize();
    }

    // Evaluated at the moment a result is built, so the flags are current.
    const currentServiceState = () => ({
        isIndexing: this.indexingInProgress,
        automaticIndexingEnabled: Boolean(this.automaticIndexingInterval)
    });

    try {
        const embeddingStats = await vectorStore.getEmbeddingStats();

        return {
            ...embeddingStats,
            ...currentServiceState()
        };
    } catch (error: any) {
        const reason = error.message || "Unknown error";
        log.error(`Error getting indexing stats: ${reason}`);

        return {
            totalNotesCount: 0,
            embeddedNotesCount: 0,
            queuedNotesCount: 0,
            failedNotesCount: 0,
            percentComplete: 0,
            ...currentServiceState(),
            error: reason
        };
    }
}
|
|
|
|
/**
 * List notes whose embedding generation failed.
 *
 * @param limit - maximum number of entries requested from the vector store
 * @returns the failed notes, or an empty array when the lookup itself
 *          fails (the failure is logged)
 */
async getFailedIndexes(limit = 100) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const failedNotes = await vectorStore.getFailedEmbeddingNotes(limit);
        return failedNotes;
    } catch (error: any) {
        const reason = error.message || "Unknown error";
        log.error(`Error getting failed indexes: ${reason}`);
        return [];
    }
}
|
|
|
|
/**
 * Ask the vector store to retry the embedding of one previously failed note.
 *
 * @param noteId - ID of the note to retry
 * @returns the vector store's result, or `false` when the retry request
 *          throws (the failure is logged)
 */
async retryFailedNote(noteId: string) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const retryResult = await vectorStore.retryFailedEmbedding(noteId);
        return retryResult;
    } catch (error: any) {
        const reason = error.message || "Unknown error";
        log.error(`Error retrying failed note ${noteId}: ${reason}`);
        return false;
    }
}
|
|
|
|
/**
 * Ask the vector store to retry every failed embedding.
 *
 * @returns the count reported by the vector store, or `0` when the request
 *          throws (the failure is logged)
 */
async retryAllFailedNotes() {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const requeuedCount = await vectorStore.retryAllFailedEmbeddings();
        log.info(`Queued ${requeuedCount} failed notes for retry`);
        return requeuedCount;
    } catch (error: any) {
        const reason = error.message || "Unknown error";
        log.error(`Error retrying all failed notes: ${reason}`);
        return 0;
    }
}
|
|
|
|
/**
 * Find notes that are semantically similar to a text query.
 *
 * The query is embedded with the provider named by the
 * `embeddingSelectedProvider` option, falling back to the first enabled
 * provider. With `contextNoteId` the search is limited to that note and its
 * descendants and similarity is computed here per note; without it the
 * search is delegated to the vector store.
 *
 * @param query - Text query to find similar notes for
 * @param contextNoteId - Optional note ID to restrict search to a branch
 * @param limit - Maximum number of results to return
 * @returns matching notes (ID, title, similarity, content type); for a branch
 *          search sorted by descending similarity. Returns an empty array
 *          when anything fails (the failure is logged).
 */
async findSimilarNotes(
    query: string,
    contextNoteId?: string,
    limit = 10
) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        // Get all enabled embedding providers
        const providers = await providerManager.getEnabledEmbeddingProviders();
        if (!providers || providers.length === 0) {
            throw new Error("No embedding providers available");
        }

        // Prefer the provider chosen in options; the module-level `options`
        // import and the list fetched above are reused for the lookup.
        const selectedEmbeddingProvider = await options.getOption('embeddingSelectedProvider');
        let provider = selectedEmbeddingProvider
            ? providers.find(p => p.name === selectedEmbeddingProvider)
            : undefined;

        if (!provider) {
            log.info(selectedEmbeddingProvider
                ? `Selected embedding provider ${selectedEmbeddingProvider} is not available, using first enabled provider`
                : 'No embedding provider selected, using first available provider');
            provider = providers[0];
        }

        if (!provider) {
            throw new Error("No suitable embedding provider found");
        }

        const config = provider.getConfig();
        log.info(`Searching with embedding provider: ${provider.name}, model: ${config.model}`);

        // Generate embedding for the query
        const embedding = await provider.generateEmbeddings(query);
        log.info(`Generated embedding for query: "${query}" (${embedding.length} dimensions)`);

        // Add the original query as a property to the embedding
        // This is used for title matching in the vector search
        Object.defineProperty(embedding, 'originalQuery', {
            value: query,
            writable: false,
            enumerable: true,
            configurable: false
        });

        // Store query text in a global cache for possible regeneration with different providers
        interface CustomGlobal {
            recentEmbeddingQueries?: Record<string, string>;
        }
        const globalWithCache = global as unknown as CustomGlobal;

        if (!globalWithCache.recentEmbeddingQueries) {
            globalWithCache.recentEmbeddingQueries = {};
        }

        // Use a substring of the embedding as a key (full embedding is too large)
        const embeddingKey = embedding.toString().substring(0, 100);
        globalWithCache.recentEmbeddingQueries[embeddingKey] = query;

        // Limit cache size to prevent memory leaks (keep max 50 recent queries)
        const keys = Object.keys(globalWithCache.recentEmbeddingQueries);
        if (keys.length > 50) {
            delete globalWithCache.recentEmbeddingQueries[keys[0]];
        }

        let similarNotes: { noteId: string; title: string; similarity: number; contentType?: string }[] = [];

        if (contextNoteId) {
            // Branch-restricted search
            const note = becca.getNote(contextNoteId);
            if (!note) {
                throw new Error(`Context note ${contextNoteId} not found`);
            }

            // Collect the context note and all of its descendants; the set
            // doubles as the visited marker so shared/cloned notes are
            // walked only once.
            const branchNoteIds = new Set<string>();
            const collectNoteIds = (noteId: string) => {
                branchNoteIds.add(noteId);
                const branchNote = becca.getNote(noteId);
                if (branchNote) {
                    for (const childNote of branchNote.getChildNotes()) {
                        if (!branchNoteIds.has(childNote.noteId)) {
                            collectNoteIds(childNote.noteId);
                        }
                    }
                }
            };

            collectNoteIds(contextNoteId);

            const { detectContentType, cosineSimilarity } = await import('./embeddings/vector_utils.js');

            for (const noteId of branchNoteIds) {
                const noteEmbedding = await vectorStore.getEmbeddingForNote(
                    noteId,
                    provider.name,
                    config.model
                );

                if (!noteEmbedding) {
                    continue;
                }

                // The note is needed for its mime type and title
                const branchNote = becca.getNote(noteId);
                if (!branchNote) {
                    continue;
                }

                // Detect content type from mime type
                const contentType = detectContentType(branchNote.mime, '');

                // Use content-aware similarity calculation
                const similarity = cosineSimilarity(
                    embedding,
                    noteEmbedding.embedding,
                    true, // normalize
                    config.model, // source model
                    noteEmbedding.providerId, // target model (use providerId)
                    contentType, // content type for padding strategy
                    undefined // use default BALANCED performance profile
                );

                if (similarity >= this.defaultSimilarityThreshold) {
                    similarNotes.push({
                        noteId,
                        title: branchNote.title,
                        similarity,
                        contentType: contentType.toString()
                    });
                }
            }

            // Sort by similarity and return top results
            return similarNotes
                .sort((a, b) => b.similarity - a.similarity)
                .slice(0, limit);
        } else {
            // Search across all notes
            const results = await vectorStore.findSimilarNotes(
                embedding,
                provider.name,
                config.model,
                limit,
                this.defaultSimilarityThreshold
            );

            // Enhance results with note titles
            similarNotes = results.map(result => {
                const resultNote = becca.getNote(result.noteId);
                return {
                    noteId: result.noteId,
                    title: resultNote ? resultNote.title : 'Unknown Note',
                    similarity: result.similarity,
                    contentType: result.contentType
                };
            });

            return similarNotes;
        }
    } catch (error: any) {
        log.error(`Error finding similar notes: ${error.message || "Unknown error"}`);
        return [];
    }
}
|
|
|
|
/**
 * Build the context text handed to the LLM for a user question, based on
 * the notes most similar to that question.
 *
 * For each similar note the title and parent titles are always included;
 * higher `depth` values add more detail:
 * - depth >= 2: note content (summarized when longer than 2000 characters)
 * - depth >= 3: titles of up to five child notes
 * - depth >= 4: owned attributes, excluding names starting with `_`,
 *   `child:` or `relation:`
 *
 * @param query - The user's question
 * @param contextNoteId - Optional ID of a note to use as context root
 * @param depth - Depth of context to include (1-4)
 * @returns the assembled context, the "no notes" prompt when nothing
 *          similar is found, or a fixed fallback message on error
 */
async generateQueryContext(
    query: string,
    contextNoteId?: string,
    depth = 2
) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const similarNotes = await this.findSimilarNotes(
            query,
            contextNoteId,
            this.maxNotesPerQuery
        );

        if (similarNotes.length === 0) {
            return CONTEXT_PROMPTS.INDEX_NO_NOTES_CONTEXT;
        }

        // Fragments are collected in order and joined once at the end.
        const fragments: string[] = [
            `I found some relevant information in your notes that may help answer: "${query}"\n\n`
        ];

        for (const { noteId } of similarNotes) {
            const noteObj = becca.getNote(noteId);
            if (!noteObj) {
                continue;
            }

            fragments.push(`## ${noteObj.title}\n`);

            // Parent titles give the model a sense of where the note lives.
            const parentTitles = noteObj.getParentNotes().map(p => p.title);
            if (parentTitles.length > 0) {
                fragments.push(`Path: ${parentTitles.join(' > ')}\n`);
            }

            // Note body, summarized when it is long.
            if (depth >= 2) {
                const content = await this.contextExtractor.getNoteContent(noteId);
                if (content) {
                    if (content.length > 2000) {
                        const summary = await this.contextExtractor.summarizeContent(content, noteObj.title);
                        fragments.push(`${summary}\n[Content summarized due to length]\n\n`);
                    } else {
                        fragments.push(`${content}\n\n`);
                    }
                }
            }

            // Titles of the first few children.
            if (depth >= 3) {
                const childNotes = noteObj.getChildNotes();
                if (childNotes.length > 0) {
                    const listedTitles = childNotes.slice(0, 5).map(n => n.title).join(', ');
                    const notListed = childNotes.length - 5;
                    const remainder = notListed > 0 ? ` and ${notListed} more` : '';
                    fragments.push(`Child notes: ${listedTitles}${remainder}\n\n`);
                }
            }

            // Owned attributes, minus the excluded name prefixes.
            if (depth >= 4) {
                const excludedPrefixes = ['_', 'child:', 'relation:'];
                const relevantAttrs = noteObj.getOwnedAttributes().filter(a =>
                    !excludedPrefixes.some(prefix => a.name.startsWith(prefix))
                );

                if (relevantAttrs.length > 0) {
                    const rendered = relevantAttrs.map(a => {
                        const sigil = a.type === 'label' ? '#' : '~';
                        const valuePart = a.value ? '=' + a.value : '';
                        return `${sigil}${a.name}${valuePart}`;
                    });
                    fragments.push(`Attributes: ${rendered.join(', ')}\n\n`);
                }
            }
        }

        // Closing instructions on how the model should cite the notes.
        fragments.push(
            "When referring to information from these notes in your response, please cite them by their titles " +
            "(e.g., \"According to your note on [Title]...\"). If the information doesn't contain what you need, " +
            "just say so and use your general knowledge instead."
        );

        return fragments.join('');
    } catch (error: any) {
        log.error(`Error generating query context: ${error.message || "Unknown error"}`);
        return "I'm an AI assistant helping with your Trilium notes. I encountered an error while retrieving context from your notes, but I'll try to assist based on general knowledge.";
    }
}
|
|
|
|
/**
 * Tell whether this instance is a sync server that should generate
 * embeddings.
 *
 * An instance counts as a sync server when the `syncServerHost` option is
 * empty; it generates embeddings only when `embeddingGenerationLocation`
 * is "sync_server" (the option defaults to "client" when empty).
 *
 * @returns `true` only when both conditions hold
 */
async isSyncServerForEmbeddings() {
    const configuredSyncHost = await options.getOption('syncServerHost');
    const generationLocation = (await options.getOption('embeddingGenerationLocation')) || 'client';

    // No upstream host configured means this instance is the sync server.
    const hasNoUpstreamServer = !configuredSyncHost;

    return hasNoUpstreamServer && generationLocation === 'sync_server';
}
|
|
|
|
/**
 * Generate and store embeddings for one note with every enabled provider.
 *
 * Notes excluded from AI features are skipped, and so is generation when
 * embeddings are configured to be produced on the sync server and this
 * instance is not that server; both cases count as handled. A failure of a
 * single provider is logged and does not stop the remaining providers.
 *
 * @param noteId - ID of the note to index
 * @returns `true` when the note was handled (indexed or intentionally
 *          skipped), `false` when the note is missing or preparation failed
 */
async generateNoteIndex(noteId: string) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const note = becca.getNote(noteId);
        if (!note) {
            throw new Error(`Note ${noteId} not found`);
        }

        // Exclusion is intentional, so it is reported as success.
        if (isNoteExcludedFromAI(note)) {
            log.info(`Note ${noteId} (${note.title}) excluded from AI indexing due to exclusion label`);
            return true;
        }

        // Leave generation to the sync server when it is configured to do it
        // and this instance is not that server.
        const generationLocation = (await options.getOption('embeddingGenerationLocation')) || 'client';
        const actsAsSyncServer = await this.isSyncServerForEmbeddings();
        if (generationLocation === 'sync_server' && !actsAsSyncServer) {
            log.info(`Note ${noteId} queued for embedding generation on sync server`);
            return true;
        }

        // Context is built once and shared by all providers.
        const context = await vectorStore.getNoteEmbeddingContext(noteId);
        const embeddingProviders = await providerManager.getEnabledEmbeddingProviders();

        for (const provider of embeddingProviders) {
            try {
                const embedding = await provider.generateNoteEmbeddings(context);
                if (!embedding) {
                    continue;
                }

                const { model } = provider.getConfig();
                await vectorStore.storeNoteEmbedding(noteId, provider.name, model, embedding);
            } catch (error: any) {
                log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${error.message || "Unknown error"}`);
            }
        }

        return true;
    } catch (error: any) {
        log.error(`Error generating note index for ${noteId}: ${error.message || "Unknown error"}`);
        return false;
    }
}
|
|
|
|
/**
 * Start embedding generation (called when AI is enabled).
 *
 * Initializes the service if needed, verifies that embedding providers are
 * available, (re)schedules automatic indexing when enabled and immediately
 * processes one batch of the embedding queue. Returns without doing work
 * when this instance is not configured to process embeddings.
 *
 * Event listeners are NOT registered here: `initialize()` registers them,
 * and nothing in this class ever unsubscribes them. Every call to
 * `setupEventListeners()` subscribes fresh handler closures, so registering
 * again on each start would make every event run its handlers once more per
 * restart (duplicate queueing and configuration reloads).
 *
 * @throws Error when AI features are disabled, when no embedding provider
 *         is available, or when any step fails; the error is logged first.
 */
async startEmbeddingGeneration() {
    try {
        log.info("Starting embedding generation system");

        const aiEnabled = options.getOptionOrNull('aiEnabled') === "true";
        if (!aiEnabled) {
            log.error("Cannot start embedding generation - AI features are disabled");
            throw new Error("AI features must be enabled first");
        }

        // Initialize if needed (this also registers the event listeners)
        if (!this.initialized) {
            await this.initialize();
        }

        // Check if this instance should process embeddings
        const embeddingLocation = await options.getOption('embeddingGenerationLocation') || 'client';
        const isSyncServer = await this.isSyncServerForEmbeddings();
        const shouldProcessEmbeddings = embeddingLocation === 'client' || isSyncServer;

        if (!shouldProcessEmbeddings) {
            log.info("This instance is not configured to process embeddings");
            return;
        }

        // Verify providers are available
        const providers = await providerManager.getEnabledEmbeddingProviders();
        if (providers.length === 0) {
            throw new Error("No embedding providers available");
        }

        log.info(`Found ${providers.length} embedding providers: ${providers.map(p => p.name).join(', ')}`);

        // Setup automatic indexing if enabled; setupAutomaticIndexing()
        // clears any existing timer first, so repeated calls are safe.
        if (await options.getOptionBool('embeddingAutoUpdateEnabled')) {
            this.setupAutomaticIndexing();
            log.info(`Automatic embedding indexing started ${isSyncServer ? 'as sync server' : 'as client'}`);
        }

        // Start processing the queue immediately
        await this.runBatchIndexing(20);

        log.info("Embedding generation started successfully");
    } catch (error: any) {
        log.error(`Error starting embedding generation: ${error.message || "Unknown error"}`);
        throw error;
    }
}
|
|
|
|
/**
 * Stop embedding generation (called when AI is disabled).
 *
 * Cancels the automatic indexing timer, stops the vector store's background
 * processing, clears all embedding providers and resets the in-progress
 * flags.
 *
 * @throws Any error raised while stopping; it is logged before being rethrown.
 */
async stopEmbeddingGeneration() {
    try {
        log.info("Stopping embedding generation system");

        // Cancel the periodic indexing job, if one is scheduled.
        const activeTimer = this.automaticIndexingInterval;
        if (activeTimer) {
            clearInterval(activeTimer);
            this.automaticIndexingInterval = undefined;
            log.info("Automatic indexing stopped");
        }

        // Stop the background processing from embeddings/events.ts
        vectorStore.stopEmbeddingBackgroundProcessing();

        // Release the embedding providers.
        providerManager.clearAllEmbeddingProviders();

        // Nothing is being indexed or rebuilt any more.
        this.indexRebuildInProgress = false;
        this.indexingInProgress = false;

        log.info("Embedding generation stopped successfully");
    } catch (error: any) {
        log.error(`Error stopping embedding generation: ${error.message || "Unknown error"}`);
        throw error;
    }
}
|
|
}
|
|
|
|
// Single shared IndexService instance, exported as the module default.
const indexService = new IndexService();

export { indexService as default };
|