mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-11-02 14:12:42 +08:00
create llm index service
This commit is contained in:
parent
3f37196fe7
commit
730d123802
@ -30,6 +30,10 @@ sql_init.initializeDb();
|
|||||||
const { initializeEmbeddings } = await import("./services/llm/embeddings/init.js");
|
const { initializeEmbeddings } = await import("./services/llm/embeddings/init.js");
|
||||||
await initializeEmbeddings();
|
await initializeEmbeddings();
|
||||||
|
|
||||||
|
// Bring up the LLM index service. The module is loaded lazily; a rejected
// initialize() is only reported on the console and does not abort startup.
const { default: indexService } = await import("./services/llm/index_service.js");
const pendingIndexInit = indexService.initialize();
await pendingIndexInit.catch((initError) => console.error("Failed to initialize index service:", initError));
|
||||||
|
|
||||||
// view engine setup
|
// view engine setup
|
||||||
app.set("views", path.join(scriptDir, "views"));
|
app.set("views", path.join(scriptDir, "views"));
|
||||||
app.set("view engine", "ejs");
|
app.set("view engine", "ejs");
|
||||||
|
|||||||
@ -11,6 +11,8 @@ import type { Message, ChatCompletionOptions } from "../../services/llm/ai_inter
|
|||||||
import * as aiServiceManagerModule from "../../services/llm/ai_service_manager.js";
|
import * as aiServiceManagerModule from "../../services/llm/ai_service_manager.js";
|
||||||
import triliumContextService from "../../services/llm/trilium_context_service.js";
|
import triliumContextService from "../../services/llm/trilium_context_service.js";
|
||||||
import sql from "../../services/sql.js";
|
import sql from "../../services/sql.js";
|
||||||
|
// Import the index service for knowledge base management
|
||||||
|
import indexService from "../../services/llm/index_service.js";
|
||||||
|
|
||||||
// LLM service constants
|
// LLM service constants
|
||||||
export const LLM_CONSTANTS = {
|
export const LLM_CONSTANTS = {
|
||||||
@ -885,11 +887,238 @@ async function sendMessage(req: Request, res: Response) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Route handler: report knowledge base indexing statistics.
 *
 * Resolves with whatever `indexService.getIndexingStats()` yields. Any failure,
 * including the database not being initialized yet, is logged and re-thrown as
 * an Error whose message starts with "Failed to get index stats: ".
 */
async function getIndexStats(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        return await indexService.getIndexingStats();
    } catch (error: any) {
        log.error(`Error getting index stats: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to get index stats: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: start or update knowledge base indexing.
 *
 * Request body (all optional):
 *  - `batchSize`: when present and truthy, a limited batch run is requested
 *    instead of a full indexing run. Must be a positive integer (a numeric
 *    string is accepted); anything else is rejected.
 *  - `force`: only the boolean `true` or the string "true" request a forced
 *    reindex. Other values (notably the string "false", which is truthy in
 *    JavaScript) are treated as "not forced".
 *
 * @returns `{ success, message }` describing whether indexing was started.
 * @throws Error prefixed with "Failed to start indexing: " on any failure,
 *         including an invalid `batchSize` or an uninitialized database.
 */
async function startIndexing(req: Request, res: Response) {
    try {
        if (!isDatabaseInitialized()) {
            throw new Error('Database is not initialized yet');
        }

        const { force, batchSize } = req.body || {};

        if (batchSize) {
            // The body is client-controlled: normalize before handing it to the service
            const size = Number(batchSize);
            if (!Number.isInteger(size) || size <= 0) {
                throw new Error('batchSize must be a positive integer');
            }

            // Run a limited batch indexing
            const started = await indexService.runBatchIndexing(size);
            return {
                success: started,
                message: started ? `Batch indexing started with size ${size}` : 'Indexing already in progress'
            };
        }

        // Strict check so that e.g. the string "false" does not trigger a forced reindex
        const forceReindex = force === true || force === 'true';

        // Start full indexing
        const started = await indexService.startFullIndexing(forceReindex);
        return {
            success: started,
            message: started ? 'Full indexing started' : 'Indexing already in progress or not needed'
        };
    } catch (error: any) {
        log.error(`Error starting indexing: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to start indexing: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: list notes whose indexing attempt failed.
 *
 * Query string:
 *  - `limit`: maximum number of entries to return. Defaults to 100; a missing,
 *    non-numeric, or non-positive value also falls back to 100 so that NaN or
 *    a negative number is never forwarded to the index service.
 *
 * @returns `{ count, failedNotes }`.
 * @throws Error prefixed with "Failed to get failed indexes: " on any failure.
 */
async function getFailedIndexes(req: Request, res: Response) {
    try {
        if (!isDatabaseInitialized()) {
            throw new Error('Database is not initialized yet');
        }

        const defaultLimit = 100;
        const rawLimit = req.query.limit;
        // A query value may be absent or (for repeated keys) not a plain string
        const parsedLimit = typeof rawLimit === 'string' ? parseInt(rawLimit, 10) : NaN;
        const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : defaultLimit;

        const failedNotes = await indexService.getFailedIndexes(limit);

        return {
            count: failedNotes.length,
            failedNotes
        };
    } catch (error: any) {
        log.error(`Error getting failed indexes: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to get failed indexes: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: re-queue a single note whose indexing previously failed.
 *
 * The note is identified by the `noteId` route parameter.
 *
 * @returns `{ success, message }`; `success` is the value returned by
 *          `indexService.retryFailedNote`.
 * @throws Error prefixed with "Failed to retry index: " on any failure,
 *         including a missing `noteId` or an uninitialized database.
 */
async function retryFailedIndex(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        const noteId = req.params.noteId;
        if (!noteId) {
            throw new Error('Note ID is required');
        }

        const success = await indexService.retryFailedNote(noteId);
        const message = success
            ? `Note ${noteId} queued for retry`
            : `Note ${noteId} not found in failed queue`;

        return { success, message };
    } catch (error: any) {
        log.error(`Error retrying failed index: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to retry index: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: re-queue every note whose indexing previously failed.
 *
 * @returns `{ success: true, count, message }` where `count` is the value
 *          returned by `indexService.retryAllFailedNotes`.
 * @throws Error prefixed with "Failed to retry indexes: " on any failure.
 */
async function retryAllFailedIndexes(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        const count = await indexService.retryAllFailedNotes();
        const message = `${count} notes queued for retry`;

        return { success: true, count, message };
    } catch (error: any) {
        log.error(`Error retrying all failed indexes: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to retry indexes: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: find notes similar to a text query.
 *
 * Request body:
 *  - `query` (required): non-blank string to search for.
 *  - `contextNoteId` (optional): forwarded unchanged to the index service.
 *  - `limit` (optional): forwarded to the index service; a falsy value becomes 10.
 *
 * @returns `{ count, similarNotes }`.
 * @throws Error prefixed with "Failed to find similar notes: " on any failure,
 *         including a missing or blank `query`.
 */
async function findSimilarNotes(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        const { query, contextNoteId, limit } = req.body || {};

        const hasUsableQuery = typeof query === 'string' && query.trim().length > 0;
        if (!hasUsableQuery) {
            throw new Error('Query is required');
        }

        const maxResults = limit || 10;
        const similarNotes = await indexService.findSimilarNotes(query, contextNoteId, maxResults);

        return {
            count: similarNotes.length,
            similarNotes
        };
    } catch (error: any) {
        log.error(`Error finding similar notes: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to find similar notes: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: build LLM context text for a query.
 *
 * Request body:
 *  - `query` (required): non-blank string.
 *  - `contextNoteId` (optional): forwarded unchanged to the index service.
 *  - `depth` (optional): forwarded to the index service; a falsy value becomes 2.
 *
 * @returns `{ context, length }` where `length` is `context.length`.
 * @throws Error prefixed with "Failed to generate query context: " on any
 *         failure, including a missing or blank `query`.
 */
async function generateQueryContext(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        const { query, contextNoteId, depth } = req.body || {};

        const hasUsableQuery = typeof query === 'string' && query.trim().length > 0;
        if (!hasUsableQuery) {
            throw new Error('Query is required');
        }

        const contextDepth = depth || 2;
        const context = await indexService.generateQueryContext(query, contextNoteId, contextDepth);

        return {
            context,
            length: context.length
        };
    } catch (error: any) {
        log.error(`Error generating query context: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to generate query context: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Route handler: index one specific note.
 *
 * The note is identified by the `noteId` route parameter and must exist in
 * becca before `indexService.generateNoteIndex` is invoked.
 *
 * @returns `{ success, noteId, noteTitle, message }`.
 * @throws Error prefixed with "Failed to index note: " on any failure,
 *         including a missing `noteId` or an unknown note.
 */
async function indexNote(req: Request, res: Response) {
    try {
        const dbReady = isDatabaseInitialized();
        if (!dbReady) {
            throw new Error('Database is not initialized yet');
        }

        const noteId = req.params.noteId;
        if (!noteId) {
            throw new Error('Note ID is required');
        }

        // Refuse to index something that does not exist
        const note = becca.getNote(noteId);
        if (!note) {
            throw new Error(`Note ${noteId} not found`);
        }

        const success = await indexService.generateNoteIndex(noteId);
        const message = success
            ? `Note "${note.title}" indexed successfully`
            : `Failed to index note "${note.title}"`;

        return {
            success,
            noteId,
            noteTitle: note.title,
            message
        };
    } catch (error: any) {
        log.error(`Error indexing note: ${error.message || 'Unknown error'}`);
        throw new Error(`Failed to index note: ${error.message || 'Unknown error'}`);
    }
}
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
|
// Chat session management
|
||||||
createSession,
|
createSession,
|
||||||
getSession,
|
getSession,
|
||||||
updateSession,
|
updateSession,
|
||||||
listSessions,
|
listSessions,
|
||||||
deleteSession,
|
deleteSession,
|
||||||
sendMessage
|
sendMessage,
|
||||||
|
|
||||||
|
// Knowledge base index management
|
||||||
|
getIndexStats,
|
||||||
|
startIndexing,
|
||||||
|
getFailedIndexes,
|
||||||
|
retryFailedIndex,
|
||||||
|
retryAllFailedIndexes,
|
||||||
|
findSimilarNotes,
|
||||||
|
generateQueryContext,
|
||||||
|
indexNote
|
||||||
};
|
};
|
||||||
|
|||||||
@ -384,6 +384,7 @@ function register(app: express.Application) {
|
|||||||
apiRoute(PST, "/api/embeddings/retry/:noteId", embeddingsRoute.retryFailedNote);
|
apiRoute(PST, "/api/embeddings/retry/:noteId", embeddingsRoute.retryFailedNote);
|
||||||
apiRoute(PST, "/api/embeddings/retry-all-failed", embeddingsRoute.retryAllFailedNotes);
|
apiRoute(PST, "/api/embeddings/retry-all-failed", embeddingsRoute.retryAllFailedNotes);
|
||||||
|
|
||||||
|
// LLM chat session management endpoints
|
||||||
apiRoute(PST, "/api/llm/sessions", llmRoute.createSession);
|
apiRoute(PST, "/api/llm/sessions", llmRoute.createSession);
|
||||||
apiRoute(GET, "/api/llm/sessions", llmRoute.listSessions);
|
apiRoute(GET, "/api/llm/sessions", llmRoute.listSessions);
|
||||||
apiRoute(GET, "/api/llm/sessions/:sessionId", llmRoute.getSession);
|
apiRoute(GET, "/api/llm/sessions/:sessionId", llmRoute.getSession);
|
||||||
@ -391,6 +392,16 @@ function register(app: express.Application) {
|
|||||||
apiRoute(DEL, "/api/llm/sessions/:sessionId", llmRoute.deleteSession);
|
apiRoute(DEL, "/api/llm/sessions/:sessionId", llmRoute.deleteSession);
|
||||||
apiRoute(PST, "/api/llm/sessions/:sessionId/messages", llmRoute.sendMessage);
|
apiRoute(PST, "/api/llm/sessions/:sessionId/messages", llmRoute.sendMessage);
|
||||||
route(GET, "/api/llm/sessions/:sessionId/messages", [auth.checkApiAuth, csrfMiddleware], llmRoute.sendMessage, apiResultHandler);
|
route(GET, "/api/llm/sessions/:sessionId/messages", [auth.checkApiAuth, csrfMiddleware], llmRoute.sendMessage, apiResultHandler);
|
||||||
|
|
||||||
|
// LLM index management endpoints
|
||||||
|
apiRoute(GET, "/api/llm/index/stats", llmRoute.getIndexStats);
|
||||||
|
apiRoute(PST, "/api/llm/index/start", llmRoute.startIndexing);
|
||||||
|
apiRoute(GET, "/api/llm/index/failed", llmRoute.getFailedIndexes);
|
||||||
|
apiRoute(PST, "/api/llm/index/retry/:noteId", llmRoute.retryFailedIndex);
|
||||||
|
apiRoute(PST, "/api/llm/index/retry-all", llmRoute.retryAllFailedIndexes);
|
||||||
|
apiRoute(PST, "/api/llm/index/similar", llmRoute.findSimilarNotes);
|
||||||
|
apiRoute(PST, "/api/llm/index/context", llmRoute.generateQueryContext);
|
||||||
|
apiRoute(PST, "/api/llm/index/notes/:noteId", llmRoute.indexNote);
|
||||||
|
|
||||||
// Ollama API endpoints
|
// Ollama API endpoints
|
||||||
route(PST, "/api/ollama/list-models", [auth.checkApiAuth, csrfMiddleware], ollamaRoute.listModels, apiResultHandler);
|
route(PST, "/api/ollama/list-models", [auth.checkApiAuth, csrfMiddleware], ollamaRoute.listModels, apiResultHandler);
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import { OllamaService } from './providers/ollama_service.js';
|
|||||||
import log from '../log.js';
|
import log from '../log.js';
|
||||||
import { ContextExtractor } from './context/index.js';
|
import { ContextExtractor } from './context/index.js';
|
||||||
import semanticContextService from './semantic_context_service.js';
|
import semanticContextService from './semantic_context_service.js';
|
||||||
|
import indexService from './index_service.js';
|
||||||
|
|
||||||
type ServiceProviders = 'openai' | 'anthropic' | 'ollama';
|
type ServiceProviders = 'openai' | 'anthropic' | 'ollama';
|
||||||
|
|
||||||
@ -181,6 +182,14 @@ export class AIServiceManager {
|
|||||||
getSemanticContextService() {
|
getSemanticContextService() {
|
||||||
return semanticContextService;
|
return semanticContextService;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Accessor for the knowledge base index service.
 *
 * @returns the `indexService` default export of `./index_service.js`
 */
getIndexService() {
    const service = indexService;
    return service;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't create singleton immediately, use a lazy-loading pattern
|
// Don't create singleton immediately, use a lazy-loading pattern
|
||||||
@ -208,12 +217,15 @@ export default {
|
|||||||
async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> {
|
async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> {
|
||||||
return getInstance().generateChatCompletion(messages, options);
|
return getInstance().generateChatCompletion(messages, options);
|
||||||
},
|
},
|
||||||
// Add our new methods
|
// Context and index related methods
|
||||||
getContextExtractor() {
|
getContextExtractor() {
|
||||||
return getInstance().getContextExtractor();
|
return getInstance().getContextExtractor();
|
||||||
},
|
},
|
||||||
getSemanticContextService() {
|
getSemanticContextService() {
|
||||||
return getInstance().getSemanticContextService();
|
return getInstance().getSemanticContextService();
|
||||||
|
},
|
||||||
|
getIndexService() {
|
||||||
|
return getInstance().getIndexService();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
585
src/services/llm/index_service.ts
Normal file
585
src/services/llm/index_service.ts
Normal file
@ -0,0 +1,585 @@
|
|||||||
|
/**
|
||||||
|
* LLM Index Service
|
||||||
|
*
|
||||||
|
* Centralized service for managing knowledge base indexing for LLM features.
|
||||||
|
* This service coordinates:
|
||||||
|
* - Note embedding generation and management
|
||||||
|
* - Smart context retrieval for LLM queries
|
||||||
|
* - Progressive indexing of the knowledge base
|
||||||
|
* - Optimization of the semantic search capabilities
|
||||||
|
*/
|
||||||
|
|
||||||
|
import log from "../log.js";
|
||||||
|
import options from "../options.js";
|
||||||
|
import becca from "../../becca/becca.js";
|
||||||
|
import vectorStore from "./embeddings/vector_store.js";
|
||||||
|
import providerManager from "./embeddings/providers.js";
|
||||||
|
import { ContextExtractor } from "./context/index.js";
|
||||||
|
import eventService from "../events.js";
|
||||||
|
import type { NoteEmbeddingContext } from "./embeddings/embeddings_interface.js";
|
||||||
|
import type { OptionDefinitions } from "../options_interface.js";
|
||||||
|
|
||||||
|
class IndexService {
|
||||||
|
private initialized = false;
|
||||||
|
private indexingInProgress = false;
|
||||||
|
private contextExtractor = new ContextExtractor();
|
||||||
|
private automaticIndexingInterval?: NodeJS.Timeout;
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
private defaultQueryDepth = 2;
|
||||||
|
private maxNotesPerQuery = 10;
|
||||||
|
private defaultSimilarityThreshold = 0.65;
|
||||||
|
private indexUpdateInterval = 3600000; // 1 hour in milliseconds
|
||||||
|
|
||||||
|
/**
 * Bring the index service up.
 *
 * No-op when already initialized. When the `aiEnabled` option is off the
 * method logs and returns without marking the service as initialized.
 * Otherwise it requires at least one enabled embedding provider, schedules
 * automatic indexing if `embeddingAutoUpdate` is on, and subscribes to entity
 * events.
 *
 * @throws re-throws (after logging) any error raised during setup, including
 *         "No embedding providers available".
 */
async initialize() {
    if (this.initialized) {
        return;
    }

    try {
        const aiIsOn = await options.getOptionBool('aiEnabled');
        if (!aiIsOn) {
            log.info("Index service: AI features disabled, skipping initialization");
            return;
        }

        // Without an enabled embedding provider nothing can be indexed
        const enabledProviders = await providerManager.getEnabledEmbeddingProviders();
        if (!enabledProviders || enabledProviders.length === 0) {
            throw new Error("No embedding providers available");
        }

        // Periodic background indexing is opt-in
        const autoUpdate = await options.getOptionBool('embeddingAutoUpdate');
        if (autoUpdate) {
            this.setupAutomaticIndexing();
        }

        // React to note and option changes from here on
        this.setupEventListeners();

        this.initialized = true;
        log.info("Index service initialized successfully");
    } catch (failure: any) {
        log.error(`Error initializing index service: ${failure.message || "Unknown error"}`);
        throw failure;
    }
}
|
||||||
|
|
||||||
|
/**
 * Subscribe to the events that should trigger re-indexing or reconfiguration:
 * note content changes, newly created notes, note title changes, and changes
 * to options whose name starts with "ai" or "embedding".
 */
private setupEventListeners() {
    // Content of an existing note changed
    eventService.subscribe(eventService.NOTE_CONTENT_CHANGE, ({ entity }) => {
        if (!entity || !entity.noteId) {
            return;
        }
        this.queueNoteForIndexing(entity.noteId);
    });

    // A new entity was created; only notes are of interest
    eventService.subscribe(eventService.ENTITY_CREATED, ({ entityName, entity }) => {
        if (entityName !== "notes" || !entity || !entity.noteId) {
            return;
        }
        this.queueNoteForIndexing(entity.noteId);
    });

    // A note was renamed
    eventService.subscribe(eventService.NOTE_TITLE_CHANGED, ({ noteId }) => {
        if (!noteId) {
            return;
        }
        this.queueNoteForIndexing(noteId);
    });

    // An AI / embedding related option changed
    eventService.subscribe(eventService.ENTITY_CHANGED, ({ entityName, entity }) => {
        if (entityName !== "options" || !entity || !entity.name) {
            return;
        }

        const touchesAiSettings = entity.name.startsWith('ai') || entity.name.startsWith('embedding');
        if (touchesAiSettings) {
            log.info("AI settings changed, updating index service configuration");
            this.updateConfiguration();
        }
    });
}
|
||||||
|
|
||||||
|
/**
 * (Re)schedule the periodic background indexing job.
 *
 * Any previously scheduled timer is cleared first. On every tick, if no
 * indexing run is in progress and embedding completion is below 95%, a batch
 * run of 50 is requested. Errors inside a tick are logged and swallowed.
 */
private setupAutomaticIndexing() {
    // Drop a previously scheduled timer, if there is one
    if (this.automaticIndexingInterval) {
        clearInterval(this.automaticIndexingInterval);
    }

    const tick = async () => {
        try {
            if (this.indexingInProgress) {
                return;
            }

            const stats = await vectorStore.getEmbeddingStats();

            // Nothing to do once completion has reached 95%
            if (!(stats.percentComplete < 95)) {
                return;
            }

            log.info(`Starting automatic indexing (current completion: ${stats.percentComplete}%)`);
            await this.runBatchIndexing(50); // Process 50 notes at a time
        } catch (tickError: any) {
            log.error(`Error in automatic indexing: ${tickError.message || "Unknown error"}`);
        }
    };

    this.automaticIndexingInterval = setInterval(tick, this.indexUpdateInterval);

    log.info("Automatic indexing scheduled");
}
|
||||||
|
|
||||||
|
/**
 * Refresh the service configuration from the stored options.
 *
 * Reads `embeddingUpdateInterval`, `embeddingAutoUpdate`,
 * `embeddingSimilarityThreshold` and `maxNotesPerLlmQuery`. Values that do not
 * parse to a usable number fall back to the built-in defaults (3600000 ms,
 * 0.65, 10) instead of storing NaN: a NaN interval would make the timer fire
 * almost continuously, and a NaN threshold would reject every match.
 *
 * When automatic indexing is enabled the timer is (re)scheduled if it is not
 * running yet or if the interval changed; when disabled a running timer is
 * cleared. Errors are logged and swallowed.
 */
private async updateConfiguration() {
    try {
        // Update indexing interval
        const parsedInterval = parseInt(await options.getOption('embeddingUpdateInterval') || '3600000', 10);
        const intervalMs = Number.isFinite(parsedInterval) && parsedInterval > 0 ? parsedInterval : 3600000;
        const intervalChanged = intervalMs !== this.indexUpdateInterval;
        this.indexUpdateInterval = intervalMs;

        // Update automatic indexing setting
        const autoIndexing = await options.getOptionBool('embeddingAutoUpdate');
        if (autoIndexing) {
            // setupAutomaticIndexing() clears any existing timer itself, so calling
            // it again is how a changed interval takes effect
            if (!this.automaticIndexingInterval || intervalChanged) {
                this.setupAutomaticIndexing();
            }
        } else if (this.automaticIndexingInterval) {
            clearInterval(this.automaticIndexingInterval);
            this.automaticIndexingInterval = undefined;
        }

        // Update similarity threshold
        const similarityThreshold = await options.getOption('embeddingSimilarityThreshold');
        const parsedThreshold = parseFloat(similarityThreshold || '0.65');
        this.defaultSimilarityThreshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.65;

        // Update max notes per query
        const maxNotesPerQuery = await options.getOption('maxNotesPerLlmQuery');
        const parsedMaxNotes = parseInt(maxNotesPerQuery || '10', 10);
        this.maxNotesPerQuery = Number.isFinite(parsedMaxNotes) && parsedMaxNotes > 0 ? parsedMaxNotes : 10;

        log.info("Index service configuration updated");
    } catch (error: any) {
        log.error(`Error updating index service configuration: ${error.message || "Unknown error"}`);
    }
}
|
||||||
|
|
||||||
|
/**
 * Put a note on the embedding queue as an 'UPDATE' operation.
 *
 * Initializes the service first when needed (an initialization error
 * propagates to the caller).
 *
 * NOTE(review): `priority` is accepted but never read in this method —
 * confirm whether it was meant to be forwarded to the vector store.
 *
 * @param noteId - ID of the note to queue
 * @param priority - currently unused
 * @returns true when the note was queued, false when queueing failed (the
 *          error is logged)
 */
async queueNoteForIndexing(noteId: string, priority = false) {
    if (!this.initialized) {
        await this.initialize();
    }

    let queued = false;
    try {
        await vectorStore.queueNoteForEmbedding(noteId, 'UPDATE');
        queued = true;
    } catch (queueError: any) {
        log.error(`Error queueing note ${noteId} for indexing: ${queueError.message || "Unknown error"}`);
    }
    return queued;
}
|
||||||
|
|
||||||
|
/**
 * Start full knowledge base indexing.
 *
 * With `force` every note is reprocessed unconditionally. Without it,
 * reprocessing only happens while embedding completion is below 90%.
 *
 * An already running indexing job is reported by returning false (the same
 * convention `runBatchIndexing` uses) rather than by throwing, and a run that
 * was skipped because completion is already high also returns false, so a
 * caller can distinguish "started" from "in progress or not needed".
 *
 * @param force - Whether to force reindexing of all notes
 * @returns true when reprocessing was initiated; false when indexing is
 *          already in progress, was not needed, or failed (failure is logged)
 */
async startFullIndexing(force = false) {
    if (!this.initialized) {
        await this.initialize();
    }

    if (this.indexingInProgress) {
        log.info("Skipping full indexing, indexing already in progress");
        return false;
    }

    try {
        this.indexingInProgress = true;

        if (force) {
            // Force reindexing of all notes
            await vectorStore.reprocessAllNotes();
            log.info("Forced reindexing of all notes initiated");
            return true;
        }

        // Check current stats
        const stats = await vectorStore.getEmbeddingStats();

        // Only start indexing if we're below 90% completion
        if (stats.percentComplete < 90) {
            await vectorStore.reprocessAllNotes();
            log.info("Full indexing initiated");
            return true;
        }

        log.info(`Skipping full indexing, already at ${stats.percentComplete}% completion`);
        return false;
    } catch (error: any) {
        log.error(`Error starting full indexing: ${error.message || "Unknown error"}`);
        return false;
    } finally {
        // Always release the flag, whichever path was taken above
        this.indexingInProgress = false;
    }
}
|
||||||
|
|
||||||
|
/**
 * Run one pass over the embedding queue unless an indexing job is already
 * running.
 *
 * NOTE(review): `batchSize` is never forwarded to
 * `vectorStore.processEmbeddingQueue()` in this method, so it does not limit
 * anything here — confirm whether the vector store should receive it.
 *
 * @param batchSize - Maximum number of notes to process (currently unused)
 * @returns true when the queue was processed; false when indexing was already
 *          in progress or processing failed (failure is logged)
 */
async runBatchIndexing(batchSize = 20) {
    if (!this.initialized) {
        await this.initialize();
    }

    const alreadyRunning = this.indexingInProgress;
    if (alreadyRunning) {
        return false;
    }

    let processed = false;
    try {
        this.indexingInProgress = true;

        // Drain the embedding queue
        await vectorStore.processEmbeddingQueue();
        processed = true;
    } catch (batchError: any) {
        log.error(`Error in batch indexing: ${batchError.message || "Unknown error"}`);
    } finally {
        this.indexingInProgress = false;
    }
    return processed;
}
|
||||||
|
|
||||||
|
/**
 * Report the current indexing statistics.
 *
 * The embedding statistics from the vector store are extended with
 * `isIndexing` and `automaticIndexingEnabled`. If the statistics cannot be
 * read, the error is logged and an all-zero record carrying an `error`
 * message is returned instead.
 */
async getIndexingStats() {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const embeddingStats = await vectorStore.getEmbeddingStats();

        return {
            ...embeddingStats,
            isIndexing: this.indexingInProgress,
            automaticIndexingEnabled: Boolean(this.automaticIndexingInterval)
        };
    } catch (statsError: any) {
        log.error(`Error getting indexing stats: ${statsError.message || "Unknown error"}`);

        // Fallback record so callers still receive the expected fields
        return {
            totalNotesCount: 0,
            embeddedNotesCount: 0,
            queuedNotesCount: 0,
            failedNotesCount: 0,
            percentComplete: 0,
            isIndexing: this.indexingInProgress,
            automaticIndexingEnabled: Boolean(this.automaticIndexingInterval),
            error: statsError.message || "Unknown error"
        };
    }
}
|
||||||
|
|
||||||
|
/**
 * List notes whose embedding attempt failed.
 *
 * @param limit - maximum number of entries requested from the vector store
 * @returns the vector store's result, or an empty array when the lookup
 *          fails (the error is logged)
 */
async getFailedIndexes(limit = 100) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const failedNotes = await vectorStore.getFailedEmbeddingNotes(limit);
        return failedNotes;
    } catch (lookupError: any) {
        log.error(`Error getting failed indexes: ${lookupError.message || "Unknown error"}`);
        return [];
    }
}
|
||||||
|
|
||||||
|
/**
 * Retry the embedding of one note that previously failed.
 *
 * @param noteId - ID of the note to retry
 * @returns the vector store's result, or false when the retry request itself
 *          fails (the error is logged)
 */
async retryFailedNote(noteId: string) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const retried = await vectorStore.retryFailedEmbedding(noteId);
        return retried;
    } catch (retryError: any) {
        log.error(`Error retrying failed note ${noteId}: ${retryError.message || "Unknown error"}`);
        return false;
    }
}
|
||||||
|
|
||||||
|
/**
 * Retry every embedding that previously failed.
 *
 * @returns the count reported by the vector store, or 0 when the request
 *          fails (the error is logged)
 */
async retryAllFailedNotes() {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const requeued = await vectorStore.retryAllFailedEmbeddings();
        log.info(`Queued ${requeued} failed notes for retry`);
        return requeued;
    } catch (retryError: any) {
        log.error(`Error retrying all failed notes: ${retryError.message || "Unknown error"}`);
        return 0;
    }
}
|
||||||
|
|
||||||
|
/**
 * Find notes that are semantically similar to a text query.
 *
 * The query is embedded with the first enabled embedding provider. Without a
 * `contextNoteId` the search is delegated to the vector store across all
 * notes. With one, only the context note and its descendants are considered:
 * each stored embedding is compared by cosine similarity against the query
 * and kept when it reaches the configured similarity threshold.
 *
 * @param query - Text query to find similar notes for
 * @param contextNoteId - Optional note ID to restrict search to a branch
 * @param limit - Maximum number of results to return
 * @returns array of `{ noteId, title, similarity }`; an empty array when
 *          anything fails (the error is logged)
 */
async findSimilarNotes(
    query: string,
    contextNoteId?: string,
    limit = 10
) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        const enabledProviders = await providerManager.getEnabledEmbeddingProviders();
        if (!enabledProviders || enabledProviders.length === 0) {
            throw new Error("No embedding providers available");
        }

        // The first enabled provider is the one used for querying
        const provider = enabledProviders[0];

        // Embed the query text
        const queryEmbedding = await provider.generateEmbeddings(query);

        if (!contextNoteId) {
            // Unrestricted search: let the vector store do the ranking
            const providerConfig = provider.getConfig();
            const hits = await vectorStore.findSimilarNotes(
                queryEmbedding,
                provider.name,
                providerConfig.model,
                limit,
                this.defaultSimilarityThreshold
            );

            // Attach note titles to the raw hits
            return hits.map(hit => {
                const hitNote = becca.getNote(hit.noteId);
                return {
                    noteId: hit.noteId,
                    title: hitNote ? hitNote.title : 'Unknown Note',
                    similarity: hit.similarity
                };
            });
        }

        // Restricted search: the context note must exist
        const rootNote = becca.getNote(contextNoteId);
        if (!rootNote) {
            throw new Error(`Context note ${contextNoteId} not found`);
        }

        // Gather the context note plus all of its descendants; the set also
        // guards against visiting a note twice
        const branchNoteIds = new Set<string>();
        const gatherSubtree = (currentId: string) => {
            branchNoteIds.add(currentId);
            const current = becca.getNote(currentId);
            if (!current) {
                return;
            }
            for (const child of current.getChildNotes()) {
                if (branchNoteIds.has(child.noteId)) {
                    continue;
                }
                gatherSubtree(child.noteId);
            }
        };
        gatherSubtree(contextNoteId);

        // Score every note of the branch that has a stored embedding
        const matches = [];
        const providerConfig = provider.getConfig();

        for (const candidateId of branchNoteIds) {
            const stored = await vectorStore.getEmbeddingForNote(
                candidateId,
                provider.name,
                providerConfig.model
            );
            if (!stored) {
                continue;
            }

            const similarity = vectorStore.cosineSimilarity(queryEmbedding, stored.embedding);
            if (!(similarity >= this.defaultSimilarityThreshold)) {
                continue;
            }

            const candidate = becca.getNote(candidateId);
            if (candidate) {
                matches.push({
                    noteId: candidateId,
                    title: candidate.title,
                    similarity
                });
            }
        }

        // Best matches first, capped at the requested limit
        matches.sort((a, b) => b.similarity - a.similarity);
        return matches.slice(0, limit);
    } catch (searchError: any) {
        log.error(`Error finding similar notes: ${searchError.message || "Unknown error"}`);
        return [];
    }
}
|
||||||
|
|
||||||
|
/**
 * Build a textual context block for an LLM from the notes most similar to a user query.
 *
 * Similar notes are looked up with `findSimilarNotes` (at most `maxNotesPerQuery`), and each
 * note that can still be resolved in becca contributes one section. How much is included
 * per note is controlled by `depth`:
 *   - 1: note title and the titles of its parent notes
 *   - 2: additionally the note content (summarized when longer than 2000 characters)
 *   - 3: additionally the titles of up to five child notes
 *   - 4: additionally the note's owned attributes (minus internal/inheritance ones)
 *
 * Errors raised while gathering the context are logged and turned into a generic fallback
 * message. The `initialize()` call happens before the try block, so a failure there is
 * propagated to the caller instead.
 *
 * @param query - The user's question
 * @param contextNoteId - Optional ID of a note to use as context root
 * @param depth - Depth of context to include (1-4)
 * @returns The context text, or a fallback message when no usable note was found or an
 *          error occurred
 */
async generateQueryContext(
    query: string,
    contextNoteId?: string,
    depth = 2
) {
    // Content longer than this many characters is summarized instead of included verbatim.
    const summarizeAboveLength = 2000;
    // Maximum number of child note titles listed per note.
    const maxChildTitles = 5;
    const noRelevantNotesMessage = "I'm an AI assistant helping with your Trilium notes. I couldn't find specific notes related to your query, but I'll try to assist based on general knowledge.";

    if (!this.initialized) {
        await this.initialize();
    }

    try {
        // Find similar notes to the query
        const similarNotes = await this.findSimilarNotes(
            query,
            contextNoteId,
            this.maxNotesPerQuery
        );

        if (similarNotes.length === 0) {
            return noRelevantNotesMessage;
        }

        // Build context from the similar notes
        let context = `I found some relevant information in your notes that may help answer: "${query}"\n\n`;
        let includedNotes = 0;

        for (const note of similarNotes) {
            const noteObj = becca.getNote(note.noteId);
            // The similarity result may reference a note becca no longer knows about.
            if (!noteObj) continue;

            includedNotes++;
            context += `## ${noteObj.title}\n`;

            // Add parent context for better understanding.
            // NOTE(review): this joins the titles returned by getParentNotes() (presumably the
            // direct parents, not an ancestor chain) with ' > ' — confirm that reads as intended.
            const parents = noteObj.getParentNotes();
            if (parents.length > 0) {
                context += `Path: ${parents.map(p => p.title).join(' > ')}\n`;
            }

            // Add content based on depth
            if (depth >= 2) {
                const content = await this.contextExtractor.getNoteContent(note.noteId);
                if (content) {
                    // For larger content, use summary
                    if (content.length > summarizeAboveLength) {
                        const summary = await this.contextExtractor.summarizeContent(content, noteObj.title);
                        context += `${summary}\n[Content summarized due to length]\n\n`;
                    } else {
                        context += `${content}\n\n`;
                    }
                }
            }

            // Add child note titles for more context if depth >= 3
            if (depth >= 3) {
                const childNotes = noteObj.getChildNotes();
                if (childNotes.length > 0) {
                    context += `Child notes: ${childNotes.slice(0, maxChildTitles).map(n => n.title).join(', ')}`;
                    if (childNotes.length > maxChildTitles) {
                        context += ` and ${childNotes.length - maxChildTitles} more`;
                    }
                    context += `\n\n`;
                }
            }

            // Add attribute context for even deeper understanding if depth >= 4
            if (depth >= 4) {
                const attributes = noteObj.getOwnedAttributes();
                if (attributes.length > 0) {
                    // Skip attributes whose name starts with '_', 'child:' or 'relation:'.
                    const relevantAttrs = attributes.filter(a =>
                        !a.name.startsWith('_') && !a.name.startsWith('child:') && !a.name.startsWith('relation:')
                    );

                    if (relevantAttrs.length > 0) {
                        // Labels are rendered as "#name[=value]", everything else as "~name[=value]".
                        context += `Attributes: ${relevantAttrs.map(a =>
                            `${a.type === 'label' ? '#' : '~'}${a.name}${a.value ? '=' + a.value : ''}`
                        ).join(', ')}\n\n`;
                    }
                }
            }
        }

        // None of the matches could be resolved to an actual note: do not claim that relevant
        // information was found when the context would contain no note at all.
        if (includedNotes === 0) {
            return noRelevantNotesMessage;
        }

        // Add instructions about how to reference the notes
        context += "When referring to information from these notes in your response, please cite them by their titles " +
            "(e.g., \"According to your note on [Title]...\"). If the information doesn't contain what you need, " +
            "just say so and use your general knowledge instead.";

        return context;
    } catch (error: any) {
        log.error(`Error generating query context: ${error.message || "Unknown error"}`);
        return "I'm an AI assistant helping with your Trilium notes. I encountered an error while retrieving context from your notes, but I'll try to assist based on general knowledge.";
    }
}
|
||||||
|
|
||||||
|
/**
 * Generate and store embeddings for a single note with every enabled embedding provider.
 *
 * The note's embedding context is fetched once from the vector store and handed to each
 * provider in turn. A provider that throws is logged and skipped so the remaining
 * providers still get a chance to index the note.
 *
 * The `initialize()` call happens before the try block, so a failure there is propagated
 * to the caller rather than reported as `false`.
 *
 * @param noteId - ID of the note to index
 * @returns `true` when at least one embedding was stored; `false` when the note does not
 *          exist, nothing could be stored (no enabled provider, every provider failed or
 *          returned no embedding), or an unexpected error occurred
 */
async generateNoteIndex(noteId: string) {
    if (!this.initialized) {
        await this.initialize();
    }

    try {
        if (!becca.getNote(noteId)) {
            throw new Error(`Note ${noteId} not found`);
        }

        // Get complete note context for indexing
        const context = await vectorStore.getNoteEmbeddingContext(noteId);

        // Generate and store an embedding with each enabled provider
        const providers = await providerManager.getEnabledEmbeddingProviders();
        let storedEmbeddings = 0;

        for (const provider of providers) {
            try {
                const embedding = await provider.generateNoteEmbeddings(context);
                if (!embedding) {
                    continue;
                }

                const config = provider.getConfig();
                await vectorStore.storeNoteEmbedding(
                    noteId,
                    provider.name,
                    config.model,
                    embedding
                );
                storedEmbeddings++;
            } catch (error: any) {
                // Best effort: one failing provider must not prevent the others from running.
                log.error(`Error generating embedding with provider ${provider.name} for note ${noteId}: ${error.message || "Unknown error"}`);
            }
        }

        // Only report success when the note actually ended up in the index.
        if (storedEmbeddings === 0) {
            log.error(`No embedding was stored for note ${noteId} (enabled providers: ${providers.length})`);
            return false;
        }

        return true;
    } catch (error: any) {
        log.error(`Error generating note index for ${noteId}: ${error.message || "Unknown error"}`);
        return false;
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Single IndexService instance, created at module load and exposed as the default export.
const indexService = new IndexService();

export { indexService as default };
|
||||||
Loading…
x
Reference in New Issue
Block a user