// Notes/src/routes/api/llm.ts
import type { Request, Response } from "express";
import log from "../../services/log.js";
import options from "../../services/options.js";
// @ts-ignore
import { v4 as uuidv4 } from 'uuid';
import becca from "../../becca/becca.js";
import vectorStore from "../../services/llm/embeddings/index.js";
import providerManager from "../../services/llm/providers/providers.js";
import type { Message, ChatCompletionOptions } from "../../services/llm/ai_interface.js";
// Import this way to prevent immediate instantiation
import * as aiServiceManagerModule from "../../services/llm/ai_service_manager.js";
import contextService from "../../services/llm/context_service.js";
import sql from "../../services/sql.js";
// Import the index service for knowledge base management
import indexService from "../../services/llm/index_service.js";
import { CONTEXT_PROMPTS, ERROR_PROMPTS, FORMATTING_PROMPTS } from '../../services/llm/constants/llm_prompt_constants.js';
import { LLM_CONSTANTS } from '../../services/llm/constants/provider_constants.js';
// Define basic interfaces
interface ChatMessage {
role: 'user' | 'assistant' | 'system';
content: string;
timestamp?: Date;
}
interface ChatSession {
id: string;
title: string;
messages: ChatMessage[];
createdAt: Date;
lastActive: Date;
noteContext?: string; // Optional noteId that provides context
metadata: Record<string, any>;
}
interface NoteSource {
noteId: string;
title: string;
content?: string;
similarity?: number;
branchId?: string;
}
interface SessionOptions {
title?: string;
systemPrompt?: string;
temperature?: number;
maxTokens?: number;
model?: string;
provider?: string;
contextNoteId?: string;
}
// In-memory storage for sessions
// In a production app, this should be stored in a database
const sessions = new Map<string, ChatSession>();
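// A minimal persistence sketch, assuming a hypothetical `llm_sessions` table,
// if sessions were ever moved into the database as the comment above suggests:
//
//   sql.execute(
//       `INSERT INTO llm_sessions (id, title, createdAt, metadata) VALUES (?, ?, ?, ?)`,
//       [session.id, session.title, session.createdAt.toISOString(), JSON.stringify(session.metadata)]
//   );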
// Flag to track if cleanup timer has been initialized
let cleanupInitialized = false;
/**
* Initialize the session cleanup timer to remove old/inactive sessions
* Only call this after database is initialized
*/
function initializeCleanupTimer() {
if (cleanupInitialized) {
return;
}
// Clean sessions that have expired based on the constants
function cleanupOldSessions() {
const expiryTime = new Date(Date.now() - LLM_CONSTANTS.SESSION.SESSION_EXPIRY_MS);
for (const [sessionId, session] of sessions.entries()) {
if (session.lastActive < expiryTime) {
sessions.delete(sessionId);
}
}
}
// Run cleanup at the configured interval
setInterval(cleanupOldSessions, LLM_CONSTANTS.SESSION.CLEANUP_INTERVAL_MS);
cleanupInitialized = true;
}
/**
* Check if the database is initialized
*/
function isDatabaseInitialized(): boolean {
try {
options.getOption('initialized');
return true;
} catch (error) {
return false;
}
}
/**
* Get the AI service manager in a way that doesn't crash at startup
*/
function safelyUseAIManager(): boolean {
// Only use AI manager if database is initialized
if (!isDatabaseInitialized()) {
return false;
}
// Try to access the manager - will create instance only if needed
try {
return aiServiceManagerModule.default.isAnyServiceAvailable();
} catch (error) {
log.error(`Error accessing AI service manager: ${error}`);
return false;
}
}
/**
* @swagger
* /api/llm/sessions:
* post:
* summary: Create a new LLM chat session
* operationId: llm-create-session
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* title:
* type: string
* description: Title for the chat session
* systemPrompt:
* type: string
* description: System message to set the behavior of the assistant
* temperature:
* type: number
* description: Temperature parameter for the LLM (0.0-1.0)
* maxTokens:
* type: integer
* description: Maximum tokens to generate in responses
* model:
* type: string
* description: Specific model to use (depends on provider)
* provider:
* type: string
* description: LLM provider to use (e.g., 'openai', 'anthropic', 'ollama')
* contextNoteId:
* type: string
* description: Note ID to use as context for the session
* responses:
* '200':
* description: Successfully created session
* content:
* application/json:
* schema:
* type: object
* properties:
* id:
* type: string
* title:
* type: string
* createdAt:
* type: string
* format: date-time
* security:
* - session: []
* tags: ["llm"]
*/
async function createSession(req: Request, res: Response) {
try {
// Initialize cleanup if not already done
initializeCleanupTimer();
const sessionOptions: SessionOptions = req.body || {};
const title = sessionOptions.title || 'Chat Session';
const sessionId = uuidv4();
const now = new Date();
// Initial system message if provided
const messages: ChatMessage[] = [];
if (sessionOptions.systemPrompt) {
messages.push({
role: 'system',
content: sessionOptions.systemPrompt,
timestamp: now
});
}
// Store session info
sessions.set(sessionId, {
id: sessionId,
title,
messages,
createdAt: now,
lastActive: now,
noteContext: sessionOptions.contextNoteId,
metadata: {
temperature: sessionOptions.temperature,
maxTokens: sessionOptions.maxTokens,
model: sessionOptions.model,
provider: sessionOptions.provider
}
});
return {
id: sessionId,
title,
createdAt: now
};
} catch (error: any) {
log.error(`Error creating LLM session: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to create LLM session: ${error.message || 'Unknown error'}`);
}
}
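// Illustrative request/response (hypothetical values):
//   curl -X POST /api/llm/sessions \
//        -H 'Content-Type: application/json' \
//        -d '{"title": "Research chat", "systemPrompt": "You are a concise assistant.", "temperature": 0.7}'
//   => { "id": "<uuid>", "title": "Research chat", "createdAt": "<ISO timestamp>" }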
/**
* @swagger
* /api/llm/sessions/{sessionId}:
* get:
* summary: Retrieve a specific chat session by ID
* operationId: llm-get-session
* parameters:
* - name: sessionId
* in: path
* required: true
* schema:
* type: string
* responses:
* '200':
* description: Chat session details
* content:
* application/json:
* schema:
* type: object
* properties:
* id:
* type: string
* title:
* type: string
* messages:
* type: array
* items:
* type: object
* properties:
* role:
* type: string
* enum: [user, assistant, system]
* content:
* type: string
* timestamp:
* type: string
* format: date-time
* createdAt:
* type: string
* format: date-time
* lastActive:
* type: string
* format: date-time
* '404':
* description: Session not found
* security:
* - session: []
* tags: ["llm"]
*/
async function getSession(req: Request, res: Response) {
try {
const { sessionId } = req.params;
// Check if session exists
const session = sessions.get(sessionId);
if (!session) {
throw new Error(`Session with ID ${sessionId} not found`);
}
// Return session without internal metadata
return {
id: session.id,
title: session.title,
createdAt: session.createdAt,
lastActive: session.lastActive,
messages: session.messages,
noteContext: session.noteContext
};
} catch (error: any) {
log.error(`Error getting LLM session: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to get session: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/sessions/{sessionId}:
* put:
* summary: Update a chat session's settings
* operationId: llm-update-session
* parameters:
* - name: sessionId
* in: path
* required: true
* schema:
* type: string
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* title:
* type: string
* description: Updated title for the session
* systemPrompt:
* type: string
* description: Updated system prompt
* temperature:
* type: number
* description: Updated temperature setting
* maxTokens:
* type: integer
* description: Updated maximum tokens setting
* model:
* type: string
* description: Updated model selection
* provider:
* type: string
* description: Updated provider selection
* contextNoteId:
* type: string
* description: Updated note ID for context
* responses:
* '200':
* description: Session successfully updated
* content:
* application/json:
* schema:
* type: object
* properties:
* id:
* type: string
* title:
* type: string
* updatedAt:
* type: string
* format: date-time
* '404':
* description: Session not found
* security:
* - session: []
* tags: ["llm"]
*/
async function updateSession(req: Request, res: Response) {
try {
const { sessionId } = req.params;
const updates = req.body || {};
// Check if session exists
const session = sessions.get(sessionId);
if (!session) {
throw new Error(`Session with ID ${sessionId} not found`);
}
// Update allowed fields
if (updates.title) {
session.title = updates.title;
}
if (updates.noteContext) {
session.noteContext = updates.noteContext;
}
// Update metadata
if (updates.temperature !== undefined) {
session.metadata.temperature = updates.temperature;
}
if (updates.maxTokens !== undefined) {
session.metadata.maxTokens = updates.maxTokens;
}
if (updates.model) {
session.metadata.model = updates.model;
}
if (updates.provider) {
session.metadata.provider = updates.provider;
}
// Update timestamp
session.lastActive = new Date();
return {
id: session.id,
title: session.title,
updatedAt: session.lastActive
};
} catch (error: any) {
log.error(`Error updating LLM session: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to update session: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/sessions:
* get:
* summary: List all chat sessions
* operationId: llm-list-sessions
* responses:
* '200':
* description: List of chat sessions
* content:
* application/json:
* schema:
* type: object
* properties:
* sessions:
* type: array
* items:
* type: object
* properties:
* id:
* type: string
* title:
* type: string
* createdAt:
* type: string
* format: date-time
* lastActive:
* type: string
* format: date-time
* messageCount:
* type: integer
* security:
* - session: []
* tags: ["llm"]
*/
async function listSessions(req: Request, res: Response) {
try {
const sessionList = Array.from(sessions.values()).map(session => ({
id: session.id,
title: session.title,
createdAt: session.createdAt,
lastActive: session.lastActive,
messageCount: session.messages.length
}));
// Sort by last activity (most recent first)
sessionList.sort((a, b) => b.lastActive.getTime() - a.lastActive.getTime());
return {
sessions: sessionList
};
} catch (error: any) {
log.error(`Error listing LLM sessions: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to list sessions: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/sessions/{sessionId}:
* delete:
* summary: Delete a chat session
* operationId: llm-delete-session
* parameters:
* - name: sessionId
* in: path
* required: true
* schema:
* type: string
* responses:
* '200':
* description: Session successfully deleted
* '404':
* description: Session not found
* security:
* - session: []
* tags: ["llm"]
*/
async function deleteSession(req: Request, res: Response) {
try {
const { sessionId } = req.params;
// Check if session exists
if (!sessions.has(sessionId)) {
throw new Error(`Session with ID ${sessionId} not found`);
}
// Delete session
sessions.delete(sessionId);
return {
success: true,
message: `Session ${sessionId} deleted successfully`
};
} catch (error: any) {
log.error(`Error deleting LLM session: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to delete session: ${error.message || 'Unknown error'}`);
}
}
/**
* Find relevant notes based on search query
*/
async function findRelevantNotes(content: string, contextNoteId: string | null = null, limit = 5): Promise<NoteSource[]> {
try {
// If database is not initialized, we can't do this
if (!isDatabaseInitialized()) {
return [];
}
// Check if embeddings are available
const enabledProviders = await providerManager.getEnabledEmbeddingProviders();
if (enabledProviders.length === 0) {
log.info("No embedding providers available, can't find relevant notes");
return [];
}
// If content is too short, don't bother
if (content.length < 3) {
return [];
}
// Get the embedding for the query
const provider = enabledProviders[0];
const embedding = await provider.generateEmbeddings(content);
let results;
if (contextNoteId) {
// For branch context, get notes specifically from that branch
// TODO: This is a simplified implementation - we need to
// properly get all notes in the subtree starting from contextNoteId
// For now, just get direct children of the context note
const contextNote = becca.notes[contextNoteId];
if (!contextNote) {
return [];
}
const childBranches = await sql.getRows(`
SELECT branches.* FROM branches
WHERE branches.parentNoteId = ?
AND branches.isDeleted = 0
`, [contextNoteId]);
const childNoteIds = childBranches.map((branch: any) => branch.noteId);
// Include the context note itself
childNoteIds.push(contextNoteId);
// Find similar notes in this context
results = [];
for (const noteId of childNoteIds) {
const noteEmbedding = await vectorStore.getEmbeddingForNote(
noteId,
provider.name,
provider.getConfig().model
);
if (noteEmbedding) {
const similarity = vectorStore.cosineSimilarity(
embedding,
noteEmbedding.embedding
);
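// cosineSimilarity is assumed to compute dot(a, b) / (|a| * |b|), i.e. 1.0 for
// vectors pointing the same way and ~0 for unrelated ones; the 0.65 cutoff
// below is an empirical relevance threshold rather than a derived constant.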
if (similarity > 0.65) {
results.push({
noteId,
similarity
});
}
}
}
// Sort by similarity
results.sort((a, b) => b.similarity - a.similarity);
results = results.slice(0, limit);
} else {
// General search across all notes
results = await vectorStore.findSimilarNotes(
embedding,
provider.name,
provider.getConfig().model,
limit
);
}
// Format the results
const sources: NoteSource[] = [];
for (const result of results) {
const note = becca.notes[result.noteId];
if (!note) continue;
let noteContent: string | undefined = undefined;
if (note.type === 'text') {
const content = note.getContent();
// Handle both string and Buffer types
noteContent = typeof content === 'string' ? content :
content instanceof Buffer ? content.toString('utf8') : undefined;
}
sources.push({
noteId: result.noteId,
title: note.title,
content: noteContent,
similarity: result.similarity,
branchId: note.getBranches()[0]?.branchId
});
}
return sources;
} catch (error: any) {
log.error(`Error finding relevant notes: ${error.message}`);
return [];
}
}
/**
* Build a prompt with context from relevant notes
*/
function buildContextFromNotes(sources: NoteSource[], query: string): string {
log.info(`Building context from notes with query: ${query}`);
log.info(`Sources length: ${sources ? sources.length : 0}`);
// If no sources are available, just return the query without additional context
if (!sources || sources.length === 0) {
log.info("No sources available, using just the query");
return query || '';
}
const noteContexts = sources
.filter(source => source.content) // Only include sources with content
.map((source) => {
// Format each note with its title as a natural heading
return `### ${source.title}\n${source.content || 'No content available'}`;
})
.join('\n\n');
if (!noteContexts) {
log.info("After filtering, no valid note contexts remain - using just the query");
return query || '';
}
// Use the template from the constants file, replacing placeholders
return CONTEXT_PROMPTS.CONTEXT_NOTES_WRAPPER
.replace('{noteContexts}', noteContexts)
.replace('{query}', query);
}
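// Illustrative example (hypothetical data): given one source
//   { noteId: 'abc', title: 'Meeting notes', content: 'Discussed the Q3 roadmap.' }
// and the query "What did we decide?", the note is rendered as
//   "### Meeting notes\nDiscussed the Q3 roadmap."
// and substituted for {noteContexts} in CONTEXT_NOTES_WRAPPER, with the query
// replacing {query}.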
/**
* @swagger
* /api/llm/sessions/{sessionId}/messages:
* post:
* summary: Send a message to an LLM and get a response
* operationId: llm-send-message
* parameters:
* - name: sessionId
* in: path
* required: true
* schema:
* type: string
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* content:
* type: string
* description: The user message to send to the LLM
* useAdvancedContext:
* type: boolean
* description: Whether to use enhanced note-context extraction for this message
* showThinking:
* type: boolean
* description: Whether to include the model's intermediate reasoning in the response
* responses:
* '200':
* description: LLM response
* content:
* application/json:
* schema:
* type: object
* properties:
* content:
* type: string
* sources:
* type: array
* items:
* type: object
* properties:
* noteId:
* type: string
* title:
* type: string
* similarity:
* type: number
* '404':
* description: Session not found
* '500':
* description: Error processing request
* security:
* - session: []
* tags: ["llm"]
*/
async function sendMessage(req: Request, res: Response) {
try {
// Extract parameters differently based on the request method
let content, useAdvancedContext, showThinking, sessionId;
if (req.method === 'POST') {
// For POST requests, get content from the request body
const requestBody = req.body || {};
content = requestBody.content;
useAdvancedContext = requestBody.useAdvancedContext || false;
showThinking = requestBody.showThinking || false;
// Add logging for POST requests
log.info(`LLM POST message: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}, contentLength=${content ? content.length : 0}`);
} else if (req.method === 'GET') {
// For GET (streaming) requests, get format from query params
// The content should have been sent in a previous POST request
useAdvancedContext = req.query.useAdvancedContext === 'true';
showThinking = req.query.showThinking === 'true';
content = ''; // We don't need content for GET requests
// Add logging for GET requests
log.info(`LLM GET stream: sessionId=${req.params.sessionId}, useAdvancedContext=${useAdvancedContext}, showThinking=${showThinking}`);
}
// Get sessionId from URL params since it's part of the route
sessionId = req.params.sessionId;
const isStreamingRequest = req.method === 'GET' && req.query.format === 'stream';
// For GET requests, ensure we have the format=stream parameter
if (req.method === 'GET' && (!req.query.format || req.query.format !== 'stream')) {
throw new Error('Stream format parameter is required for GET requests');
}
// For POST requests, validate the content
if (req.method === 'POST' && (!content || typeof content !== 'string' || content.trim().length === 0)) {
throw new Error('Content cannot be empty');
}
// Get session
if (!sessionId || !sessions.has(sessionId)) {
throw new Error('Session not found');
}
const session = sessions.get(sessionId)!;
session.lastActive = new Date();
// For POST requests, store the user message
if (req.method === 'POST' && content) {
// Add message to session
session.messages.push({
role: 'user',
content,
timestamp: new Date()
});
// Log a preview of the message
log.info(`Processing LLM message: "${content.substring(0, 50)}${content.length > 50 ? '...' : ''}"`);
}
// Check if AI services are available
if (!safelyUseAIManager()) {
throw new Error('AI services are not available');
}
// Get the AI service manager
const aiServiceManager = aiServiceManagerModule.default.getInstance();
// Get the default service - just use the first available one
const availableProviders = aiServiceManager.getAvailableProviders();
let service = null;
if (availableProviders.length > 0) {
// Use the first available provider
const providerName = availableProviders[0];
// We know the manager has a 'services' property from our code inspection,
// but TypeScript doesn't know that from the interface.
// This is a workaround to access it
service = (aiServiceManager as any).services[providerName];
}
if (!service) {
throw new Error('No AI service is available');
}
// Information to return to the client
let aiResponse = '';
let sourceNotes: NoteSource[] = [];
// For POST requests, we need to process the message
// For GET (streaming) requests, we use the latest user message from the session
if (req.method === 'POST' || isStreamingRequest) {
// Get the latest user message for context
const latestUserMessage = session.messages
.filter(msg => msg.role === 'user')
.pop();
if (!latestUserMessage && req.method === 'GET') {
throw new Error('No user message found in session');
}
// Use the latest message content for GET requests
const messageContent = req.method === 'POST' ? content : latestUserMessage!.content;
// If Advanced Context is enabled, we use the improved method
if (useAdvancedContext) {
// Use the Trilium-specific approach
const contextNoteId = session.noteContext || null;
// Log that we're calling contextService with the parameters
log.info(`Using enhanced context with: noteId=${contextNoteId}, showThinking=${showThinking}`);
const results = await contextService.processQuery(
messageContent,
service,
contextNoteId,
showThinking
);
// Get the generated context
const context = results.context;
// Convert from NoteSearchResult to NoteSource
sourceNotes = results.sources.map(source => ({
noteId: source.noteId,
title: source.title,
content: source.content || undefined, // Convert null to undefined
similarity: source.similarity
}));
// Add system message with the context
const contextMessage: Message = {
role: 'system',
content: context
};
// DEBUG: Log context details before sending to LLM
log.info(`${FORMATTING_PROMPTS.DIVIDERS.CONTENT_SECTION} Context begins with: "${context.substring(0, 200)}..."`);
log.info(`${FORMATTING_PROMPTS.DIVIDERS.CONTENT_SECTION} Context ends with: "...${context.substring(context.length - 200)}"`);
log.info(`${FORMATTING_PROMPTS.DIVIDERS.NOTE_SECTION} Number of notes included: ${sourceNotes.length}`);
// Format messages for the LLM using the proper context
const aiMessages = await contextService.buildMessagesWithContext(
session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
role: msg.role,
content: msg.content
})),
context,
service
);
// Add enhanced debug logging
if (service.constructor.name === 'OllamaService') {
// Log a condensed version of the context so we can see whether it is being properly formatted
log.info(`Sending context to Ollama with length: ${context.length} chars`);
log.info(`Context first 200 chars: ${context.substring(0, 200).replace(/\n/g, '\\n')}...`);
log.info(`Context last 200 chars: ${context.substring(context.length - 200).replace(/\n/g, '\\n')}...`);
// Log the first user message to verify context injection is working
const userMsg = aiMessages.find(m => m.role === 'user');
if (userMsg) {
log.info(`First user message (first 200 chars): ${userMsg.content.substring(0, 200).replace(/\n/g, '\\n')}...`);
}
}
// DEBUG: Log message structure being sent to LLM
log.info(`Message structure being sent to LLM: ${aiMessages.length} messages total`);
// Configure chat options from session metadata
const chatOptions: ChatCompletionOptions = {
temperature: session.metadata.temperature || 0.7,
maxTokens: session.metadata.maxTokens,
model: session.metadata.model,
stream: isStreamingRequest ? true : undefined
};
// Process based on whether this is a streaming request
if (isStreamingRequest) {
// Set streaming headers once
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
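// Wire format of the SSE stream written below (illustrative):
//   data: {"content":"first chunk"}
//   data: {"content":"next chunk"}
//   data: [DONE]
// Clients can consume this with EventSource or fetch() plus a ReadableStream reader.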
// Flag to indicate we've handled the response directly
// This lets the route handler know not to process the result
(res as any).triliumResponseHandled = true;
let messageContent = '';
try {
// Use the correct method name: generateChatCompletion
const response = await service.generateChatCompletion(aiMessages, chatOptions);
// Handle streaming if the response includes a stream method
if (response.stream) {
await response.stream((chunk: { text: string; done: boolean }) => {
if (chunk.text) {
messageContent += chunk.text;
// Only write if the response hasn't finished
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
}
}
if (chunk.done) {
// Signal the end of the stream when done, only if not already ended
if (!res.writableEnded) {
res.write('data: [DONE]\n\n');
res.end();
}
}
});
} else {
// If no streaming available, send the response as a single chunk
messageContent = response.text;
// Only write if the response hasn't finished
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
res.write('data: [DONE]\n\n');
res.end();
}
}
// Store the full response for the session
aiResponse = messageContent;
// Store the assistant's response in the session
session.messages.push({
role: 'assistant',
content: aiResponse,
timestamp: new Date()
});
// For streaming requests we don't return anything as we've already sent the response
return null;
} catch (streamingError: any) {
// If streaming fails and we haven't sent a response yet, throw the error
if (!res.headersSent) {
throw streamingError;
} else {
// If headers were already sent, try to send an error event
try {
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
res.write('data: [DONE]\n\n');
res.end();
}
} catch (e) {
log.error(`Failed to write streaming error: ${e}`);
}
return null;
}
}
} else {
// Non-streaming approach for POST requests
const response = await service.generateChatCompletion(aiMessages, chatOptions);
aiResponse = response.text; // Extract the text from the response
// Store the assistant's response in the session
session.messages.push({
role: 'assistant',
content: aiResponse,
timestamp: new Date()
});
// Return the response for POST requests
return {
content: aiResponse,
sources: sourceNotes.map(note => ({
noteId: note.noteId,
title: note.title,
similarity: note.similarity
}))
};
}
} else {
// Original approach - find relevant notes through direct embedding comparison
const relevantNotes = await findRelevantNotes(
messageContent,
session.noteContext || null,
5
);
sourceNotes = relevantNotes;
// Build context from relevant notes
const context = buildContextFromNotes(relevantNotes, messageContent);
// Get messages with context properly formatted for the specific LLM provider
const aiMessages = await contextService.buildMessagesWithContext(
session.messages.slice(-LLM_CONSTANTS.SESSION.MAX_SESSION_MESSAGES).map(msg => ({
role: msg.role,
content: msg.content
})),
context,
service
);
// Configure chat options from session metadata
const chatOptions: ChatCompletionOptions = {
temperature: session.metadata.temperature || 0.7,
maxTokens: session.metadata.maxTokens,
model: session.metadata.model,
stream: isStreamingRequest ? true : undefined
};
if (isStreamingRequest) {
// Set streaming headers once
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
// Flag to indicate we've handled the response directly
// This lets the route handler know not to process the result
(res as any).triliumResponseHandled = true;
let messageContent = '';
try {
// Use the correct method name: generateChatCompletion
const response = await service.generateChatCompletion(aiMessages, chatOptions);
// Handle streaming if the response includes a stream method
if (response.stream) {
await response.stream((chunk: { text: string; done: boolean }) => {
if (chunk.text) {
messageContent += chunk.text;
// Only write if the response hasn't finished
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
}
}
if (chunk.done) {
// Signal the end of the stream when done, only if not already ended
if (!res.writableEnded) {
res.write('data: [DONE]\n\n');
res.end();
}
}
});
} else {
// If no streaming available, send the response as a single chunk
messageContent = response.text;
// Only write if the response hasn't finished
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
res.write('data: [DONE]\n\n');
res.end();
}
}
// Store the full response for the session
aiResponse = messageContent;
// Store the assistant's response in the session
session.messages.push({
role: 'assistant',
content: aiResponse,
timestamp: new Date()
});
// For streaming requests we don't return anything as we've already sent the response
return null;
} catch (streamingError: any) {
// If streaming fails and we haven't sent a response yet, throw the error
if (!res.headersSent) {
throw streamingError;
} else {
// If headers were already sent, try to send an error event
try {
if (!res.writableEnded) {
res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
res.write('data: [DONE]\n\n');
res.end();
}
} catch (e) {
log.error(`Failed to write streaming error: ${e}`);
}
return null;
}
}
} else {
// Non-streaming approach for POST requests
const response = await service.generateChatCompletion(aiMessages, chatOptions);
aiResponse = response.text; // Extract the text from the response
// Store the assistant's response in the session
session.messages.push({
role: 'assistant',
content: aiResponse,
timestamp: new Date()
});
// Return the response for POST requests
return {
content: aiResponse,
sources: sourceNotes.map(note => ({
noteId: note.noteId,
title: note.title,
similarity: note.similarity
}))
};
}
}
} else {
// If it's not a POST or streaming GET request, return the session's message history
return {
id: session.id,
messages: session.messages
};
}
} catch (error: any) {
log.error(`Error in LLM query processing: ${error}`);
return {
error: ERROR_PROMPTS.USER_ERRORS.GENERAL_ERROR
};
}
}
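// Typical two-step streaming flow (illustrative):
//   1. POST /api/llm/sessions/{sessionId}/messages
//      { "content": "Summarize my notes", "useAdvancedContext": true }
//   2. GET  /api/llm/sessions/{sessionId}/messages?format=stream&useAdvancedContext=true
// The GET request replays the latest stored user message and streams the completion
// back as server-sent events.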
/**
* @swagger
* /api/llm/index/stats:
* get:
* summary: Get statistics about the vector index
* operationId: llm-index-stats
* responses:
* '200':
* description: Vector index statistics
* content:
* application/json:
* schema:
* type: object
* properties:
* totalEmbeddings:
* type: integer
* totalIndexedNotes:
* type: integer
* lastIndexed:
* type: string
* format: date-time
* embeddingProvider:
* type: string
* security:
* - session: []
* tags: ["llm"]
*/
async function getIndexStats(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const stats = await indexService.getIndexingStats();
return stats;
} catch (error: any) {
log.error(`Error getting index stats: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to get index stats: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/index/start:
* post:
* summary: Start or restart the indexing process
* operationId: llm-start-indexing
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* force:
* type: boolean
* description: Whether to force a full reindex of all notes
* batchSize:
* type: integer
* description: Optional batch size; when provided, runs a limited batch instead of full indexing
* responses:
* '200':
* description: Indexing process started
* content:
* application/json:
* schema:
* type: object
* properties:
* message:
* type: string
* notesToIndex:
* type: integer
* security:
* - session: []
* tags: ["llm"]
*/
async function startIndexing(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const { force, batchSize } = req.body || {};
let result;
if (batchSize) {
// Run a limited batch indexing
result = await indexService.runBatchIndexing(batchSize);
return {
success: result,
message: result ? `Batch indexing started with size ${batchSize}` : 'Indexing already in progress'
};
} else {
// Start full indexing
result = await indexService.startFullIndexing(force);
return {
success: result,
message: result ? 'Full indexing started' : 'Indexing already in progress or not needed'
};
}
} catch (error: any) {
log.error(`Error starting indexing: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to start indexing: ${error.message || 'Unknown error'}`);
}
}
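// Illustrative request bodies:
//   { "force": true }     -> start a full reindex even if notes are already indexed
//   { "batchSize": 100 }  -> index at most 100 notes in a single batch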
/**
* @swagger
* /api/llm/index/failed:
* get:
* summary: Get list of notes that failed to be indexed
* operationId: llm-failed-indexes
* responses:
* '200':
* description: List of failed note indexes
* content:
* application/json:
* schema:
* type: object
* properties:
* count:
* type: integer
* failedNotes:
* type: array
* items:
* type: object
* properties:
* noteId:
* type: string
* title:
* type: string
* error:
* type: string
* timestamp:
* type: string
* format: date-time
* security:
* - session: []
* tags: ["llm"]
*/
async function getFailedIndexes(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const limit = req.query.limit ? parseInt(req.query.limit as string, 10) : 100;
const failedNotes = await indexService.getFailedIndexes(limit);
return {
count: failedNotes.length,
failedNotes
};
} catch (error: any) {
log.error(`Error getting failed indexes: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to get failed indexes: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/index/failed/{noteId}/retry:
* post:
* summary: Retry indexing a specific failed note
* operationId: llm-retry-failed-index
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* responses:
* '200':
* description: Retry process started
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* message:
* type: string
* '404':
* description: Failed note not found
* security:
* - session: []
* tags: ["llm"]
*/
async function retryFailedIndex(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const { noteId } = req.params;
if (!noteId) {
throw new Error('Note ID is required');
}
const success = await indexService.retryFailedNote(noteId);
return {
success,
message: success ? `Note ${noteId} queued for retry` : `Note ${noteId} not found in failed queue`
};
} catch (error: any) {
log.error(`Error retrying failed index: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to retry index: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/index/failed/retry-all:
* post:
* summary: Retry indexing all failed notes
* operationId: llm-retry-all-failed
* responses:
* '200':
* description: Retry process started for all failed notes
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* message:
* type: string
* count:
* type: integer
* security:
* - session: []
* tags: ["llm"]
*/
async function retryAllFailedIndexes(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const count = await indexService.retryAllFailedNotes();
return {
success: true,
count,
message: `${count} notes queued for retry`
};
} catch (error: any) {
log.error(`Error retrying all failed indexes: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to retry indexes: ${error.message || 'Unknown error'}`);
}
}
/**
* @swagger
* /api/llm/similar:
* post:
* summary: Find notes similar to the provided content
* operationId: llm-find-similar
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* query:
* type: string
* description: Text to find similar notes for
* contextNoteId:
* type: string
* description: Optional note ID to provide search context
* limit:
* type: integer
* description: Maximum number of results to return (default 10)
* responses:
* '200':
* description: List of similar notes
* content:
* application/json:
* schema:
* type: object
* properties:
* count:
* type: integer
* similarNotes:
* type: array
* items:
* type: object
* properties:
* noteId:
* type: string
* title:
* type: string
* similarity:
* type: number
* branchId:
* type: string
* security:
* - session: []
* tags: ["llm"]
*/
async function findSimilarNotes(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const { query, contextNoteId, limit } = req.body || {};
if (!query || typeof query !== 'string' || query.trim().length === 0) {
throw new Error('Query is required');
}
const similarNotes = await indexService.findSimilarNotes(
query,
contextNoteId,
limit || 10
);
return {
count: similarNotes.length,
similarNotes
};
} catch (error: any) {
log.error(`Error finding similar notes: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to find similar notes: ${error.message || 'Unknown error'}`);
}
}
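// Illustrative request body (hypothetical note ID):
//   { "query": "kubernetes deployment checklist", "contextNoteId": "abc123", "limit": 5 }
// => { "count": <n>, "similarNotes": [...] }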
/**
* @swagger
* /api/llm/generate-context:
* post:
* summary: Generate context from similar notes for a query
* operationId: llm-generate-context
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* properties:
* query:
* type: string
* description: Query to generate context for
* contextNoteId:
* type: string
* description: Optional note ID to provide additional context
* depth:
* type: integer
* description: Depth of related notes to include (default 2)
* responses:
* '200':
* description: Generated context for the query
* content:
* application/json:
* schema:
* type: object
* properties:
* context:
* type: string
* length:
* type: integer
* security:
* - session: []
* tags: ["llm"]
*/
async function generateQueryContext(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const { query, contextNoteId, depth } = req.body || {};
if (!query || typeof query !== 'string' || query.trim().length === 0) {
throw new Error('Query is required');
}
const context = await indexService.generateQueryContext(
query,
contextNoteId,
depth || 2
);
return {
context,
length: context.length
};
} catch (error: any) {
log.error(`Error generating query context: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to generate query context: ${error.message || 'Unknown error'}`);
}
}
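// Illustrative request body:
//   { "query": "How does sync work?", "depth": 2 }
// => { "context": "<assembled context text>", "length": <number of characters> }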
/**
* @swagger
* /api/llm/index/note/{noteId}:
* post:
* summary: Index or reindex a specific note
* operationId: llm-index-note
* parameters:
* - name: noteId
* in: path
* required: true
* schema:
* type: string
* requestBody:
* required: false
* content:
* application/json:
* schema:
* type: object
* properties:
* force:
* type: boolean
* description: Whether to force reindexing even if already indexed
* responses:
* '200':
* description: Note indexing result
* content:
* application/json:
* schema:
* type: object
* properties:
* success:
* type: boolean
* message:
* type: string
* '404':
* description: Note not found
* security:
* - session: []
* tags: ["llm"]
*/
async function indexNote(req: Request, res: Response) {
try {
if (!isDatabaseInitialized()) {
throw new Error('Database is not initialized yet');
}
const { noteId } = req.params;
if (!noteId) {
throw new Error('Note ID is required');
}
// Check if note exists
const note = becca.getNote(noteId);
if (!note) {
throw new Error(`Note ${noteId} not found`);
}
const success = await indexService.generateNoteIndex(noteId);
return {
success,
noteId,
noteTitle: note.title,
message: success ? `Note "${note.title}" indexed successfully` : `Failed to index note "${note.title}"`
};
} catch (error: any) {
log.error(`Error indexing note: ${error.message || 'Unknown error'}`);
throw new Error(`Failed to index note: ${error.message || 'Unknown error'}`);
}
}
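// Illustrative call (hypothetical note ID):
//   curl -X POST /api/llm/index/note/abc123def
// => { "success": true, "noteId": "abc123def", "noteTitle": "...", "message": "..." }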
export default {
// Chat session management
createSession,
getSession,
updateSession,
listSessions,
deleteSession,
sendMessage,
// Knowledge base index management
getIndexStats,
startIndexing,
getFailedIndexes,
retryFailedIndex,
retryAllFailedIndexes,
findSimilarNotes,
generateQueryContext,
indexNote
};