migrate to a pipeline approach for LLM chats

This commit is contained in:
perf3ct 2025-03-29 21:31:33 +00:00
parent a8fc9e9768
commit def28b1dcd
14 changed files with 1617 additions and 151 deletions

View File

@ -1,8 +1,9 @@
import type { Message, ChatCompletionOptions } from './ai_interface.js';
import aiServiceManager from './ai_service_manager.js';
import chatStorageService from './chat_storage_service.js';
import log from '../log.js';
import { CONTEXT_PROMPTS, ERROR_PROMPTS } from './constants/llm_prompt_constants.js';
import { ChatPipeline } from './pipeline/chat_pipeline.js';
import type { ChatPipelineConfig, StreamCallback } from './pipeline/interfaces.js';
export interface ChatSession {
id: string;
@ -12,12 +13,45 @@ export interface ChatSession {
options?: ChatCompletionOptions;
}
/**
* Chat pipeline configurations for different use cases
*/
const PIPELINE_CONFIGS: Record<string, Partial<ChatPipelineConfig>> = {
default: {
enableStreaming: true,
enableMetrics: true
},
agent: {
enableStreaming: true,
enableMetrics: true,
maxToolCallIterations: 5
},
performance: {
enableStreaming: false,
enableMetrics: true
}
};
/**
* Service for managing chat interactions and history
*/
export class ChatService {
private activeSessions: Map<string, ChatSession> = new Map();
private streamingCallbacks: Map<string, (content: string, isDone: boolean) => void> = new Map();
private pipelines: Map<string, ChatPipeline> = new Map();
constructor() {
// Initialize pipelines
Object.entries(PIPELINE_CONFIGS).forEach(([name, config]) => {
this.pipelines.set(name, new ChatPipeline(config));
});
}
/**
* Get a pipeline by name, or the default one
*/
private getPipeline(name: string = 'default'): ChatPipeline {
return this.pipelines.get(name) || this.pipelines.get('default')!;
}
/**
* Create a new chat session
@ -70,7 +104,7 @@ export class ChatService {
sessionId: string,
content: string,
options?: ChatCompletionOptions,
streamCallback?: (content: string, isDone: boolean) => void
streamCallback?: StreamCallback
): Promise<ChatSession> {
const session = await this.getOrCreateSession(sessionId);
@ -83,20 +117,23 @@ export class ChatService {
session.messages.push(userMessage);
session.isStreaming = true;
// Set up streaming if callback provided
if (streamCallback) {
this.streamingCallbacks.set(session.id, streamCallback);
}
try {
// Immediately save the user message
await chatStorageService.updateChat(session.id, session.messages);
// Generate AI response
const response = await aiServiceManager.generateChatCompletion(
session.messages,
options || session.options
);
// Log message processing
log.info(`Processing message: "${content.substring(0, 100)}..."`);
// Select pipeline to use
const pipeline = this.getPipeline();
// Execute the pipeline
const response = await pipeline.execute({
messages: session.messages,
options: options || session.options,
query: content,
streamCallback
});
// Add assistant message
const assistantMessage: Message = {
@ -111,26 +148,19 @@ export class ChatService {
await chatStorageService.updateChat(session.id, session.messages);
// If first message, update the title based on content
if (session.messages.length <= 2 && !session.title) {
// Extract a title from the conversation
if (session.messages.length <= 2 && (!session.title || session.title === 'New Chat')) {
const title = this.generateTitleFromMessages(session.messages);
session.title = title;
await chatStorageService.updateChat(session.id, session.messages, title);
}
// Notify streaming is complete
if (streamCallback) {
streamCallback(response.text, true);
this.streamingCallbacks.delete(session.id);
}
return session;
} catch (error: any) {
session.isStreaming = false;
console.error('Error in AI chat:', error);
// Add error message so user knows something went wrong
// Add error message
const errorMessage: Message = {
role: 'assistant',
content: ERROR_PROMPTS.USER_ERRORS.GENERAL_ERROR
@ -141,10 +171,100 @@ export class ChatService {
// Save the conversation with error
await chatStorageService.updateChat(session.id, session.messages);
// Notify streaming is complete with error
// Notify streaming error if callback provided
if (streamCallback) {
streamCallback(errorMessage.content, true);
}
return session;
}
}
/**
* Send a message with context from a specific note
*/
async sendContextAwareMessage(
sessionId: string,
content: string,
noteId: string,
options?: ChatCompletionOptions,
streamCallback?: StreamCallback
): Promise<ChatSession> {
const session = await this.getOrCreateSession(sessionId);
// Add user message
const userMessage: Message = {
role: 'user',
content
};
session.messages.push(userMessage);
session.isStreaming = true;
try {
// Immediately save the user message
await chatStorageService.updateChat(session.id, session.messages);
// Log message processing
log.info(`Processing context-aware message: "${content.substring(0, 100)}..."`);
log.info(`Using context from note: ${noteId}`);
// Get showThinking option if it exists
const showThinking = options?.showThinking === true;
// Select appropriate pipeline based on whether agent tools are needed
const pipelineType = showThinking ? 'agent' : 'default';
const pipeline = this.getPipeline(pipelineType);
// Execute the pipeline with note context
const response = await pipeline.execute({
messages: session.messages,
options: options || session.options,
noteId,
query: content,
showThinking,
streamCallback
});
// Add assistant message
const assistantMessage: Message = {
role: 'assistant',
content: response.text
};
session.messages.push(assistantMessage);
session.isStreaming = false;
// Save the complete conversation
await chatStorageService.updateChat(session.id, session.messages);
// If first message, update the title
if (session.messages.length <= 2 && (!session.title || session.title === 'New Chat')) {
const title = this.generateTitleFromMessages(session.messages);
session.title = title;
await chatStorageService.updateChat(session.id, session.messages, title);
}
return session;
} catch (error: any) {
session.isStreaming = false;
console.error('Error in context-aware chat:', error);
// Add error message
const errorMessage: Message = {
role: 'assistant',
content: ERROR_PROMPTS.USER_ERRORS.CONTEXT_ERROR
};
session.messages.push(errorMessage);
// Save the conversation with error
await chatStorageService.updateChat(session.id, session.messages);
// Notify streaming error if callback provided
if (streamCallback) {
streamCallback(errorMessage.content, true);
this.streamingCallbacks.delete(session.id);
}
return session;
@ -166,19 +286,17 @@ export class ChatService {
const lastUserMessage = [...session.messages].reverse()
.find(msg => msg.role === 'user' && msg.content.length > 10)?.content || '';
let context;
if (useSmartContext && lastUserMessage) {
// Use smart context that considers the query for better relevance
context = await aiServiceManager.getContextExtractor().getSmartContext(noteId, lastUserMessage);
} else {
// Fall back to full context if smart context is disabled or no query available
context = await aiServiceManager.getContextExtractor().getFullContext(noteId);
}
// Use the context extraction stage from the pipeline
const pipeline = this.getPipeline();
const contextResult = await pipeline.stages.contextExtraction.execute({
noteId,
query: lastUserMessage,
useSmartContext
});
const contextMessage: Message = {
role: 'user',
content: CONTEXT_PROMPTS.NOTE_CONTEXT_PROMPT.replace('{context}', context)
content: CONTEXT_PROMPTS.NOTE_CONTEXT_PROMPT.replace('{context}', contextResult.context)
};
session.messages.push(contextMessage);
@ -198,15 +316,18 @@ export class ChatService {
async addSemanticNoteContext(sessionId: string, noteId: string, query: string): Promise<ChatSession> {
const session = await this.getOrCreateSession(sessionId);
// Use semantic context that considers the query for better relevance
const contextService = aiServiceManager.getContextService();
const context = await contextService.getSemanticContext(noteId, query);
// Use the semantic context extraction stage from the pipeline
const pipeline = this.getPipeline();
const contextResult = await pipeline.stages.semanticContextExtraction.execute({
noteId,
query
});
const contextMessage: Message = {
role: 'user',
content: CONTEXT_PROMPTS.SEMANTIC_NOTE_CONTEXT_PROMPT
.replace('{query}', query)
.replace('{context}', context)
.replace('{context}', contextResult.context)
};
session.messages.push(contextMessage);
@ -215,117 +336,6 @@ export class ChatService {
return session;
}
/**
* Send a message with enhanced semantic note context
*/
async sendContextAwareMessage(
sessionId: string,
content: string,
noteId: string,
options?: ChatCompletionOptions,
streamCallback?: (content: string, isDone: boolean) => void
): Promise<ChatSession> {
const session = await this.getOrCreateSession(sessionId);
// Add user message
const userMessage: Message = {
role: 'user',
content
};
session.messages.push(userMessage);
session.isStreaming = true;
// Set up streaming if callback provided
if (streamCallback) {
this.streamingCallbacks.set(session.id, streamCallback);
}
try {
// Immediately save the user message
await chatStorageService.updateChat(session.id, session.messages);
// Get the Trilium Context Service for enhanced context
const contextService = aiServiceManager.getContextService();
// Get showThinking option if it exists
const showThinking = options?.showThinking === true;
log.info(`Processing LLM message: "${content.substring(0, 100)}..."`);
log.info(`Using enhanced context with: noteId=${noteId}, showThinking=${showThinking}`);
// Get enhanced context for this note and query
const enhancedContext = await contextService.getAgentToolsContext(
noteId,
content,
showThinking
);
// Create messages array with context using the improved method
const messagesWithContext = await contextService.buildMessagesWithContext(
session.messages,
enhancedContext,
aiServiceManager.getService()
);
// Generate AI response
const response = await aiServiceManager.generateChatCompletion(
messagesWithContext,
options
);
// Add assistant message
const assistantMessage: Message = {
role: 'assistant',
content: response.text
};
session.messages.push(assistantMessage);
session.isStreaming = false;
// Save the complete conversation (without system message)
await chatStorageService.updateChat(session.id, session.messages);
// If first message, update the title
if (session.messages.length <= 2 && (!session.title || session.title === 'New Chat')) {
const title = this.generateTitleFromMessages(session.messages);
session.title = title;
await chatStorageService.updateChat(session.id, session.messages, title);
}
// Notify streaming is complete
if (streamCallback) {
streamCallback(response.text, true);
this.streamingCallbacks.delete(session.id);
}
return session;
} catch (error: any) {
session.isStreaming = false;
console.error('Error in context-aware chat:', error);
// Add error message
const errorMessage: Message = {
role: 'assistant',
content: ERROR_PROMPTS.USER_ERRORS.CONTEXT_ERROR
};
session.messages.push(errorMessage);
// Save the conversation with error
await chatStorageService.updateChat(session.id, session.messages);
// Notify streaming is complete with error
if (streamCallback) {
streamCallback(errorMessage.content, true);
this.streamingCallbacks.delete(session.id);
}
return session;
}
}
/**
* Get all user's chat sessions
*/
@ -345,10 +355,25 @@ export class ChatService {
*/
async deleteSession(sessionId: string): Promise<boolean> {
this.activeSessions.delete(sessionId);
this.streamingCallbacks.delete(sessionId);
return chatStorageService.deleteChat(sessionId);
}
/**
* Get pipeline performance metrics
*/
getPipelineMetrics(pipelineType: string = 'default'): any {
const pipeline = this.getPipeline(pipelineType);
return pipeline.getMetrics();
}
/**
* Reset pipeline metrics
*/
resetPipelineMetrics(pipelineType: string = 'default'): void {
const pipeline = this.getPipeline(pipelineType);
pipeline.resetMetrics();
}
/**
* Generate a title from the first messages in a conversation
*/
@ -377,4 +402,4 @@ export class ChatService {
// Singleton instance
const chatService = new ChatService();
export default chatService;

View File

@ -0,0 +1,70 @@
/**
* Constants related to LLM prompts and messaging
*/
/**
* System prompts for different use cases
*/
export const SYSTEM_PROMPTS = {
DEFAULT_SYSTEM_PROMPT:
"You are an intelligent AI assistant for Trilium Notes, a hierarchical note-taking application. " +
"Help the user with their notes, knowledge management, and questions. " +
"When referencing their notes, be clear about which note you're referring to. " +
"Be concise but thorough in your responses.",
AGENT_TOOLS_PROMPT:
"You are an intelligent AI assistant for Trilium Notes with access to special tools. " +
"You can use these tools to search through the user's notes and find relevant information. " +
"Always be helpful, accurate, and respect the user's privacy and security.",
CONTEXT_AWARE_PROMPT:
"You are an intelligent AI assistant for Trilium Notes. " +
"You have access to the context from the user's notes. " +
"Use this context to provide accurate and helpful responses. " +
"Be specific when referencing information from their notes."
};
/**
* Prompts related to context handling
*/
export const CONTEXT_PROMPTS = {
NOTE_CONTEXT_PROMPT:
"Here is context from my current note to help you understand what I'm working on: {context}",
SEMANTIC_NOTE_CONTEXT_PROMPT:
"I'm asking about: {query}\n\nHere's relevant information from my notes: {context}",
AGENT_TOOLS_CONTEXT_PROMPT:
"You have access to the following tools to help answer the user's question: {tools}",
INDEX_NO_NOTES_CONTEXT:
"I couldn't find any directly relevant information in your notes about this query. " +
"I'll try to help based on my general knowledge, but please note that I may not have all the specific details you need."
};
/**
* Error prompts for different scenarios
*/
export const ERROR_PROMPTS = {
USER_ERRORS: {
GENERAL_ERROR:
"I'm sorry, but I encountered an error while processing your request. " +
"Please try again or rephrase your question.",
CONTEXT_ERROR:
"I'm sorry, but I encountered an error while retrieving context from your notes. " +
"I'll try to help based on what I know, but I might be missing important context.",
PROVIDER_ERROR:
"I'm sorry, but there seems to be an issue with the AI service provider. " +
"Please check your connection and API settings, or try again later."
},
SYSTEM_ERRORS: {
NO_PROVIDER_AVAILABLE:
"No AI provider is available. Please check your AI settings and ensure at least one provider is configured properly.",
UNAUTHORIZED:
"The AI provider returned an authorization error. Please check your API key settings."
}
};

View File

@ -0,0 +1,417 @@
# Trilium LLM Pipeline Documentation
This document gives an overview of the LLM Pipeline architecture in Trilium Notes, explains the data flow, and describes how to extend the pipeline with new providers or stages.
## Overview
The LLM Pipeline is a modular architecture that handles the flow of data for LLM chat interactions in Trilium Notes. It breaks down the complex process of context retrieval, message preparation, model selection, completion generation, and response processing into separate, reusable stages.
## Pipeline Data Flow
```mermaid
flowchart TD
Input[Chat Input] --> Pipeline
subgraph Pipeline
direction TB
Context[Context Extraction] --> MessagePrep[Message Preparation]
MessagePrep --> ModelSelection[Model Selection]
ModelSelection --> LLMCompletion[LLM Completion]
LLMCompletion --> ResponseProcess[Response Processing]
end
Pipeline --> Output[Chat Response]
subgraph Optional
direction TB
ToolExecution[Tool Execution]
end
ResponseProcess -.-> ToolExecution
ToolExecution -.-> MessagePrep
```
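In code, this flow is a single `execute` call on the pipeline. A minimal, non-streaming sketch (the note id and message below are placeholders, and the import paths assume code living under `src/services/llm`):

```typescript
import { ChatPipeline } from './pipeline/chat_pipeline.js';
import type { ChatPipelineInput } from './pipeline/interfaces.js';

const pipeline = new ChatPipeline();

// Placeholder input; in Trilium this comes from the active chat session
const input: ChatPipelineInput = {
    messages: [{ role: 'user', content: 'Summarize this note for me.' }],
    noteId: 'exampleNoteId', // hypothetical note id
    query: 'Summarize this note for me.',
    options: { stream: false }
};

const response = await pipeline.execute(input);
console.log(response.text);
```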
## Pipeline Architecture
The pipeline is composed of modular stages that can be configured for different use cases:
```mermaid
classDiagram
class ChatPipeline {
+stages
+config: ChatPipelineConfig
+metrics: PipelineMetrics
+constructor(config?: ChatPipelineConfig)
+execute(input: ChatPipelineInput): Promise~ChatResponse~
+getMetrics(): PipelineMetrics
+resetMetrics(): void
}
class BasePipelineStage {
+name: string
+execute(input: PipelineInput): Promise~PipelineOutput~
#process(input: PipelineInput): Promise~PipelineOutput~
}
ChatPipeline --> ContextExtractionStage
ChatPipeline --> SemanticContextExtractionStage
ChatPipeline --> AgentToolsContextStage
ChatPipeline --> MessagePreparationStage
ChatPipeline --> ModelSelectionStage
ChatPipeline --> LLMCompletionStage
ChatPipeline --> ResponseProcessingStage
BasePipelineStage <|-- ContextExtractionStage
BasePipelineStage <|-- SemanticContextExtractionStage
BasePipelineStage <|-- AgentToolsContextStage
BasePipelineStage <|-- MessagePreparationStage
BasePipelineStage <|-- ModelSelectionStage
BasePipelineStage <|-- LLMCompletionStage
BasePipelineStage <|-- ResponseProcessingStage
```
## Pipeline Stages
Each stage in the pipeline has a specific responsibility (a sketch of how they chain together follows the list):
1. **Context Extraction**: Retrieves relevant context from notes based on the user's query
2. **Message Preparation**: Formats messages and context in a provider-specific way
3. **Model Selection**: Chooses an appropriate model based on query complexity
4. **LLM Completion**: Generates the completion using the selected provider and model
5. **Response Processing**: Post-processes the response (handles "thinking" output, formats markdown, etc.)
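Inside `ChatPipeline.execute`, these stages run in sequence. The sketch below is a condensed view of the real implementation (streaming, metrics, error handling, and the agent-tools branch are omitted), assuming it sits next to the pipeline code:

```typescript
import { ChatPipeline } from './chat_pipeline.js';
import type { ChatPipelineInput } from './interfaces.js';
import type { ChatResponse } from '../ai_interface.js';

// Condensed view of what ChatPipeline.execute does with its stages
async function runPipeline(pipeline: ChatPipeline, input: ChatPipelineInput): Promise<ChatResponse> {
    // 1. Context extraction (semantic variant; agent tools context is used when showThinking is set)
    const { context } = input.noteId && input.query
        ? await pipeline.stages.semanticContextExtraction.execute({ noteId: input.noteId, query: input.query })
        : { context: undefined };

    // 2. Model selection based on query complexity
    const { options } = await pipeline.stages.modelSelection.execute({
        options: input.options,
        query: input.query
    });

    // 3. Provider-specific message preparation with context and system prompt
    const { messages } = await pipeline.stages.messagePreparation.execute({
        messages: input.messages,
        context,
        options
    });

    // 4. LLM completion via the selected provider
    const { response } = await pipeline.stages.llmCompletion.execute({ messages, options });

    // 5. Response post-processing (thinking sections, cleanup)
    const { text } = await pipeline.stages.responseProcessing.execute({ response, options: input.options });

    return { ...response, text };
}
```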
## Provider-Specific Message Formatting
Different LLM providers expect context and system prompts to be arranged differently for best results, so the pipeline formats messages per provider (a usage sketch follows the diagram):
```mermaid
flowchart TD
Messages[User Messages] --> Formatter[Message Formatter]
Context[Note Context] --> Formatter
SystemPrompt[System Prompt] --> Formatter
Formatter --> Factory{Provider Type}
Factory -->|OpenAI| OpenAIFormatter[OpenAI Formatter]
Factory -->|Claude| ClaudeFormatter[Claude Formatter]
Factory -->|Ollama| OllamaFormatter[Ollama Formatter]
Factory -->|Other| DefaultFormatter[Default Formatter]
OpenAIFormatter --> OpenAIMessage[Optimized Messages]
ClaudeFormatter --> ClaudeMessage[Optimized Messages]
OllamaFormatter --> OllamaMessage[Optimized Messages]
DefaultFormatter --> DefaultMessage[Generic Messages]
```
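The `MessagePreparationStage` resolves the right formatter through `MessageFormatterFactory`. A minimal lookup sketch (the context string is a placeholder, and the import paths assume code living under `src/services/llm`):

```typescript
import { MessageFormatterFactory } from './pipeline/interfaces/message_formatter.js';
import { SYSTEM_PROMPTS } from './constants/llm_prompt_constants.js';
import type { Message } from './ai_interface.js';

const messages: Message[] = [
    { role: 'user', content: 'What does this note say about backups?' }
];

// Providers without a registered formatter fall back to the default formatter
const formatter = MessageFormatterFactory.getFormatter('anthropic');

const formatted = formatter.formatMessages(
    messages,
    SYSTEM_PROMPTS.CONTEXT_AWARE_PROMPT,
    'Backups are stored under the "Backups" note.' // placeholder context
);
```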
## Multiple Pipeline Configurations
The chat service now supports multiple pipeline configurations for different use cases:
```mermaid
flowchart LR
ChatService --> DefaultPipeline[Default Pipeline]
ChatService --> AgentPipeline[Agent Pipeline]
ChatService --> PerformancePipeline[Performance Pipeline]
DefaultPipeline --> DefaultConfig[enableStreaming: true<br>enableMetrics: true]
AgentPipeline --> AgentConfig[enableStreaming: true<br>enableMetrics: true<br>maxToolCallIterations: 5]
PerformancePipeline --> PerformanceConfig[enableStreaming: false<br>enableMetrics: true]
```
## Adding a New LLM Provider
To add a new LLM provider to Trilium, follow these steps:
### 1. Implement the AIService Interface
Create a new file in `src/services/llm/providers/your_provider_service.ts`:
```typescript
import type { Message, ChatCompletionOptions, ChatResponse, AIService } from '../ai_interface.js';
import log from '../../log.js';
import options from '../../options.js';
export class YourProviderService implements AIService {
async generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse> {
// Implement API call to your provider
// Return response in standardized format
return {
text: "Response text from your provider",
model: options?.model || "default-model",
provider: "your-provider-name"
};
}
isAvailable(): boolean {
// Check if API key or other required config exists
const apiKey = options.getOption('yourProviderApiKey');
return !!apiKey;
}
getName(): string {
return 'your-provider-name';
}
}
```
### 2. Create a Message Formatter
Create a custom message formatter in `src/services/llm/pipeline/interfaces/message_formatter.ts`:
```typescript
export class YourProviderMessageFormatter extends BaseMessageFormatter {
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
// Format messages optimally for your provider
const formattedMessages: Message[] = [];
// Add system message
if (systemPrompt) {
formattedMessages.push({
role: 'system',
content: systemPrompt
});
}
// Format context according to provider's best practices
if (context) {
// Example: Add context in provider-specific format
formattedMessages.push({
role: 'user',
content: `Reference information: ${context}`
});
}
// Add the rest of messages
formattedMessages.push(...this.getMessagesWithoutSystem(messages));
return formattedMessages;
}
}
// Register your formatter with the factory
MessageFormatterFactory.registerFormatter('your-provider-name', new YourProviderMessageFormatter());
```
### 3. Register the Provider in AIServiceManager
Update `src/services/llm/ai_service_manager.ts`:
```typescript
import { YourProviderService } from './providers/your_provider_service.js';
// Add your provider to the services object (and extend the ServiceProviders type to include it)
private services: Record<ServiceProviders, AIService> = {
openai: new OpenAIService(),
anthropic: new AnthropicService(),
ollama: new OllamaService(),
'your-provider-name': new YourProviderService()
};
// Add it to the default provider order
private providerOrder: ServiceProviders[] = ['openai', 'anthropic', 'ollama', 'your-provider-name'];
```
### 4. Add Provider Settings to Options
In the appropriate UI file, add settings for your provider:
```typescript
// Example settings
{
name: 'yourProviderApiKey',
value: '',
isSensitive: true
},
{
name: 'yourProviderDefaultModel',
value: 'default-model-name'
}
```
## Using the Pipeline via ChatService
The `ChatService` now routes all chat interactions through the pipeline. Regular messages go through `sendMessage`, while `sendContextAwareMessage` adds note context and switches to the agent pipeline when `showThinking` is enabled:
```typescript
import chatService from '../services/llm/chat_service.js';

// Send a regular message through the default pipeline
const session = await chatService.sendMessage(
    'session-id',
    'What can you tell me about this note?',
    {
        model: 'openai:gpt-4',
        temperature: 0.7
    },
    (text, isDone) => {
        console.log('Received text:', text);
    }
);

// Send a context-aware message (uses the agent pipeline when showThinking is true)
const contextSession = await chatService.sendContextAwareMessage(
    'session-id',
    'What can you tell me about this note?',
    'note-id-for-context',
    { showThinking: true }
);
```
## Using Pipeline Configurations
You can create specialized pipelines for different use cases:
```typescript
const PIPELINE_CONFIGS = {
default: {
enableStreaming: true,
enableMetrics: true
},
agent: {
enableStreaming: true,
enableMetrics: true,
maxToolCallIterations: 5
},
performance: {
enableStreaming: false,
enableMetrics: true
}
};
// Create a pipeline with custom config
const pipeline = new ChatPipeline(PIPELINE_CONFIGS.agent);
```
## Pipeline Metrics
The pipeline now includes built-in performance metrics:
```typescript
// Get pipeline metrics
const metrics = chatService.getPipelineMetrics('default');
console.log('Total executions:', metrics.totalExecutions);
console.log('Average execution time:', metrics.averageExecutionTime, 'ms');
// Get stage-specific metrics
for (const [stage, stageMetrics] of Object.entries(metrics.stageMetrics)) {
console.log(`Stage ${stage}:`, stageMetrics.averageExecutionTime, 'ms');
}
// Reset metrics
chatService.resetPipelineMetrics();
```
## Streaming Support
The pipeline now has built-in streaming support:
```typescript
// Create a pipeline with streaming enabled
const pipeline = new ChatPipeline({ enableStreaming: true });
// Execute with streaming callback
const response = await pipeline.execute({
messages: [...],
options: { stream: true },
streamCallback: (text, isDone) => {
// Update UI with streaming response
updateChatUI(text);
}
});
```
## Extending the Pipeline with Custom Stages
You can create custom pipeline stages:
1. Define your stage interface in `interfaces.ts`
2. Create a new stage class that extends `BasePipelineStage`
3. Instantiate your stage in the `ChatPipeline` constructor
4. Modify the `execute` method to include your stage
Example custom stage:
```typescript
export class CustomStage extends BasePipelineStage<CustomInput, CustomOutput> {
constructor() {
super('CustomStage');
}
protected async process(input: CustomInput): Promise<CustomOutput> {
// Process input and return output
return { result: 'processed data' };
}
}
```
## Tool Execution for Agentic Features
For implementing agentic features with tool execution:
```mermaid
sequenceDiagram
participant User
participant Pipeline as LLM Pipeline
participant LLM
participant Tools as Tool Executor
User->>Pipeline: User Query
Pipeline->>LLM: Formatted Query + Context
LLM->>Pipeline: Response with Tool Calls
Pipeline->>Tools: Execute Tool Calls
Tools->>Pipeline: Tool Results
Pipeline->>LLM: Original Messages + Tool Results
LLM->>Pipeline: Final Response
Pipeline->>User: Formatted Response
```
To implement tool execution (see the sketch after this list):
1. Create a new `ToolExecutionStage` that:
- Parses tool calls from LLM response
- Executes the appropriate tools
- Returns results formatted for next LLM call
2. Modify the pipeline to recursively handle tool calls:
- If response contains tool calls, execute tools
- Feed results back to message preparation
- Call LLM completion again with updated messages
- Repeat until no more tool calls or max iterations reached
- The pipeline already has a `maxToolCallIterations` config for this purpose
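A `ToolExecutionStage` does not exist yet; the sketch below shows one possible shape built on the existing `BasePipelineStage` and `ToolExecutionInput` interfaces, assuming it lives alongside the other stages. The `ToolCall` type and the `parseToolCalls`/`runTool` helpers are hypothetical placeholders:

```typescript
import { BasePipelineStage } from '../pipeline_stage.js';
import type { ToolExecutionInput } from '../interfaces.js';
import type { Message } from '../../ai_interface.js';

// Hypothetical shape of a parsed tool call and the helpers that handle it
interface ToolCall {
    name: string;
    args: Record<string, unknown>;
}
declare function parseToolCalls(text: string): ToolCall[];
declare function runTool(name: string, args: Record<string, unknown>): Promise<string>;

export class ToolExecutionStage extends BasePipelineStage<ToolExecutionInput, { messages: Message[]; hadToolCalls: boolean }> {
    constructor() {
        super('ToolExecution');
    }

    protected async process(input: ToolExecutionInput): Promise<{ messages: Message[]; hadToolCalls: boolean }> {
        // Parse tool calls out of the LLM response text
        const toolCalls = parseToolCalls(input.response.text);
        if (toolCalls.length === 0) {
            return { messages: input.messages, hadToolCalls: false };
        }

        // Execute each tool and append its result as a new message
        const toolMessages: Message[] = [];
        for (const call of toolCalls) {
            const result = await runTool(call.name, call.args);
            toolMessages.push({ role: 'user', content: `Tool ${call.name} returned: ${result}` });
        }

        // Feed results back so message preparation and completion can run another iteration
        return { messages: [...input.messages, ...toolMessages], hadToolCalls: true };
    }
}
```

The pipeline's `execute` method would then loop: run completion, pass the response through this stage, and repeat while `hadToolCalls` is true and the iteration count stays below `maxToolCallIterations`.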
## Error Handling
All stages include built-in error handling. Errors are logged and propagated up the pipeline, where they're caught and displayed to the user as a friendly error message.
To add custom error handling to a stage:
```typescript
protected async process(input: YourInput): Promise<YourOutput> {
try {
// Your processing logic
} catch (error) {
log.error(`Custom error in stage: ${error.message}`);
throw new Error('User-friendly error message');
}
}
```
## Performance Considerations
The pipeline implements several performance optimizations:
1. **Lazy Loading**: Components are only initialized when needed
2. **Caching**: Context extraction results are cached when possible
3. **Response Streaming**: Supports streaming for immediate feedback
4. **Performance Metrics**: Built-in timing metrics for each stage
When extending the pipeline, consider these best practices:
- Use the built-in metrics to identify bottlenecks (see the snippet after this list)
- Cache expensive operations
- Consider using the "performance" pipeline configuration for use cases where streaming isn't needed
- Use the appropriate level of context for the query complexity
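For the first point, the per-stage metrics the pipeline already records are enough to spot bottlenecks. A small sketch using `getPipelineMetrics` (import path matches the earlier ChatService example):

```typescript
import chatService from '../services/llm/chat_service.js';

// Find the stage with the highest average execution time
const metrics = chatService.getPipelineMetrics('default');
const sorted = Object.entries(metrics.stageMetrics)
    .sort(([, a], [, b]) => b.averageExecutionTime - a.averageExecutionTime);

if (sorted.length > 0) {
    const [name, stageMetrics] = sorted[0];
    console.log(`Slowest stage: ${name} (${stageMetrics.averageExecutionTime.toFixed(1)} ms average)`);
}
```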

View File

@ -0,0 +1,269 @@
import type { ChatPipelineInput, ChatPipelineConfig, PipelineMetrics, StreamCallback } from './interfaces.js';
import type { ChatResponse, StreamChunk } from '../ai_interface.js';
import { ContextExtractionStage } from './stages/context_extraction_stage.js';
import { SemanticContextExtractionStage } from './stages/semantic_context_extraction_stage.js';
import { AgentToolsContextStage } from './stages/agent_tools_context_stage.js';
import { MessagePreparationStage } from './stages/message_preparation_stage.js';
import { ModelSelectionStage } from './stages/model_selection_stage.js';
import { LLMCompletionStage } from './stages/llm_completion_stage.js';
import { ResponseProcessingStage } from './stages/response_processing_stage.js';
import log from '../../log.js';
/**
* Pipeline for managing the entire chat flow
* Implements a modular, composable architecture where each stage is a separate component
*/
export class ChatPipeline {
stages: {
contextExtraction: ContextExtractionStage;
semanticContextExtraction: SemanticContextExtractionStage;
agentToolsContext: AgentToolsContextStage;
messagePreparation: MessagePreparationStage;
modelSelection: ModelSelectionStage;
llmCompletion: LLMCompletionStage;
responseProcessing: ResponseProcessingStage;
};
config: ChatPipelineConfig;
metrics: PipelineMetrics;
/**
* Create a new chat pipeline
* @param config Optional pipeline configuration
*/
constructor(config?: Partial<ChatPipelineConfig>) {
// Initialize all pipeline stages
this.stages = {
contextExtraction: new ContextExtractionStage(),
semanticContextExtraction: new SemanticContextExtractionStage(),
agentToolsContext: new AgentToolsContextStage(),
messagePreparation: new MessagePreparationStage(),
modelSelection: new ModelSelectionStage(),
llmCompletion: new LLMCompletionStage(),
responseProcessing: new ResponseProcessingStage()
};
// Set default configuration values
this.config = {
enableStreaming: true,
enableMetrics: true,
maxToolCallIterations: 5,
...config
};
// Initialize metrics
this.metrics = {
totalExecutions: 0,
averageExecutionTime: 0,
stageMetrics: {}
};
// Initialize stage metrics
Object.keys(this.stages).forEach(stageName => {
this.metrics.stageMetrics[stageName] = {
totalExecutions: 0,
averageExecutionTime: 0
};
});
}
/**
* Execute the chat pipeline
* This is the main entry point that orchestrates all pipeline stages
*/
async execute(input: ChatPipelineInput): Promise<ChatResponse> {
log.info(`Executing chat pipeline with ${input.messages.length} messages`);
const startTime = Date.now();
this.metrics.totalExecutions++;
// Initialize streaming handler if requested
let streamCallback = input.streamCallback;
let accumulatedText = '';
try {
// Extract content length for model selection
let contentLength = 0;
for (const message of input.messages) {
contentLength += message.content.length;
}
// Determine which pipeline flow to use
let context: string | undefined;
// For context-aware chats, get the appropriate context
if (input.noteId && input.query) {
const contextStartTime = Date.now();
if (input.showThinking) {
// Get enhanced context with agent tools if thinking is enabled
const agentContext = await this.stages.agentToolsContext.execute({
noteId: input.noteId,
query: input.query,
showThinking: input.showThinking
});
context = agentContext.context;
this.updateStageMetrics('agentToolsContext', contextStartTime);
} else {
// Get semantic context for regular queries
const semanticContext = await this.stages.semanticContextExtraction.execute({
noteId: input.noteId,
query: input.query
});
context = semanticContext.context;
this.updateStageMetrics('semanticContextExtraction', contextStartTime);
}
}
// Select the appropriate model based on query complexity and content length
const modelSelectionStartTime = Date.now();
const modelSelection = await this.stages.modelSelection.execute({
options: input.options,
query: input.query,
contentLength
});
this.updateStageMetrics('modelSelection', modelSelectionStartTime);
// Prepare messages with context and system prompt
const messagePreparationStartTime = Date.now();
const preparedMessages = await this.stages.messagePreparation.execute({
messages: input.messages,
context,
systemPrompt: input.options?.systemPrompt,
options: modelSelection.options
});
this.updateStageMetrics('messagePreparation', messagePreparationStartTime);
// Generate completion using the LLM
const llmStartTime = Date.now();
// Setup streaming handler if streaming is enabled and callback provided
const enableStreaming = this.config.enableStreaming &&
modelSelection.options.stream !== false &&
typeof streamCallback === 'function';
if (enableStreaming) {
// Make sure stream is enabled in options
modelSelection.options.stream = true;
}
const completion = await this.stages.llmCompletion.execute({
messages: preparedMessages.messages,
options: modelSelection.options
});
this.updateStageMetrics('llmCompletion', llmStartTime);
// Handle streaming if enabled and available
if (enableStreaming && completion.response.stream && streamCallback) {
// Setup stream handler that passes chunks through response processing
await completion.response.stream(async (chunk: StreamChunk) => {
// Process the chunk text
const processedChunk = await this.processStreamChunk(chunk, input.options);
// Accumulate text for final response
accumulatedText += processedChunk.text;
// Forward to callback
await streamCallback!(processedChunk.text, processedChunk.done);
});
}
// Process the full response; its text is used when no streamed text was accumulated
const processStartTime = Date.now();
const processed = await this.stages.responseProcessing.execute({
response: completion.response,
options: input.options
});
this.updateStageMetrics('responseProcessing', processStartTime);
// Combine response with processed text, using accumulated text if streamed
const finalResponse: ChatResponse = {
...completion.response,
text: accumulatedText || processed.text
};
const endTime = Date.now();
const executionTime = endTime - startTime;
// Update overall average execution time
this.metrics.averageExecutionTime =
(this.metrics.averageExecutionTime * (this.metrics.totalExecutions - 1) + executionTime) /
this.metrics.totalExecutions;
log.info(`Chat pipeline completed in ${executionTime}ms`);
return finalResponse;
} catch (error: any) {
log.error(`Error in chat pipeline: ${error.message}`);
throw error;
}
}
/**
* Process a stream chunk through the response processing stage
*/
private async processStreamChunk(chunk: StreamChunk, options?: any): Promise<StreamChunk> {
try {
// Only process non-empty chunks
if (!chunk.text) return chunk;
// Create a minimal response object for the processor
const miniResponse = {
text: chunk.text,
model: 'streaming',
provider: 'streaming'
};
// Process the chunk text
const processed = await this.stages.responseProcessing.execute({
response: miniResponse,
options: options
});
// Return processed chunk
return {
...chunk,
text: processed.text
};
} catch (error) {
// On error, return original chunk
log.error(`Error processing stream chunk: ${error}`);
return chunk;
}
}
/**
* Update metrics for a pipeline stage
*/
private updateStageMetrics(stageName: string, startTime: number) {
if (!this.config.enableMetrics) return;
const executionTime = Date.now() - startTime;
const metrics = this.metrics.stageMetrics[stageName];
metrics.totalExecutions++;
metrics.averageExecutionTime =
(metrics.averageExecutionTime * (metrics.totalExecutions - 1) + executionTime) /
metrics.totalExecutions;
}
/**
* Get the current pipeline metrics
*/
getMetrics(): PipelineMetrics {
return this.metrics;
}
/**
* Reset pipeline metrics
*/
resetMetrics(): void {
this.metrics.totalExecutions = 0;
this.metrics.averageExecutionTime = 0;
Object.keys(this.metrics.stageMetrics).forEach(stageName => {
this.metrics.stageMetrics[stageName] = {
totalExecutions: 0,
averageExecutionTime: 0
};
});
}
}

View File

@ -0,0 +1,140 @@
import type { Message, ChatCompletionOptions, ChatResponse, StreamChunk } from '../ai_interface.js';
/**
* Base interface for pipeline input
*/
export interface PipelineInput {
[key: string]: any;
}
/**
* Pipeline configuration options
*/
export interface ChatPipelineConfig {
/**
* Whether to enable streaming support
*/
enableStreaming: boolean;
/**
* Whether to enable performance metrics
*/
enableMetrics: boolean;
/**
* Maximum number of tool call iterations
*/
maxToolCallIterations: number;
}
/**
* Pipeline metrics for monitoring performance
*/
export interface PipelineMetrics {
totalExecutions: number;
averageExecutionTime: number;
stageMetrics: Record<string, StageMetrics>;
}
/**
* Metrics for an individual pipeline stage
*/
export interface StageMetrics {
totalExecutions: number;
averageExecutionTime: number;
}
/**
* Callback for handling stream chunks
*/
export type StreamCallback = (text: string, isDone: boolean) => Promise<void> | void;
/**
* Common input for all chat-related pipeline stages
*/
export interface ChatPipelineInput extends PipelineInput {
messages: Message[];
options?: ChatCompletionOptions;
noteId?: string;
query?: string;
showThinking?: boolean;
streamCallback?: StreamCallback;
}
/**
* Base interface for pipeline stage output
*/
export interface PipelineOutput {
[key: string]: any;
}
/**
* Interface for the pipeline stage that performs context extraction
*/
export interface ContextExtractionInput extends PipelineInput {
noteId: string;
query: string;
useSmartContext?: boolean;
}
/**
* Interface for the pipeline stage that performs semantic context extraction
*/
export interface SemanticContextExtractionInput extends PipelineInput {
noteId: string;
query: string;
maxResults?: number;
}
/**
* Interface for the pipeline stage that performs message preparation
*/
export interface MessagePreparationInput extends PipelineInput {
messages: Message[];
context?: string;
systemPrompt?: string;
options?: ChatCompletionOptions;
}
/**
* Interface for the pipeline stage that performs model selection
*/
export interface ModelSelectionInput extends PipelineInput {
options?: ChatCompletionOptions;
query?: string;
contentLength?: number;
}
/**
* Interface for the pipeline stage that performs LLM completion
*/
export interface LLMCompletionInput extends PipelineInput {
messages: Message[];
options?: ChatCompletionOptions;
provider?: string;
}
/**
* Interface for the pipeline stage that performs response processing
*/
export interface ResponseProcessingInput extends PipelineInput {
response: ChatResponse;
options?: ChatCompletionOptions;
}
/**
* Interface for the pipeline stage that handles tool execution
*/
export interface ToolExecutionInput extends PipelineInput {
response: ChatResponse;
messages: Message[];
options?: ChatCompletionOptions;
}
/**
* Base interface for a pipeline stage
*/
export interface PipelineStage<TInput extends PipelineInput, TOutput extends PipelineOutput> {
name: string;
execute(input: TInput): Promise<TOutput>;
}

View File

@ -0,0 +1,215 @@
import type { Message } from '../../ai_interface.js';
/**
* Interface for message formatters that handle provider-specific message formatting
*/
export interface MessageFormatter {
/**
* Format messages with system prompt and context in provider-specific way
* @param messages Original messages
* @param systemPrompt Optional system prompt to override
* @param context Optional context to include
* @returns Formatted messages optimized for the specific provider
*/
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
}
/**
* Base message formatter with common functionality
*/
export abstract class BaseMessageFormatter implements MessageFormatter {
/**
* Format messages with system prompt and context
* Each provider should override this method with their specific formatting strategy
*/
abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
/**
* Helper method to extract existing system message from messages
*/
protected getSystemMessage(messages: Message[]): Message | undefined {
return messages.find(msg => msg.role === 'system');
}
/**
* Helper method to create a copy of messages without system message
*/
protected getMessagesWithoutSystem(messages: Message[]): Message[] {
return messages.filter(msg => msg.role !== 'system');
}
}
/**
* OpenAI-specific message formatter
* Optimizes message format for OpenAI models (GPT-3.5, GPT-4, etc.)
*/
export class OpenAIMessageFormatter extends BaseMessageFormatter {
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// OpenAI performs best with system message first, then context as a separate system message
// or appended to the original system message
// Handle system message
const existingSystem = this.getSystemMessage(messages);
if (systemPrompt || existingSystem) {
const systemContent = systemPrompt || existingSystem?.content || '';
formattedMessages.push({
role: 'system',
content: systemContent
});
}
// Add context as a system message with clear instruction
if (context) {
formattedMessages.push({
role: 'system',
content: `Please use the following context to respond to the user's messages:\n\n${context}`
});
}
// Add remaining messages (excluding system)
formattedMessages.push(...this.getMessagesWithoutSystem(messages));
return formattedMessages;
}
}
/**
* Anthropic-specific message formatter
* Optimizes message format for Claude models
*/
export class AnthropicMessageFormatter extends BaseMessageFormatter {
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// Anthropic performs best with a specific XML-like format for context and system instructions
// Create system message with combined prompt and context if any
let systemContent = '';
const existingSystem = this.getSystemMessage(messages);
if (systemPrompt || existingSystem) {
systemContent = systemPrompt || existingSystem?.content || '';
}
// For Claude, wrap context in XML tags for clear separation
if (context) {
systemContent += `\n\n<context>\n${context}\n</context>`;
}
// Add system message if we have content
if (systemContent) {
formattedMessages.push({
role: 'system',
content: systemContent
});
}
// Add remaining messages (excluding system)
formattedMessages.push(...this.getMessagesWithoutSystem(messages));
return formattedMessages;
}
}
/**
* Ollama-specific message formatter
* Optimizes message format for open-source models
*/
export class OllamaMessageFormatter extends BaseMessageFormatter {
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// Ollama format is closer to raw prompting and typically works better with
// context embedded in system prompt rather than as separate messages
// Build comprehensive system prompt
let systemContent = '';
const existingSystem = this.getSystemMessage(messages);
if (systemPrompt || existingSystem) {
systemContent = systemPrompt || existingSystem?.content || '';
}
// Add context to system prompt
if (context) {
systemContent += `\n\nReference information:\n${context}`;
}
// Add system message if we have content
if (systemContent) {
formattedMessages.push({
role: 'system',
content: systemContent
});
}
// Add remaining messages (excluding system)
formattedMessages.push(...this.getMessagesWithoutSystem(messages));
return formattedMessages;
}
}
/**
* Default message formatter when provider is unknown
*/
export class DefaultMessageFormatter extends BaseMessageFormatter {
formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
const formattedMessages: Message[] = [];
// Handle system message
const existingSystem = this.getSystemMessage(messages);
if (systemPrompt || existingSystem) {
const systemContent = systemPrompt || existingSystem?.content || '';
formattedMessages.push({
role: 'system',
content: systemContent
});
}
// Add context as a user message
if (context) {
formattedMessages.push({
role: 'user',
content: `Here is context to help you answer my questions: ${context}`
});
}
// Add user/assistant messages
formattedMessages.push(...this.getMessagesWithoutSystem(messages));
return formattedMessages;
}
}
/**
* Factory for creating the appropriate message formatter based on provider
*/
export class MessageFormatterFactory {
private static formatters: Record<string, MessageFormatter> = {
openai: new OpenAIMessageFormatter(),
anthropic: new AnthropicMessageFormatter(),
ollama: new OllamaMessageFormatter(),
default: new DefaultMessageFormatter()
};
/**
* Get the appropriate formatter for a provider
* @param provider Provider name
* @returns Message formatter for that provider
*/
static getFormatter(provider: string): MessageFormatter {
return this.formatters[provider] || this.formatters.default;
}
/**
* Register a custom formatter for a provider
* @param provider Provider name
* @param formatter Custom formatter implementation
*/
static registerFormatter(provider: string, formatter: MessageFormatter): void {
this.formatters[provider] = formatter;
}
}

View File

@ -0,0 +1,36 @@
import type { PipelineInput, PipelineOutput, PipelineStage } from './interfaces.js';
import log from '../../log.js';
/**
* Abstract base class for pipeline stages
*/
export abstract class BasePipelineStage<TInput extends PipelineInput, TOutput extends PipelineOutput> implements PipelineStage<TInput, TOutput> {
name: string;
constructor(name: string) {
this.name = name;
}
/**
* Execute the pipeline stage
*/
async execute(input: TInput): Promise<TOutput> {
try {
log.info(`Executing pipeline stage: ${this.name}`);
const startTime = Date.now();
const result = await this.process(input);
const endTime = Date.now();
log.info(`Pipeline stage ${this.name} completed in ${endTime - startTime}ms`);
return result;
} catch (error: any) {
log.error(`Error in pipeline stage ${this.name}: ${error.message}`);
throw error;
}
}
/**
* Process the input and produce output
* This is the main method that each pipeline stage must implement
*/
protected abstract process(input: TInput): Promise<TOutput>;
}

View File

@ -0,0 +1,32 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { PipelineInput } from '../interfaces.js';
import aiServiceManager from '../../ai_service_manager.js';
import log from '../../../log.js';
interface AgentToolsContextInput extends PipelineInput {
noteId: string;
query: string;
showThinking?: boolean;
}
/**
* Pipeline stage for retrieving agent tools context
*/
export class AgentToolsContextStage extends BasePipelineStage<AgentToolsContextInput, { context: string }> {
constructor() {
super('AgentToolsContext');
}
/**
* Get enhanced context with agent tools
*/
protected async process(input: AgentToolsContextInput): Promise<{ context: string }> {
const { noteId, query, showThinking = false } = input;
log.info(`Getting agent tools context for note ${noteId}, query: ${query?.substring(0, 50)}..., showThinking: ${showThinking}`);
const contextService = aiServiceManager.getContextService();
const context = await contextService.getAgentToolsContext(noteId, query, showThinking);
return { context };
}
}

View File

@ -0,0 +1,33 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { ContextExtractionInput } from '../interfaces.js';
import aiServiceManager from '../../ai_service_manager.js';
import log from '../../../log.js';
/**
* Pipeline stage for extracting context from notes
*/
export class ContextExtractionStage extends BasePipelineStage<ContextExtractionInput, { context: string }> {
constructor() {
super('ContextExtraction');
}
/**
* Extract context from a note
*/
protected async process(input: ContextExtractionInput): Promise<{ context: string }> {
const { noteId, query, useSmartContext = true } = input;
log.info(`Extracting context from note ${noteId}, query: ${query?.substring(0, 50)}...`);
let context: string;
if (useSmartContext && query) {
// Use smart context that considers the query for better relevance
context = await aiServiceManager.getContextService().getSmartContext(noteId, query);
} else {
// Fall back to full context if smart context is disabled or no query available
context = await aiServiceManager.getContextExtractor().getFullContext(noteId);
}
return { context };
}
}

View File

@ -0,0 +1,34 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { LLMCompletionInput } from '../interfaces.js';
import type { ChatResponse } from '../../ai_interface.js';
import aiServiceManager from '../../ai_service_manager.js';
import log from '../../../log.js';
/**
* Pipeline stage for LLM completion
*/
export class LLMCompletionStage extends BasePipelineStage<LLMCompletionInput, { response: ChatResponse }> {
constructor() {
super('LLMCompletion');
}
/**
* Generate LLM completion using the AI service
*/
protected async process(input: LLMCompletionInput): Promise<{ response: ChatResponse }> {
const { messages, options, provider } = input;
log.info(`Generating LLM completion, provider: ${provider || 'auto'}, model: ${options?.model || 'default'}`);
// If provider is specified, use that specific provider
if (provider && aiServiceManager.isProviderAvailable(provider)) {
const service = aiServiceManager.getService(provider);
const response = await service.generateChatCompletion(messages, options);
return { response };
}
// Otherwise use the service manager to select an available provider
const response = await aiServiceManager.generateChatCompletion(messages, options);
return { response };
}
}

View File

@ -0,0 +1,46 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { MessagePreparationInput } from '../interfaces.js';
import type { Message } from '../../ai_interface.js';
import { SYSTEM_PROMPTS } from '../../constants/llm_prompt_constants.js';
import { MessageFormatterFactory } from '../interfaces/message_formatter.js';
import log from '../../../log.js';
/**
* Pipeline stage for preparing messages for LLM completion
*/
export class MessagePreparationStage extends BasePipelineStage<MessagePreparationInput, { messages: Message[] }> {
constructor() {
super('MessagePreparation');
}
/**
* Prepare messages for LLM completion, including system prompt and context
* This uses provider-specific formatters to optimize the message structure
*/
protected async process(input: MessagePreparationInput): Promise<{ messages: Message[] }> {
const { messages, context, systemPrompt, options } = input;
// Determine provider from model string if available (format: "provider:model")
let provider = 'default';
if (options?.model && options.model.includes(':')) {
const [providerName] = options.model.split(':');
provider = providerName;
}
log.info(`Preparing messages for provider: ${provider}, context: ${!!context}, system prompt: ${!!systemPrompt}`);
// Get appropriate formatter for this provider
const formatter = MessageFormatterFactory.getFormatter(provider);
// Format messages using provider-specific approach
const formattedMessages = formatter.formatMessages(
messages,
systemPrompt || SYSTEM_PROMPTS.DEFAULT_SYSTEM_PROMPT,
context
);
log.info(`Formatted ${messages.length} messages into ${formattedMessages.length} messages for provider: ${provider}`);
return { messages: formattedMessages };
}
}

View File

@ -0,0 +1,79 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { ModelSelectionInput } from '../interfaces.js';
import type { ChatCompletionOptions } from '../../ai_interface.js';
import log from '../../../log.js';
import options from '../../../options.js';
/**
* Pipeline stage for selecting the appropriate LLM model
*/
export class ModelSelectionStage extends BasePipelineStage<ModelSelectionInput, { options: ChatCompletionOptions }> {
constructor() {
super('ModelSelection');
}
/**
* Select the appropriate model based on input complexity
*/
protected async process(input: ModelSelectionInput): Promise<{ options: ChatCompletionOptions }> {
const { options: inputOptions, query, contentLength } = input;
// Start with provided options or create a new object
const updatedOptions: ChatCompletionOptions = { ...(inputOptions || {}) };
// If model already specified, don't override it
if (updatedOptions.model) {
log.info(`Using explicitly specified model: ${updatedOptions.model}`);
return { options: updatedOptions };
}
// Get default model from options
const defaultModel = await options.getOption('aiDefaultModel') || 'openai:gpt-3.5-turbo';
// Determine query complexity
let queryComplexity = 'low';
if (query) {
// Simple heuristic: longer queries or those with complex terms indicate higher complexity
const complexityIndicators = [
'explain', 'analyze', 'compare', 'evaluate', 'synthesize',
'summarize', 'elaborate', 'investigate', 'research', 'debate'
];
const hasComplexTerms = complexityIndicators.some(term => query.toLowerCase().includes(term));
const isLongQuery = query.length > 100;
const hasMultipleQuestions = (query.match(/\?/g) || []).length > 1;
if ((hasComplexTerms && isLongQuery) || hasMultipleQuestions) {
queryComplexity = 'high';
} else if (hasComplexTerms || isLongQuery) {
queryComplexity = 'medium';
}
}
// Check content length if provided
if (contentLength && contentLength > 5000) {
// For large content, favor more powerful models
queryComplexity = contentLength > 10000 ? 'high' : 'medium';
}
// Select model based on complexity
if (queryComplexity === 'high') {
// Use more powerful model for complex queries
const advancedModel = await options.getOption('aiAdvancedModel') || 'openai:gpt-4-turbo';
updatedOptions.model = advancedModel;
// Use a lower temperature for complex tasks unless one was explicitly set
if (updatedOptions.temperature === undefined) updatedOptions.temperature = 0.3;
} else if (queryComplexity === 'medium') {
// Use standard model with moderate settings
updatedOptions.model = defaultModel;
if (updatedOptions.temperature === undefined) updatedOptions.temperature = 0.5;
} else {
// Use default model with standard settings for simple queries
updatedOptions.model = defaultModel;
if (updatedOptions.temperature === undefined) updatedOptions.temperature = 0.7;
}
log.info(`Selected model: ${updatedOptions.model} for query complexity: ${queryComplexity}`);
return { options: updatedOptions };
}
}

View File

@ -0,0 +1,44 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { ResponseProcessingInput } from '../interfaces.js';
import type { ChatResponse } from '../../ai_interface.js';
import log from '../../../log.js';
/**
* Pipeline stage for processing LLM responses
*/
export class ResponseProcessingStage extends BasePipelineStage<ResponseProcessingInput, { text: string }> {
constructor() {
super('ResponseProcessing');
}
/**
* Process the LLM response
*/
protected async process(input: ResponseProcessingInput): Promise<{ text: string }> {
const { response, options } = input;
log.info(`Processing LLM response from model: ${response.model}`);
// Perform any necessary post-processing on the response text
let text = response.text;
// For Markdown formatting, ensure code blocks are properly formatted
if (options?.showThinking && text.includes('thinking:')) {
// Extract and format thinking section
const thinkingMatch = text.match(/thinking:(.*?)(?=answer:|$)/s);
if (thinkingMatch) {
const thinking = thinkingMatch[1].trim();
text = text.replace(/thinking:.*?(?=answer:|$)/s, `**Thinking:** \n\n\`\`\`\n${thinking}\n\`\`\`\n\n`);
}
}
// Clean up response text
text = text.replace(/^\s*assistant:\s*/i, ''); // Remove leading "Assistant:" if present
// Log tokens if available for monitoring
if (response.usage) {
log.info(`Token usage - prompt: ${response.usage.promptTokens}, completion: ${response.usage.completionTokens}, total: ${response.usage.totalTokens}`);
}
return { text };
}
}

View File

@ -0,0 +1,26 @@
import { BasePipelineStage } from '../pipeline_stage.js';
import type { SemanticContextExtractionInput } from '../interfaces.js';
import aiServiceManager from '../../ai_service_manager.js';
import log from '../../../log.js';
/**
* Pipeline stage for extracting semantic context from notes
*/
export class SemanticContextExtractionStage extends BasePipelineStage<SemanticContextExtractionInput, { context: string }> {
constructor() {
super('SemanticContextExtraction');
}
/**
* Extract semantic context based on a query
*/
protected async process(input: SemanticContextExtractionInput): Promise<{ context: string }> {
const { noteId, query, maxResults = 5 } = input;
log.info(`Extracting semantic context from note ${noteId}, query: ${query?.substring(0, 50)}...`);
const contextService = aiServiceManager.getContextService();
const context = await contextService.getSemanticContext(noteId, query, maxResults);
return { context };
}
}