Notes/apps/server/src/services/llm/ai_interface.ts

import type { ToolCall } from './tools/tool_interfaces.js';
import type { ModelMetadata } from './providers/provider_options.js';

/**
 * A single chat message exchanged between the client and an LLM model.
 *
 * `role` and `content` are always present; every other member is optional
 * metadata used for tool calling or transport bookkeeping.
 */
export interface Message {
    /** Who authored this message. */
    role: 'user' | 'assistant' | 'system' | 'tool';

    /** Text payload of the message. */
    content: string;

    /**
     * Optional name attached to the message.
     * NOTE(review): presumably the tool/function name on `tool` messages — confirm against callers.
     */
    name?: string;

    /**
     * Optional tool call identifier.
     * NOTE(review): presumably ties a `tool` message to the call it answers — confirm against callers.
     */
    tool_call_id?: string;

    /** Tool calls carried by this message; may be omitted or explicitly `null`. */
    tool_calls?: ToolCall[] | null;

    /** Optional session ID for WebSocket communication. */
    sessionId?: string;
}

// Define additional interfaces for tool-related types
/**
 * Tool-selection directive that can accompany a completion request.
 */
export interface ToolChoice {
    /** Selection mode. */
    type: 'none' | 'auto' | 'function';

    /**
     * Named function target.
     * NOTE(review): presumably only meaningful when `type` is 'function'; the
     * type itself does not enforce that pairing — confirm against the providers.
     */
    function?: { name: string };
}

/**
 * Definition of a single tool that can be provided to the LLM.
 */
export interface ToolData {
    /** Kind of tool; `'function'` is the only variant declared here. */
    type: 'function';

    /** The function exposed by this tool. */
    function: {
        /** Name of the function. */
        name: string;

        /** Description of the function. */
        description: string;

        /**
         * Parameter description, keyed by string.
         * NOTE(review): presumably a JSON-Schema-style object — confirm against the tool registry.
         */
        parameters: Record<string, unknown>;
    };
}

/**
 * Progress record for a tool invocation (tool name, arguments, status and
 * optional result).
 */
export interface ToolExecutionInfo {
    /** Which stage of the execution this record reports. */
    type: 'start' | 'update' | 'complete' | 'error';

    /** The tool involved and the arguments supplied to it. */
    tool: { name: string; arguments: Record<string, unknown> };

    /** Optional outcome, either as plain text or as a string-keyed object. */
    result?: string | Record<string, unknown>;
}

/**
 * One incremental piece of a streamed LLM response.
 *
 * Chunks from every provider (OpenAI, Anthropic, Ollama, etc.) are
 * normalised to this shape. The original provider-specific chunks are
 * available through the extended interface in the stream_manager.
 *
 * Complete documentation on streaming usage lives in STREAMING.md.
 */
export interface StreamChunk {
    /** Text carried by this chunk; may be empty for status updates. */
    text: string;

    /** `true` when this is the final chunk in the stream. */
    done: boolean;

    /** Token usage statistics, when reported (rarely available in streaming mode). */
    usage?: {
        promptTokens?: number;
        completionTokens?: number;
        totalTokens?: number;
    };

    /**
     * Provider-specific data taken from the original response chunk,
     * e.g. thinking state or tool execution info.
     */
    raw?: Record<string, unknown>;

    /**
     * Tool calls from the LLM, if any. They may be accumulated over
     * multiple chunks during streaming. Unlike `tool_calls` on `Message`
     * and `ChatResponse`, `null` is not part of this member's type.
     */
    tool_calls?: ToolCall[];

    /** Tool execution information during streaming: tool name, args and execution status. */
    toolExecution?: ToolExecutionInfo;
}

/**
 * Outcome of an executed tool, reported back to models as feedback.
 */
export interface ToolExecutionStatus {
    /** Identifier of the tool call this status refers to. */
    toolCallId: string;

    /** Name of the tool. */
    name: string;

    /** Whether the execution succeeded. */
    success: boolean;

    /** Result of the execution, as text. */
    result: string;

    /** Error text, when one is recorded. */
    error?: string;
}

/**
 * Per-request settings for a chat completion.
 *
 * Most commonly used members:
 * - `stream`: when true, the response is streamed
 * - `model`: name of the model to use
 * - `providerMetadata`: metadata about the provider and model capabilities
 *   (this interface has no standalone `provider` member)
 * - `enableTools`: when true, tool support is enabled
 *
 * `stream` deserves particular care: it should be handled consistently
 * throughout the pipeline and be set explicitly to true or false.
 *
 * A streamed response can be consumed in two ways:
 * 1. Callback-based: supply a `streamCallback` to receive chunks directly
 * 2. API-based: use the `stream` property of the response to process chunks
 *
 * Complete documentation on streaming usage lives in STREAMING.md.
 */
export interface ChatCompletionOptions {
    /** Model name to use. */
    model?: string;

    // Generation parameters.
    // NOTE(review): how each provider applies or ignores these is not visible from this file.
    temperature?: number;
    maxTokens?: number;
    topP?: number;
    frequencyPenalty?: number;
    presencePenalty?: number;

    /** NOTE(review): presumably toggles surfacing the model's thinking output — confirm against callers. */
    showThinking?: boolean;

    /** System prompt for the request. */
    systemPrompt?: string;

    /** Whether to preserve an existing system message. */
    preserveSystemPrompt?: boolean;

    /** Whether to bypass the message formatter entirely. */
    bypassFormatter?: boolean;

    /** Whether this request expects a JSON response. */
    expectsJsonResponse?: boolean;

    /**
     * Whether to stream the response.
     * When true, the response is delivered incrementally through either:
     * - the `streamCallback`, if provided
     * - the `stream` property of the response object
     */
    stream?: boolean;

    /**
     * Optional callback for streaming responses.
     * When provided together with `stream: true`, it is called for each
     * chunk of the response.
     *
     * @param text The text content in this chunk
     * @param isDone Whether this is the final chunk
     * @param originalChunk Optional original provider-specific chunk for advanced usage
     */
    streamCallback?: (
        text: string,
        isDone: boolean,
        originalChunk?: Record<string, unknown>
    ) => Promise<void> | void;

    /** Whether to enable tool calling. */
    enableTools?: boolean;

    /** Tools to provide to the LLM. */
    tools?: ToolData[];

    /** Tool choice parameter for the LLM. */
    tool_choice?: ToolChoice;

    /** Whether to use advanced context enrichment. */
    useAdvancedContext?: boolean;

    /** Status information about executed tools, for feedback. */
    toolExecutionStatus?: ToolExecutionStatus[];

    /** Metadata about the provider and model capabilities. */
    providerMetadata?: ModelMetadata;

    /** Session ID for storing tool execution results. */
    sessionId?: string;

    /**
     * Maximum number of tool execution iterations.
     * Used to prevent infinite loops in tool execution.
     */
    maxToolIterations?: number;

    /**
     * Current tool execution iteration counter.
     * Internal use, for tracking nested tool executions.
     */
    currentToolIteration?: number;
}

/**
 * Result of a chat completion request.
 *
 * For streamed requests, the shape depends on how streaming was requested:
 *
 * 1. With a streamCallback: `text` contains the complete response collected
 *    from all chunks, and `stream` is not present.
 *
 * 2. Without a streamCallback: `text` is initially empty, and `stream`
 *    provides a function to process chunks and collect the complete response.
 *
 * Complete documentation on streaming usage lives in STREAMING.md.
 */
export interface ChatResponse {
    /**
     * The complete text response.
     * With a streamCallback this holds the collected response; when
     * streaming without a streamCallback it is initially empty.
     */
    text: string;

    /** Model that generated the response. */
    model: string;

    /** Provider that served the request (openai, anthropic, ollama, etc.). */
    provider: string;

    /** Token usage statistics; may not be available when streaming. */
    usage?: {
        promptTokens?: number;
        completionTokens?: number;
        totalTokens?: number;
    };

    /**
     * Stream processor function. Only present when streaming is enabled
     * without a streamCallback. Called with a chunk processor, it returns
     * a Promise that resolves to the complete response text.
     *
     * @param callback Function to process each chunk of the stream
     * @returns Promise resolving to the complete text after stream processing
     */
    stream?: (
        callback: (chunk: StreamChunk) => Promise<void> | void
    ) => Promise<string>;

    /**
     * Tool calls from the LLM (if tools were used and the model supports
     * them); may be omitted or explicitly `null`.
     */
    tool_calls?: ToolCall[] | null;
}

/**
 * Contract for a service that can produce chat completions.
 * NOTE(review): presumably implemented once per LLM provider — the
 * implementations are not visible from this file.
 */
export interface AIService {
    /**
     * Get the name of the service.
     *
     * @returns The service name
     */
    getName(): string;

    /**
     * Check whether the service can be used (API key is set, etc.).
     *
     * @returns `true` when the service is usable
     */
    isAvailable(): boolean;

    /**
     * Generate a chat completion response.
     *
     * @param messages Conversation messages to send
     * @param options Optional settings for this request
     * @returns Promise resolving to the chat response
     */
    generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse>;
}

/**
 * Contract for the semantic context service, which supplies enhanced
 * context retrieval for AI conversations based on semantic similarity.
 *
 * `getProgressiveContext` and `getSmartContext` are optional members, so
 * callers must check that they exist before invoking them.
 */
export interface SemanticContextService {
    /**
     * Initialize the semantic context service.
     *
     * @returns Promise that resolves once initialization has finished
     */
    initialize(): Promise<void>;

    /**
     * Retrieve semantic context based on relevance to the user query.
     *
     * @param noteId ID of the note the context is requested for
     * @param userQuery The user's query
     * @param maxResults Optional cap on the number of results
     * @param messages Optional conversation messages
     * @returns Promise resolving to the context as a string
     */
    getSemanticContext(noteId: string, userQuery: string, maxResults?: number, messages?: Message[]): Promise<string>;

    /**
     * Get progressive context based on depth (optional member).
     *
     * @param noteId ID of the note the context is requested for
     * @param depth Optional depth
     * @returns Promise resolving to the context as a string
     */
    getProgressiveContext?(noteId: string, depth?: number): Promise<string>;

    /**
     * Get a smart context selection that adapts to query complexity (optional member).
     *
     * @param noteId ID of the note the context is requested for
     * @param userQuery The user's query
     * @returns Promise resolving to the context as a string
     */
    getSmartContext?(noteId: string, userQuery: string): Promise<string>;

    /**
     * Enhance LLM context with agent tools.
     *
     * @param noteId ID of the note the context is requested for
     * @param query The query
     * @param showThinking Optional flag; NOTE(review): presumably includes thinking output in the context — confirm
     * @returns Promise resolving to the context as a string
     */
    getAgentToolsContext(noteId: string, query: string, showThinking?: boolean): Promise<string>;
}
tool calling is close to working getting closer to calling tools... we definitely need this closer to tool execution... agentic tool calling is...kind of working? 2025-04-06 20:50:08 +00:00			`import type { ToolCall } from './tools/tool_interfaces.js';`
use this new providerMetadata approach 2025-04-09 19:21:34 +00:00			`import type { ModelMetadata } from './providers/provider_options.js';`
tool calling is close to working getting closer to calling tools... we definitely need this closer to tool execution... agentic tool calling is...kind of working? 2025-04-06 20:50:08 +00:00
getting closer to streaming? even closer? closer streaming... this is darn close 2025-04-10 21:00:12 +00:00			`/**`
			`* Interface for chat messages between client and LLM models`
			`*/`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`export interface Message {`
tool calling is close to working getting closer to calling tools... we definitely need this closer to tool execution... agentic tool calling is...kind of working? 2025-04-06 20:50:08 +00:00			`role: 'user' \| 'assistant' \| 'system' \| 'tool';`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`content: string;`
tool calling is close to working getting closer to calling tools... we definitely need this closer to tool execution... agentic tool calling is...kind of working? 2025-04-06 20:50:08 +00:00			`name?: string;`
			`tool_call_id?: string;`
chore(server): fix more type errors 2025-05-28 19:57:55 +03:00			`tool_calls?: ToolCall[] \| null;`
getting closer to streaming? even closer? closer streaming... this is darn close 2025-04-10 21:00:12 +00:00			`sessionId?: string; // Optional session ID for WebSocket communication`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`}`

reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`// Define additional interfaces for tool-related types`
			`export interface ToolChoice {`
			`type: 'none' \| 'auto' \| 'function';`
			`function?: {`
			`name: string;`
			`};`
			`}`

			`export interface ToolData {`
			`type: 'function';`
			`function: {`
			`name: string;`
			`description: string;`
			`parameters: Record<string, unknown>;`
			`};`
			`}`

			`export interface ToolExecutionInfo {`
			`type: 'start' \| 'update' \| 'complete' \| 'error';`
			`tool: {`
			`name: string;`
			`arguments: Record<string, unknown>;`
			`};`
			`result?: string \| Record<string, unknown>;`
			`}`

some more docstrings 2025-04-09 21:33:30 +00:00			`/**`
			`* Interface for streaming response chunks`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* This is the standardized format for all streaming chunks across`
			`* different providers (OpenAI, Anthropic, Ollama, etc.).`
			`* The original provider-specific chunks are available through`
			`* the extended interface in the stream_manager.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* See STREAMING.md for complete documentation on streaming usage.`
			`*/`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`export interface StreamChunk {`
some more docstrings 2025-04-09 21:33:30 +00:00			`/** The text content in this chunk (may be empty for status updates) */`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`text: string;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** Whether this is the final chunk in the stream */`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`done: boolean;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** Optional token usage statistics (rarely available in streaming mode) */`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`usage?: {`
			`promptTokens?: number;`
			`completionTokens?: number;`
			`totalTokens?: number;`
			`};`
hmm 2025-04-12 19:09:25 +00:00
			`/**`
getting closer to streaming? even closer? closer streaming... this is darn close 2025-04-10 21:00:12 +00:00			`* Raw provider-specific data from the original response chunk`
			`* This can include thinking state, tool execution info, etc.`
			`*/`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`raw?: Record<string, unknown>;`
okay openai tool calling response is close to working 2025-04-14 19:39:29 +00:00
			`/**`
			`* Tool calls from the LLM (if any)`
			`* These may be accumulated over multiple chunks during streaming`
			`*/`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`tool_calls?: ToolCall[];`
get anthropic sdk to send tools nice close what is even going on lol anthropic tools mostly work 2025-04-14 23:42:38 +00:00
			`/**`
			`* Tool execution information during streaming`
			`* Includes tool name, args, and execution status`
			`*/`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`toolExecution?: ToolExecutionInfo;`
			`}`

			`/**`
			`* Tool execution status for feedback to models`
			`*/`
			`export interface ToolExecutionStatus {`
			`toolCallId: string;`
			`name: string;`
			`success: boolean;`
			`result: string;`
			`error?: string;`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`}`

yes, this finally does set streaming to true 2025-04-09 19:53:45 +00:00			`/**`
			`* Options for chat completion requests`
hmm 2025-04-12 19:09:25 +00:00			`*`
yes, this finally does set streaming to true 2025-04-09 19:53:45 +00:00			`* Key properties:`
			`* - stream: If true, the response will be streamed`
			`* - model: Model name to use`
			`* - provider: Provider to use (openai, anthropic, ollama, etc.)`
			`* - enableTools: If true, enables tool support`
hmm 2025-04-12 19:09:25 +00:00			`*`
yes, this finally does set streaming to true 2025-04-09 19:53:45 +00:00			`* The stream option is particularly important and should be consistently handled`
			`* throughout the pipeline. It should be explicitly set to true or false.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* Streaming supports two approaches:`
			`* 1. Callback-based: Provide a streamCallback to receive chunks directly`
			`* 2. API-based: Use the stream property in the response to process chunks`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* See STREAMING.md for complete documentation on streaming usage.`
yes, this finally does set streaming to true 2025-04-09 19:53:45 +00:00			`*/`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`export interface ChatCompletionOptions {`
			`model?: string;`
			`temperature?: number;`
			`maxTokens?: number;`
add agentic thinking to chat 2025-03-19 18:49:14 +00:00			`topP?: number;`
			`frequencyPenalty?: number;`
			`presencePenalty?: number;`
			`showThinking?: boolean;`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`systemPrompt?: string;`
do a better job at centralizing json extraction, and query "enhancer" search queries 2025-04-01 21:42:09 +00:00			`preserveSystemPrompt?: boolean; // Whether to preserve existing system message`
			`bypassFormatter?: boolean; // Whether to bypass the message formatter entirely`
			`expectsJsonResponse?: boolean; // Whether this request expects a JSON response`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/**`
			`* Whether to stream the response`
			`* When true, response will be delivered incrementally via either:`
			`* - The streamCallback if provided`
			`* - The stream property in the response object`
			`*/`
			`stream?: boolean;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/**`
			`* Optional callback function for streaming responses`
			`* When provided along with stream:true, this function will be called`
			`* for each chunk of the response.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* @param text The text content in this chunk`
			`* @param isDone Whether this is the final chunk`
			`* @param originalChunk Optional original provider-specific chunk for advanced usage`
			`*/`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`streamCallback?: (text: string, isDone: boolean, originalChunk?: Record<string, unknown>) => Promise<void> \| void;`
hmm 2025-04-12 19:09:25 +00:00
tool calling is close to working getting closer to calling tools... we definitely need this closer to tool execution... agentic tool calling is...kind of working? 2025-04-06 20:50:08 +00:00			`enableTools?: boolean; // Whether to enable tool calling`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`tools?: ToolData[]; // Tools to provide to the LLM`
			`tool_choice?: ToolChoice; // Tool choice parameter for the LLM`
add some more useful tools CLOSER.... works? 2025-04-07 21:57:18 +00:00			`useAdvancedContext?: boolean; // Whether to use advanced context enrichment`
reduce the use of any, part 1 2025-04-16 17:07:54 +00:00			`toolExecutionStatus?: ToolExecutionStatus[]; // Status information about executed tools for feedback`
use this new providerMetadata approach 2025-04-09 19:21:34 +00:00			`providerMetadata?: ModelMetadata; // Metadata about the provider and model capabilities`
saving chats finally works again, even if the UI is kinda...broken wow 2025-04-13 21:16:18 +00:00			`sessionId?: string; // Session ID for storing tool execution results`
hmm 2025-04-12 19:09:25 +00:00
			`/**`
			`* Maximum number of tool execution iterations`
			`* Used to prevent infinite loops in tool execution`
			`*/`
			`maxToolIterations?: number;`

			`/**`
			`* Current tool execution iteration counter`
			`* Internal use for tracking nested tool executions`
			`*/`
			`currentToolIteration?: number;`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`}`

some more docstrings 2025-04-09 21:33:30 +00:00			`/**`
			`* Response from a chat completion request`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* When streaming is used, the behavior depends on how streaming was requested:`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* 1. With streamCallback: The text field contains the complete response`
			`* collected from all chunks, and the stream property is not present.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* 2. Without streamCallback: The text field is initially empty, and the`
			`* stream property provides a function to process chunks and collect`
			`* the complete response.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* See STREAMING.md for complete documentation on streaming usage.`
			`*/`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`export interface ChatResponse {`
hmm 2025-04-12 19:09:25 +00:00			`/**`
			`* The complete text response.`
some more docstrings 2025-04-09 21:33:30 +00:00			`* If streaming was used with streamCallback, this contains the collected response.`
			`* If streaming was used without streamCallback, this is initially empty.`
			`*/`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`text: string;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** The model that generated the response */`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`model: string;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** The provider that served the request (openai, anthropic, ollama, etc.) */`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`provider: string;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** Token usage statistics (may not be available when streaming) */`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`usage?: {`
			`promptTokens?: number;`
			`completionTokens?: number;`
			`totalTokens?: number;`
			`};`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/**`
			`* Stream processor function - only present when streaming is enabled`
			`* without a streamCallback. When called with a chunk processor function,`
			`* it returns a Promise that resolves to the complete response text.`
hmm 2025-04-12 19:09:25 +00:00			`*`
some more docstrings 2025-04-09 21:33:30 +00:00			`* @param callback Function to process each chunk of the stream`
			`* @returns Promise resolving to the complete text after stream processing`
			`*/`
I'm 100% going to have to destroy this commit later 2025-03-09 02:19:26 +00:00			`stream?: (callback: (chunk: StreamChunk) => Promise<void> \| void) => Promise<string>;`
hmm 2025-04-12 19:09:25 +00:00
some more docstrings 2025-04-09 21:33:30 +00:00			`/** Tool calls from the LLM (if tools were used and the model supports them) */`
chore(server): fix more type errors 2025-05-28 19:57:55 +03:00			`tool_calls?: ToolCall[] \| null;`
hey look, it doesn't crash again 2025-03-02 19:39:10 -08:00			`}`

			`export interface AIService {`
			`/**`
			`* Generate a chat completion response`
			`*/`
			`generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse>;`

			`/**`
			`* Check if the service can be used (API key is set, etc.)`
			`*/`
			`isAvailable(): boolean;`

			`/**`
			`* Get the name of the service`
			`*/`
			`getName(): string;`
			`}`
add agentic thinking to chat 2025-03-19 18:49:14 +00:00
			`/**`
			`* Interface for the semantic context service, which provides enhanced context retrieval`
			`* for AI conversations based on semantic similarity.`
			`*/`
			`export interface SemanticContextService {`
			`/**`
			`* Initialize the semantic context service`
			`*/`
			`initialize(): Promise<void>;`

			`/**`
			`* Retrieve semantic context based on relevance to user query`
			`*/`
dynamically adjust context window sizes based on conversation context 2025-03-30 22:13:40 +00:00			`getSemanticContext(noteId: string, userQuery: string, maxResults?: number, messages?: Message[]): Promise<string>;`
add agentic thinking to chat 2025-03-19 18:49:14 +00:00
			`/**`
			`* Get progressive context based on depth`
			`*/`
			`getProgressiveContext?(noteId: string, depth?: number): Promise<string>;`

			`/**`
			`* Get smart context selection that adapts to query complexity`
			`*/`
			`getSmartContext?(noteId: string, userQuery: string): Promise<string>;`

			`/**`
			`* Enhance LLM context with agent tools`
			`*/`
			`getAgentToolsContext(noteId: string, query: string, showThinking?: boolean): Promise<string>;`
			`}`