diff --git a/src/services/llm/constants/search_constants.ts b/src/services/llm/constants/search_constants.ts
index 2292e09bb..bc1689961 100644
--- a/src/services/llm/constants/search_constants.ts
+++ b/src/services/llm/constants/search_constants.ts
@@ -47,6 +47,7 @@ export const SEARCH_CONSTANTS = {
     // Token/char limits
     LIMITS: {
         DEFAULT_NOTE_SUMMARY_LENGTH: 500,
+        DEFAULT_MAX_TOKENS: 4096,
         RELATIONSHIP_TOOL_MAX_TOKENS: 50,
         VECTOR_SEARCH_MAX_TOKENS: 500,
         QUERY_PROCESSOR_MAX_TOKENS: 300,
diff --git a/src/services/llm/context_extractors/note_navigator_tool.ts b/src/services/llm/context_extractors/note_navigator_tool.ts
index 785d51566..27ec47659 100644
--- a/src/services/llm/context_extractors/note_navigator_tool.ts
+++ b/src/services/llm/context_extractors/note_navigator_tool.ts
@@ -289,7 +289,7 @@ export class NoteNavigatorTool {
     /**
      * Search for notes by title
      */
-    searchNotesByTitle(searchTerm: string, limit: number = 10): NoteInfo[] {
+    searchNotesByTitle(searchTerm: string, limit: number = SEARCH_CONSTANTS.HIERARCHY.MAX_NOTES_PER_QUERY): NoteInfo[] {
         try {
             if (!searchTerm || searchTerm.trim().length === 0) {
                 return [];
@@ -369,14 +369,14 @@
         if (paths.length > 1) {
             result += `This note appears in ${paths.length} different locations:\n`;

-            // Show max 3 paths to avoid overwhelming context
-            for (let i = 0; i < Math.min(3, paths.length); i++) {
+            // Show max paths to avoid overwhelming context
+            for (let i = 0; i < Math.min(SEARCH_CONSTANTS.HIERARCHY.MAX_PATHS_TO_SHOW, paths.length); i++) {
                 const path = paths[i];
                 result += `${i+1}. ${path.notePathTitles.join(' > ')}\n`;
             }

-            if (paths.length > 3) {
-                result += `... and ${paths.length - 3} more locations\n`;
+            if (paths.length > SEARCH_CONSTANTS.HIERARCHY.MAX_PATHS_TO_SHOW) {
+                result += `... and ${paths.length - SEARCH_CONSTANTS.HIERARCHY.MAX_PATHS_TO_SHOW} more locations\n`;
             }
         } else {
             // Just one path
@@ -385,7 +385,7 @@
         }

         // Children info using the async function
-        const children = await this.getChildNotes(noteId, 5);
+        const children = await this.getChildNotes(noteId, SEARCH_CONSTANTS.CONTEXT.MAX_POINTS);

         if (children.length > 0) {
             result += `\nContains ${note.children.length} child notes`;
@@ -520,7 +520,7 @@
     /**
      * Get child notes of a specified note
      */
-    async getChildNotes(noteId: string, limit: number = 10): Promise> {
+    async getChildNotes(noteId: string, limit: number = SEARCH_CONSTANTS.CONTEXT.MAX_CHILDREN): Promise> {
         try {
             const note = becca.notes[noteId];
@@ -564,7 +564,7 @@
     /**
      * Find notes linked to/from the specified note
      */
-    async getLinkedNotes(noteId: string, limit: number = 10): Promise> {
+    async getLinkedNotes(noteId: string, limit: number = SEARCH_CONSTANTS.CONTEXT.MAX_LINKS): Promise> {
         try {
             const note = becca.notes[noteId];
diff --git a/src/services/llm/embeddings/chunking/chunking_processor.ts b/src/services/llm/embeddings/chunking/chunking_processor.ts
index 4963ffd2d..47eeae52c 100644
--- a/src/services/llm/embeddings/chunking/chunking_processor.ts
+++ b/src/services/llm/embeddings/chunking/chunking_processor.ts
@@ -5,6 +5,7 @@ import becca from "../../../../becca/becca.js";
 import cls from "../../../../services/cls.js";
 import type { NoteEmbeddingContext } from "../types.js";
 import { LLM_CONSTANTS } from "../../../llm/constants/provider_constants.js";
+import { EMBEDDING_PROCESSING } from '../../constants/search_constants.js';

 // Define error categories for better handling
 const ERROR_CATEGORIES = {
@@ -27,14 +28,14 @@
 };

 // Maximum time (in milliseconds) allowed for the entire chunking process
-const MAX_TOTAL_PROCESSING_TIME = 5 * 60 * 1000; // 5 minutes
+const MAX_TOTAL_PROCESSING_TIME = EMBEDDING_PROCESSING.MAX_TOTAL_PROCESSING_TIME;

 // Maximum number of retry attempts per chunk
-const MAX_CHUNK_RETRY_ATTEMPTS = 2;
+const MAX_CHUNK_RETRY_ATTEMPTS = EMBEDDING_PROCESSING.MAX_CHUNK_RETRY_ATTEMPTS;

 // Maximum time per chunk processing (to prevent individual chunks from hanging)
-const DEFAULT_MAX_CHUNK_PROCESSING_TIME = 60 * 1000; // 1 minute
-const OLLAMA_MAX_CHUNK_PROCESSING_TIME = 120 * 1000; // 2 minutes
+const DEFAULT_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.DEFAULT_MAX_CHUNK_PROCESSING_TIME;
+const OLLAMA_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.OLLAMA_MAX_CHUNK_PROCESSING_TIME;

 /**
  * Categorize an error as temporary or permanent based on its message
diff --git a/src/services/llm/embeddings/providers/openai.ts b/src/services/llm/embeddings/providers/openai.ts
index c48c0bf44..7e7782e72 100644
--- a/src/services/llm/embeddings/providers/openai.ts
+++ b/src/services/llm/embeddings/providers/openai.ts
@@ -5,6 +5,7 @@ import { NormalizationStatus } from "../embeddings_interface.js";
 import { LLM_CONSTANTS } from "../../constants/provider_constants.js";
 import type { EmbeddingModelInfo } from "../../interfaces/embedding_interfaces.js";
 import OpenAI from "openai";
+import { PROVIDER_EMBEDDING_CAPABILITIES } from '../../constants/search_constants.js';

 /**
  * OpenAI embedding provider implementation using the official SDK
@@ -40,7 +41,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
         if (!this.client && this.apiKey) {
             this.initClient();
         }
-        
+
         // Detect model capabilities
         const modelInfo = await this.getModelInfo(modelName);
@@ -64,7 +65,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
         try {
             // Get model details using the SDK
             const model = await this.client.models.retrieve(modelName);
-            
+
             if (model) {
                 // Different model families may have different ways of exposing context window
                 let contextWindow = 0;
@@ -72,7 +73,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 // Extract context window if available from the response
                 const modelData = model as any;
-                
+
                 if (modelData.context_window) {
                     contextWindow = modelData.context_window;
                 } else if (modelData.limits && modelData.limits.context_window) {
@@ -90,15 +91,11 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 // If we didn't get all the info, use defaults for missing values
                 if (!contextWindow) {
-                    // Set default context window based on model name patterns
-                    if (modelName.includes('ada') || modelName.includes('embedding-ada')) {
-                        contextWindow = LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI;
-                    } else if (modelName.includes('davinci')) {
-                        contextWindow = 8192;
-                    } else if (modelName.includes('embedding-3')) {
-                        contextWindow = 8191;
+                    // Set contextWindow based on model name patterns
+                    if (modelName.includes('embedding-3')) {
+                        contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-small'].contextWindow;
                     } else {
-                        contextWindow = LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI;
+                        contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow;
                     }
                 }
@@ -107,11 +104,11 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                     if (modelName.includes('ada') || modelName.includes('embedding-ada')) {
                         dimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.OPENAI.ADA;
                     } else if (modelName.includes('embedding-3-small')) {
-                        dimension = 1536;
+                        dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-small'].dimension;
                     } else if (modelName.includes('embedding-3-large')) {
-                        dimension = 3072;
+                        dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS['text-embedding-3-large'].dimension;
                     } else {
-                        dimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.OPENAI.DEFAULT;
+                        dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.dimension;
                     }
                 }
@@ -155,7 +152,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
             const dimension = testEmbedding.length;

             // Use default context window
-            let contextWindow = LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI;
+            let contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow;

             const modelInfo: EmbeddingModelInfo = {
                 name: modelName,
@@ -170,8 +167,8 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
             return modelInfo;
         } catch (error: any) {
             // If detection fails, use defaults
-            const dimension = LLM_CONSTANTS.EMBEDDING_DIMENSIONS.OPENAI.DEFAULT;
-            const contextWindow = LLM_CONSTANTS.CONTEXT_WINDOW.OPENAI;
+            const dimension = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.dimension;
+            const contextWindow = PROVIDER_EMBEDDING_CAPABILITIES.OPENAI.MODELS.default.contextWindow;

             log.info(`Using default parameters for OpenAI model ${modelName}: dimension ${dimension}, context ${contextWindow}`);
@@ -209,7 +206,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 input: text,
                 encoding_format: "float"
             });
-            
+
             if (response && response.data && response.data[0] && response.data[0].embedding) {
                 return new Float32Array(response.data[0].embedding);
             } else {
@@ -258,7 +255,7 @@ export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
                 input: texts,
                 encoding_format: "float"
             });
-            
+
             if (response && response.data) {
                 // Sort the embeddings by index to ensure they match the input order
                 const sortedEmbeddings = response.data
diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts
index b0bae45d7..514e0f2f4 100644
--- a/src/services/llm/embeddings/providers/voyage.ts
+++ b/src/services/llm/embeddings/providers/voyage.ts
@@ -51,21 +51,17 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
      */
     private async fetchModelCapabilities(modelName: string): Promise {
         try {
-            // Get context window size from our local registry of known models
-            const modelBase = Object.keys(VOYAGE_MODEL_CONTEXT_WINDOWS).find(
+            // Find the closest matching model
+            const modelMapKey = Object.keys(PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS).find(
                 model => modelName.startsWith(model)
             ) || "default";

-            const modelInfo = VOYAGE_MODEL_CONTEXT_WINDOWS[modelBase as keyof typeof VOYAGE_MODEL_CONTEXT_WINDOWS];
-            const contextWindow = modelInfo.contextWidth;
-
-            // Get dimension from our registry of known models
-            const dimension = VOYAGE_MODEL_DIMENSIONS[modelBase as keyof typeof VOYAGE_MODEL_DIMENSIONS] ||
-                VOYAGE_MODEL_DIMENSIONS["default"];
+            // Use as keyof to tell TypeScript this is a valid key
+            const modelInfo = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS[modelMapKey as keyof typeof PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS];

             return {
-                dimension,
-                contextWidth: contextWindow,
+                dimension: modelInfo.dimension,
+                contextWidth: modelInfo.contextWidth,
                 name: modelName,
                 type: 'float32'
             };
@@ -86,8 +82,9 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         // Try to determine model capabilities
         const capabilities = await this.fetchModelCapabilities(modelName);
-        const contextWindow = capabilities?.contextWidth || 8192; // Default context window for Voyage
-        const knownDimension = capabilities?.dimension || 1024; // Default dimension for Voyage models
+        const defaults = PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS.default;
+        const contextWindow = capabilities?.contextWidth || defaults.contextWidth;
+        const knownDimension = capabilities?.dimension || defaults.dimension;

         // For Voyage, we can use known dimensions or detect with a test call
         try {
@@ -166,7 +163,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         const modelInfo = await this.getModelInfo(modelName);

         // Trim text if it might exceed context window (rough character estimate)
-        const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
+        const charLimit = (modelInfo.contextWidth || PROVIDER_EMBEDDING_CAPABILITIES.VOYAGE.MODELS.default.contextWidth) * 4; // Rough estimate: avg 4 chars per token
         const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;

         const response = await fetch(`${this.baseUrl}/embeddings`, {
diff --git a/src/services/llm/embeddings/storage.ts b/src/services/llm/embeddings/storage.ts
index 37df3255e..67063e63b 100644
--- a/src/services/llm/embeddings/storage.ts
+++ b/src/services/llm/embeddings/storage.ts
@@ -7,6 +7,7 @@ import type { EmbeddingResult } from "./types.js";
 import entityChangesService from "../../../services/entity_changes.js";
 import type { EntityChange } from "../../../services/entity_changes_interface.js";
 import { EMBEDDING_CONSTANTS } from "../constants/embedding_constants.js";
+import { SEARCH_CONSTANTS } from '../constants/search_constants.js';
 /**
  * Creates or updates an embedding for a note
  */
@@ -139,14 +140,14 @@ export async function findSimilarNotes(
     embedding: Float32Array,
     providerId: string,
     modelId: string,
-    limit = 10,
+    limit = SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS,
     threshold?: number, // Made optional to use constants
     useFallback = true // Whether to try other providers if no embeddings found
 ): Promise<{noteId: string, similarity: number, contentType?: string}[]> {
     // Import constants dynamically to avoid circular dependencies
     const llmModule = await import('../../../routes/api/llm.js');

-    // Use a default threshold of 0.65 if not provided
-    const actualThreshold = threshold || 0.65;
+    // Use default threshold if not provided
+    const actualThreshold = threshold || SEARCH_CONSTANTS.VECTOR_SEARCH.EXACT_MATCH_THRESHOLD;

     try {
         log.info(`Finding similar notes with provider: ${providerId}, model: ${modelId}, dimension: ${embedding.length}, threshold: ${actualThreshold}`);
diff --git a/src/services/llm/embeddings/vector_utils.ts b/src/services/llm/embeddings/vector_utils.ts
index c28c4681f..73037cfac 100644
--- a/src/services/llm/embeddings/vector_utils.ts
+++ b/src/services/llm/embeddings/vector_utils.ts
@@ -1,3 +1,5 @@
+import { SEARCH_CONSTANTS } from '../constants/search_constants.js';
+
 /**
  * Computes the cosine similarity between two vectors
  * If dimensions don't match, automatically adapts using the enhanced approach
@@ -549,9 +551,9 @@ export function ensembleSimilarity(
 ): number {
     // Default weights if not provided
     const weights = options.ensembleWeights ?? {
-        [SimilarityMetric.COSINE]: 0.6,
-        [SimilarityMetric.HYBRID]: 0.3,
-        [SimilarityMetric.DIM_AWARE]: 0.1
+        [SimilarityMetric.COSINE]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.COSINE,
+        [SimilarityMetric.HYBRID]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.HYBRID,
+        [SimilarityMetric.DIM_AWARE]: SEARCH_CONSTANTS.VECTOR_SEARCH.SIMILARITY_THRESHOLD.DIM_AWARE
     };

     let totalWeight = 0;
diff --git a/src/services/llm/providers/anthropic_service.ts b/src/services/llm/providers/anthropic_service.ts
index 2c3df355d..5a4531b5a 100644
--- a/src/services/llm/providers/anthropic_service.ts
+++ b/src/services/llm/providers/anthropic_service.ts
@@ -6,6 +6,7 @@ import type { AnthropicOptions } from './provider_options.js';
 import { getAnthropicOptions } from './providers.js';
 import log from '../../log.js';
 import Anthropic from '@anthropic-ai/sdk';
+import { SEARCH_CONSTANTS } from '../constants/search_constants.js';

 export class AnthropicService extends BaseAIService {
     private client: any = null;
@@ -78,7 +79,7 @@ export class AnthropicService extends BaseAIService {
                 model: providerOptions.model,
                 messages: anthropicMessages,
                 system: systemPrompt,
-                max_tokens: providerOptions.max_tokens || 4096,
+                max_tokens: providerOptions.max_tokens || SEARCH_CONSTANTS.LIMITS.DEFAULT_MAX_TOKENS,
                 temperature: providerOptions.temperature,
                 top_p: providerOptions.top_p,
                 stream: !!providerOptions.stream
diff --git a/src/services/llm/rest_chat_service.ts b/src/services/llm/rest_chat_service.ts
index 1a6ea79a5..e59612440 100644
--- a/src/services/llm/rest_chat_service.ts
+++ b/src/services/llm/rest_chat_service.ts
@@ -355,7 +355,7 @@ class RestChatService {
             createdAt: now,
             lastActive: now,
             metadata: {
-                temperature: 0.7,
+                temperature: SEARCH_CONSTANTS.TEMPERATURE.DEFAULT,
                 maxTokens: undefined,
                 model: undefined,
                 provider: undefined
@@ -1645,7 +1645,7 @@ class RestChatService {
             lastActive: now,
             noteContext: options.contextNoteId,
             metadata: {
-                temperature: options.temperature,
+                temperature: SEARCH_CONSTANTS.TEMPERATURE.DEFAULT,
                 maxTokens: options.maxTokens,
                 model: options.model,
                 provider: options.provider,
diff --git a/src/services/llm/tools/note_summarization_tool.ts b/src/services/llm/tools/note_summarization_tool.ts
index fa389eac0..a9f35737c 100644
--- a/src/services/llm/tools/note_summarization_tool.ts
+++ b/src/services/llm/tools/note_summarization_tool.ts
@@ -8,6 +8,7 @@ import type { Tool, ToolHandler } from './tool_interfaces.js';
 import log from '../../log.js';
 import becca from '../../../becca/becca.js';
 import aiServiceManager from '../ai_service_manager.js';
+import { SEARCH_CONSTANTS } from '../constants/search_constants.js';
 /**
  * Definition of the note summarization tool
  */
@@ -59,7 +60,7 @@ export class NoteSummarizationTool implements ToolHandler {
         focus?: string
     }): Promise {
         try {
-            const { noteId, maxLength = 500, format = 'paragraph', focus } = args;
+            const { noteId, maxLength = SEARCH_CONSTANTS.LIMITS.DEFAULT_NOTE_SUMMARY_LENGTH, format = 'paragraph', focus } = args;

             log.info(`Executing summarize_note tool - NoteID: "${noteId}", MaxLength: ${maxLength}, Format: ${format}`);
@@ -134,8 +135,8 @@ export class NoteSummarizationTool implements ToolHandler {
                 { role: 'system', content: 'You are a skilled summarizer. Create concise, accurate summaries while preserving the key information.' },
                 { role: 'user', content: prompt }
             ], {
-                temperature: 0.3, // Lower temperature for more focused summaries
-                maxTokens: 1000 // Enough tokens for the summary
+                temperature: SEARCH_CONSTANTS.TEMPERATURE.VECTOR_SEARCH, // Lower temperature for more focused summaries
+                maxTokens: SEARCH_CONSTANTS.LIMITS.VECTOR_SEARCH_MAX_TOKENS // Enough tokens for the summary
             });

             const summaryDuration = Date.now() - summaryStartTime;
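For reference, the sketch below shows the rough shape of the constant objects this diff assumes are exported from src/services/llm/constants/search_constants.ts (SEARCH_CONSTANTS, EMBEDDING_PROCESSING, PROVIDER_EMBEDDING_CAPABILITIES). Apart from LIMITS.DEFAULT_MAX_TOKENS, which the first hunk adds explicitly, and the LIMITS entries visible as context lines, every value here is inferred from the hard-coded literal it replaces elsewhere in the diff; values the diff does not show are marked as assumed and may differ from the real constants file.

// Hypothetical sketch only; inferred from the literals replaced in this diff.
export const SEARCH_CONSTANTS = {
    HIERARCHY: {
        MAX_NOTES_PER_QUERY: 10,     // replaces `limit: number = 10` in searchNotesByTitle
        MAX_PATHS_TO_SHOW: 3         // replaces the literal 3 in the note-path loop
    },
    CONTEXT: {
        MAX_POINTS: 5,               // replaces getChildNotes(noteId, 5)
        MAX_CHILDREN: 10,            // replaces `limit: number = 10` in getChildNotes
        MAX_LINKS: 10                // replaces `limit: number = 10` in getLinkedNotes
    },
    VECTOR_SEARCH: {
        DEFAULT_MAX_RESULTS: 10,     // replaces `limit = 10` in findSimilarNotes
        EXACT_MATCH_THRESHOLD: 0.65, // replaces the hard-coded 0.65 threshold
        SIMILARITY_THRESHOLD: {
            COSINE: 0.6,             // former ensemble weights in vector_utils.ts
            HYBRID: 0.3,
            DIM_AWARE: 0.1
        }
    },
    TEMPERATURE: {
        DEFAULT: 0.7,                // replaces the hard-coded 0.7 chat temperature
        VECTOR_SEARCH: 0.3           // replaces the 0.3 summarization temperature
    },
    LIMITS: {
        DEFAULT_NOTE_SUMMARY_LENGTH: 500,
        DEFAULT_MAX_TOKENS: 4096,    // added by this diff
        RELATIONSHIP_TOOL_MAX_TOKENS: 50,
        VECTOR_SEARCH_MAX_TOKENS: 500,
        QUERY_PROCESSOR_MAX_TOKENS: 300
    }
};

export const EMBEDDING_PROCESSING = {
    MAX_TOTAL_PROCESSING_TIME: 5 * 60 * 1000,     // was "5 minutes" in chunking_processor.ts
    MAX_CHUNK_RETRY_ATTEMPTS: 2,
    DEFAULT_MAX_CHUNK_PROCESSING_TIME: 60 * 1000, // was "1 minute"
    OLLAMA_MAX_CHUNK_PROCESSING_TIME: 120 * 1000  // was "2 minutes"
};

export const PROVIDER_EMBEDDING_CAPABILITIES = {
    OPENAI: {
        MODELS: {
            'text-embedding-3-small': { dimension: 1536, contextWindow: 8191 },
            'text-embedding-3-large': { dimension: 3072, contextWindow: 8191 }, // context window assumed, not shown in the diff
            default: { dimension: 1536, contextWindow: 8192 }                   // both values assumed (former LLM_CONSTANTS defaults)
        }
    },
    VOYAGE: {
        MODELS: {
            default: { dimension: 1024, contextWidth: 8192 } // replaces the old Voyage fallback literals
        }
    }
};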