From 208267edb82ba0d8c9f718f27da4fa70cd095b43 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Wed, 9 Apr 2025 01:24:43 +0000
Subject: [PATCH] increase context window size

---
 .../llm/constants/provider_constants.ts   | 32 +++++++++----------
 .../llm/embeddings/providers/ollama.ts    | 16 +++++-----
 .../llm/embeddings/providers/voyage.ts    |  8 ++---
 .../llm/interfaces/model_capabilities.ts  |  6 ++--
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/services/llm/constants/provider_constants.ts b/src/services/llm/constants/provider_constants.ts
index e4108daf0..9546fed01 100644
--- a/src/services/llm/constants/provider_constants.ts
+++ b/src/services/llm/constants/provider_constants.ts
@@ -19,37 +19,37 @@ export const PROVIDER_CONSTANTS = {
                 id: 'claude-3-7-sonnet-20240620',
                 name: 'Claude 3.7 Sonnet',
                 description: 'Most intelligent model with hybrid reasoning capabilities',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-5-haiku-20240307',
                 name: 'Claude 3.5 Haiku',
                 description: 'Improved version of Haiku with better performance',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-opus-20240229',
                 name: 'Claude 3 Opus',
                 description: 'Most capable model for highly complex tasks',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-sonnet-20240229',
                 name: 'Claude 3 Sonnet',
                 description: 'Ideal balance of intelligence and speed',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-haiku-20240307',
                 name: 'Claude 3 Haiku',
                 description: 'Fastest and most compact model',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-2.1',
                 name: 'Claude 2.1',
                 description: 'Previous generation model',
-                maxTokens: 4096
+                maxTokens: 8192
             }
         ]
     },
@@ -86,7 +86,7 @@ export const PROVIDER_CONSTANTS = {
                 id: 'gpt-3.5-turbo',
                 name: 'GPT-3.5 Turbo',
                 description: 'Fast and efficient model for most tasks',
-                maxTokens: 4096
+                maxTokens: 8192
             }
         ]
     },
@@ -100,14 +100,14 @@ export const PROVIDER_CONSTANTS = {
             OVERLAP: 200
         },
         MODEL_DIMENSIONS: {
-            default: 4096,
-            llama2: 4096,
-            mixtral: 4096,
-            'mistral': 4096
+            default: 8192,
+            llama2: 8192,
+            mixtral: 8192,
+            'mistral': 8192
         },
         MODEL_CONTEXT_WINDOWS: {
             default: 8192,
-            llama2: 4096,
+            llama2: 8192,
             mixtral: 8192,
             'mistral': 8192
         }
     }
@@ -147,9 +147,9 @@ export const LLM_CONSTANTS = {

     // Model-specific embedding dimensions for Ollama models
     OLLAMA_MODEL_DIMENSIONS: {
-        "llama3": 4096,
-        "llama3.1": 4096,
-        "mistral": 4096,
+        "llama3": 8192,
+        "llama3.1": 8192,
+        "mistral": 8192,
         "nomic": 768,
         "mxbai": 1024,
         "nomic-embed-text": 768,
@@ -167,7 +167,7 @@ export const LLM_CONSTANTS = {
         "mxbai": 32768,
         "nomic-embed-text": 32768,
         "mxbai-embed-large": 32768,
-        "default": 4096
+        "default": 8192
     },

     // Batch size configuration
diff --git a/src/services/llm/embeddings/providers/ollama.ts b/src/services/llm/embeddings/providers/ollama.ts
index 56db285de..cc63000a0 100644
--- a/src/services/llm/embeddings/providers/ollama.ts
+++ b/src/services/llm/embeddings/providers/ollama.ts
@@ -42,7 +42,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             // First try the /api/show endpoint which has detailed model information
             const url = new URL(`${this.baseUrl}/api/show`);
             url.searchParams.append('name', modelName);
-            
+
             const showResponse = await fetch(url, {
                 method: 'GET',
                 headers: { "Content-Type": "application/json" },
@@ -52,9 +52,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!showResponse.ok) {
                 throw new Error(`HTTP error! status: ${showResponse.status}`);
             }
-            
+
             const data = await showResponse.json();
-            
+
             if (data && data.parameters) {
                 const params = data.parameters;
                 // Extract context length from parameters (different models might use different parameter names)
@@ -175,9 +175,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!testResponse.ok) {
                 throw new Error(`HTTP error! status: ${testResponse.status}`);
             }
-            
+
             const data = await testResponse.json();
-            
+
             if (data && Array.isArray(data.embedding)) {
                 return data.embedding.length;
             } else {
@@ -215,7 +215,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {

             // Trim text if it might exceed context window (rough character estimate)
             // This is a simplistic approach - ideally we'd count tokens properly
-            const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+            const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;

             const response = await fetch(`${this.baseUrl}/api/embeddings`, {
@@ -232,9 +232,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!response.ok) {
                 throw new Error(`HTTP error! status: ${response.status}`);
             }
-            
+
             const data = await response.json();
-            
+
             if (data && Array.isArray(data.embedding)) {
                 // Success! Return the embedding
                 return new Float32Array(data.embedding);
diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts
index 874a97cde..2db8254d0 100644
--- a/src/services/llm/embeddings/providers/voyage.ts
+++ b/src/services/llm/embeddings/providers/voyage.ts
@@ -118,14 +118,14 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         if (modelName.includes('voyage-2')) {
             return {
                 dimension: dimension || 1024,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
         } else if (modelName.includes('voyage-lite-02')) {
             return {
                 dimension: dimension || 768,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
@@ -133,7 +133,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             // Default for other Voyage models
             return {
                 dimension: dimension || 1024,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
@@ -170,7 +170,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         const modelInfo = await this.getModelInfo(modelName);

         // Trim text if it might exceed context window (rough character estimate)
-        const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+        const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
         const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;

         const response = await fetch(`${this.baseUrl}/embeddings`, {
diff --git a/src/services/llm/interfaces/model_capabilities.ts b/src/services/llm/interfaces/model_capabilities.ts
index 75dc4251b..ce340bd13 100644
--- a/src/services/llm/interfaces/model_capabilities.ts
+++ b/src/services/llm/interfaces/model_capabilities.ts
@@ -17,7 +17,7 @@ export interface ModelCapabilities {
  * Default model capabilities for unknown models
  */
 export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
-    contextWindowTokens: 4096,
+    contextWindowTokens: 8192,
     contextWindowChars: 16000, // ~4 chars per token estimate
     maxCompletionTokens: 1024,
     hasFunctionCalling: false,
@@ -32,7 +32,7 @@ export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
 export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
     // OpenAI models
     'gpt-3.5-turbo': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000,
         hasFunctionCalling: true
     },
@@ -95,7 +95,7 @@ export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
         contextWindowChars: 32000
     },
     'llama2': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000
     }
 };
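
Note for reviewers: below is a minimal sketch of the trimming heuristic these new defaults feed into, assuming a context width measured in tokens and the ~4 characters-per-token estimate used in ollama.ts and voyage.ts above; the helper name is hypothetical and not part of this patch.

// Hypothetical helper mirroring the heuristic in ollama.ts / voyage.ts:
// fall back to 8192 tokens when the model's context width is unknown,
// then approximate the character budget as ~4 characters per token.
function trimToContextWindow(text: string, contextWidthTokens?: number): string {
    const charLimit = (contextWidthTokens || 8192) * 4; // rough estimate: avg 4 chars per token
    return text.length > charLimit ? text.substring(0, charLimit) : text;
}

// Example: a 40,000-character note is cut to the 32,768-character budget
// implied by the new 8192-token default; the old 4096-token default
// allowed only 16,384 characters before truncation.
const truncated = trimToContextWindow("x".repeat(40_000));
console.log(truncated.length); // 32768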