Mirror of https://github.com/TriliumNext/Notes.git (synced 2025-09-02 13:02:00 +08:00)
increase context window size
This commit is contained in:
parent c95b9f798c · commit 208267edb8
@@ -19,37 +19,37 @@ export const PROVIDER_CONSTANTS = {
             id: 'claude-3-7-sonnet-20240620',
             name: 'Claude 3.7 Sonnet',
             description: 'Most intelligent model with hybrid reasoning capabilities',
-            maxTokens: 4096
+            maxTokens: 8192
         },
         {
             id: 'claude-3-5-haiku-20240307',
             name: 'Claude 3.5 Haiku',
             description: 'Improved version of Haiku with better performance',
-            maxTokens: 4096
+            maxTokens: 8192
         },
         {
             id: 'claude-3-opus-20240229',
             name: 'Claude 3 Opus',
             description: 'Most capable model for highly complex tasks',
-            maxTokens: 4096
+            maxTokens: 8192
         },
         {
             id: 'claude-3-sonnet-20240229',
             name: 'Claude 3 Sonnet',
             description: 'Ideal balance of intelligence and speed',
-            maxTokens: 4096
+            maxTokens: 8192
         },
         {
             id: 'claude-3-haiku-20240307',
             name: 'Claude 3 Haiku',
             description: 'Fastest and most compact model',
-            maxTokens: 4096
+            maxTokens: 8192
         },
         {
             id: 'claude-2.1',
             name: 'Claude 2.1',
             description: 'Previous generation model',
-            maxTokens: 4096
+            maxTokens: 8192
         }
     ]
 },
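Each entry above is plain data, so a caller only needs a lookup to pick up the new 8192-token cap. A minimal sketch of such a lookup, assuming a ProviderModel shape matching these fields (the interface and helper names are illustrative, not part of this commit):

interface ProviderModel {
    id: string;
    name: string;
    description: string;
    maxTokens: number;
}

// Find a model entry by id; fall back to 8192, the default this commit standardizes on.
function resolveMaxTokens(models: ProviderModel[], modelId: string): number {
    return models.find(m => m.id === modelId)?.maxTokens ?? 8192;
}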
@@ -86,7 +86,7 @@ export const PROVIDER_CONSTANTS = {
             id: 'gpt-3.5-turbo',
             name: 'GPT-3.5 Turbo',
             description: 'Fast and efficient model for most tasks',
-            maxTokens: 4096
+            maxTokens: 8192
         }
     ]
 },
@@ -100,14 +100,14 @@ export const PROVIDER_CONSTANTS = {
         OVERLAP: 200
     },
     MODEL_DIMENSIONS: {
-        default: 4096,
-        llama2: 4096,
-        mixtral: 4096,
-        'mistral': 4096
+        default: 8192,
+        llama2: 8192,
+        mixtral: 8192,
+        'mistral': 8192
     },
     MODEL_CONTEXT_WINDOWS: {
         default: 8192,
-        llama2: 4096,
+        llama2: 8192,
         mixtral: 8192,
         'mistral': 8192
     }
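MODEL_CONTEXT_WINDOWS keeps a 'default' key alongside per-model entries, so unknown models presumably fall back to it. A hedged sketch of that lookup (the helper name is mine, not code from this commit):

// Exact model name first, then the 'default' entry.
function contextWindowFor(windows: Record<string, number>, modelName: string): number {
    return windows[modelName] ?? windows['default'] ?? 8192;
}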
@@ -147,9 +147,9 @@ export const LLM_CONSTANTS = {
 
     // Model-specific embedding dimensions for Ollama models
     OLLAMA_MODEL_DIMENSIONS: {
-        "llama3": 4096,
-        "llama3.1": 4096,
-        "mistral": 4096,
+        "llama3": 8192,
+        "llama3.1": 8192,
+        "mistral": 8192,
         "nomic": 768,
         "mxbai": 1024,
         "nomic-embed-text": 768,
@@ -167,7 +167,7 @@ export const LLM_CONSTANTS = {
         "mxbai": 32768,
         "nomic-embed-text": 32768,
         "mxbai-embed-large": 32768,
-        "default": 4096
+        "default": 8192
     },
 
     // Batch size configuration
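OLLAMA_MODEL_DIMENSIONS mixes short keys ("nomic", "mxbai") with full model names ("nomic-embed-text"), which suggests some form of prefix matching when resolving a model. A sketch under that assumption, preferring the longest matching key (the resolution strategy is inferred, not shown in this commit):

function ollamaDimensionFor(dims: Record<string, number>, modelName: string): number {
    if (modelName in dims) return dims[modelName];
    // Longest-prefix match, so "nomic-embed-text" wins over "nomic".
    const match = Object.keys(dims)
        .filter(k => modelName.startsWith(k))
        .sort((a, b) => b.length - a.length)[0];
    return match !== undefined ? dims[match] : 8192; // assumed fallback
}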
@@ -215,7 +215,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
 
         // Trim text if it might exceed context window (rough character estimate)
         // This is a simplistic approach - ideally we'd count tokens properly
-        const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+        const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
         const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
 
         const response = await fetch(`${this.baseUrl}/api/embeddings`, {
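The trim above is a pure heuristic: roughly 4 characters per token, so an 8192-token window allows about 32768 characters. Restated as a standalone function (the name is mine; the logic is taken directly from the diff):

function trimToContextWindow(text: string, contextWidth = 8192): string {
    const charLimit = contextWidth * 4; // rough estimate: avg 4 chars per token
    return text.length > charLimit ? text.substring(0, charLimit) : text;
}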
@@ -118,14 +118,14 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         if (modelName.includes('voyage-2')) {
             return {
                 dimension: dimension || 1024,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
         } else if (modelName.includes('voyage-lite-02')) {
             return {
                 dimension: dimension || 768,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
@@ -133,7 +133,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         // Default for other Voyage models
         return {
             dimension: dimension || 1024,
-            contextWidth: 4096,
+            contextWidth: 8192,
             name: modelName,
             type: 'float32'
         };
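The Voyage provider dispatches on substring matches of the model name, with every branch now reporting the 8192-token context width. A condensed restatement of that branching (EmbeddingModelInfo is an assumed name for the returned shape, and the standalone function is my framing):

interface EmbeddingModelInfo {
    dimension: number;
    contextWidth: number;
    name: string;
    type: 'float32';
}

function voyageModelInfo(modelName: string, dimension?: number): EmbeddingModelInfo {
    const base = { name: modelName, type: 'float32' as const, contextWidth: 8192 };
    if (modelName.includes('voyage-2')) return { ...base, dimension: dimension || 1024 };
    if (modelName.includes('voyage-lite-02')) return { ...base, dimension: dimension || 768 };
    return { ...base, dimension: dimension || 1024 }; // default for other Voyage models
}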
@@ -170,7 +170,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         const modelInfo = await this.getModelInfo(modelName);
 
         // Trim text if it might exceed context window (rough character estimate)
-        const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+        const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
         const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
 
         const response = await fetch(`${this.baseUrl}/embeddings`, {
@@ -17,7 +17,7 @@ export interface ModelCapabilities {
  * Default model capabilities for unknown models
  */
 export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
-    contextWindowTokens: 4096,
+    contextWindowTokens: 8192,
     contextWindowChars: 16000, // ~4 chars per token estimate
     maxCompletionTokens: 1024,
     hasFunctionCalling: false,
@@ -32,7 +32,7 @@ export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
 export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
     // OpenAI models
     'gpt-3.5-turbo': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000,
         hasFunctionCalling: true
     },
@@ -95,7 +95,7 @@ export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
         contextWindowChars: 32000
     },
     'llama2': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000
     }
 };
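Since MODEL_CAPABILITIES stores Partial<ModelCapabilities> overrides, consumers presumably spread them over DEFAULT_MODEL_CAPABILITIES, so any model without an explicit entry inherits the new 8192-token default. A minimal sketch of that merge (the helper is an assumption, not code from this commit):

function capabilitiesFor(modelName: string): ModelCapabilities {
    return {
        ...DEFAULT_MODEL_CAPABILITIES,
        ...(MODEL_CAPABILITIES[modelName] ?? {})
    };
}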