From 208267edb82ba0d8c9f718f27da4fa70cd095b43 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Wed, 9 Apr 2025 01:24:43 +0000
Subject: [PATCH] increase context window size

---
 .../llm/constants/provider_constants.ts   | 32 +++++++++----------
 .../llm/embeddings/providers/ollama.ts    | 16 +++++-----
 .../llm/embeddings/providers/voyage.ts    |  8 ++---
 .../llm/interfaces/model_capabilities.ts  |  6 ++--
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/services/llm/constants/provider_constants.ts b/src/services/llm/constants/provider_constants.ts
index e4108daf0..9546fed01 100644
--- a/src/services/llm/constants/provider_constants.ts
+++ b/src/services/llm/constants/provider_constants.ts
@@ -19,37 +19,37 @@ export const PROVIDER_CONSTANTS = {
                 id: 'claude-3-7-sonnet-20240620',
                 name: 'Claude 3.7 Sonnet',
                 description: 'Most intelligent model with hybrid reasoning capabilities',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-5-haiku-20240307',
                 name: 'Claude 3.5 Haiku',
                 description: 'Improved version of Haiku with better performance',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-opus-20240229',
                 name: 'Claude 3 Opus',
                 description: 'Most capable model for highly complex tasks',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-sonnet-20240229',
                 name: 'Claude 3 Sonnet',
                 description: 'Ideal balance of intelligence and speed',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-3-haiku-20240307',
                 name: 'Claude 3 Haiku',
                 description: 'Fastest and most compact model',
-                maxTokens: 4096
+                maxTokens: 8192
             },
             {
                 id: 'claude-2.1',
                 name: 'Claude 2.1',
                 description: 'Previous generation model',
-                maxTokens: 4096
+                maxTokens: 8192
             }
         ]
     },
@@ -86,7 +86,7 @@ export const PROVIDER_CONSTANTS = {
                 id: 'gpt-3.5-turbo',
                 name: 'GPT-3.5 Turbo',
                 description: 'Fast and efficient model for most tasks',
-                maxTokens: 4096
+                maxTokens: 8192
             }
         ]
     },
@@ -100,14 +100,14 @@ export const PROVIDER_CONSTANTS = {
             OVERLAP: 200
         },
         MODEL_DIMENSIONS: {
-            default: 4096,
-            llama2: 4096,
-            mixtral: 4096,
-            'mistral': 4096
+            default: 8192,
+            llama2: 8192,
+            mixtral: 8192,
+            'mistral': 8192
         },
         MODEL_CONTEXT_WINDOWS: {
             default: 8192,
-            llama2: 4096,
+            llama2: 8192,
             mixtral: 8192,
             'mistral': 8192
         }
     }
@@ -147,9 +147,9 @@ export const LLM_CONSTANTS = {

     // Model-specific embedding dimensions for Ollama models
     OLLAMA_MODEL_DIMENSIONS: {
-        "llama3": 4096,
-        "llama3.1": 4096,
-        "mistral": 4096,
+        "llama3": 8192,
+        "llama3.1": 8192,
+        "mistral": 8192,
         "nomic": 768,
         "mxbai": 1024,
         "nomic-embed-text": 768,
@@ -167,7 +167,7 @@ export const LLM_CONSTANTS = {
         "mxbai": 32768,
         "nomic-embed-text": 32768,
         "mxbai-embed-large": 32768,
-        "default": 4096
+        "default": 8192
     },

     // Batch size configuration
diff --git a/src/services/llm/embeddings/providers/ollama.ts b/src/services/llm/embeddings/providers/ollama.ts
index 56db285de..cc63000a0 100644
--- a/src/services/llm/embeddings/providers/ollama.ts
+++ b/src/services/llm/embeddings/providers/ollama.ts
@@ -42,7 +42,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             // First try the /api/show endpoint which has detailed model information
             const url = new URL(`${this.baseUrl}/api/show`);
             url.searchParams.append('name', modelName);
-            
+
             const showResponse = await fetch(url, {
                 method: 'GET',
                 headers: { "Content-Type": "application/json" },
@@ -52,9 +52,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!showResponse.ok) {
                 throw new Error(`HTTP error! status: ${showResponse.status}`);
             }
-            
+
             const data = await showResponse.json();
-            
+
             if (data && data.parameters) {
                 const params = data.parameters;
                 // Extract context length from parameters (different models might use different parameter names)
@@ -175,9 +175,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!testResponse.ok) {
                 throw new Error(`HTTP error! status: ${testResponse.status}`);
             }
-            
+
             const data = await testResponse.json();
-            
+
             if (data && Array.isArray(data.embedding)) {
                 return data.embedding.length;
             } else {
@@ -215,7 +215,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {

             // Trim text if it might exceed context window (rough character estimate)
             // This is a simplistic approach - ideally we'd count tokens properly
-            const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+            const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
             const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;

             const response = await fetch(`${this.baseUrl}/api/embeddings`, {
@@ -232,9 +232,9 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
             if (!response.ok) {
                 throw new Error(`HTTP error! status: ${response.status}`);
             }
-            
+
             const data = await response.json();
-            
+
             if (data && Array.isArray(data.embedding)) {
                 // Success! Return the embedding
                 return new Float32Array(data.embedding);
diff --git a/src/services/llm/embeddings/providers/voyage.ts b/src/services/llm/embeddings/providers/voyage.ts
index 874a97cde..2db8254d0 100644
--- a/src/services/llm/embeddings/providers/voyage.ts
+++ b/src/services/llm/embeddings/providers/voyage.ts
@@ -118,14 +118,14 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         if (modelName.includes('voyage-2')) {
             return {
                 dimension: dimension || 1024,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
         } else if (modelName.includes('voyage-lite-02')) {
             return {
                 dimension: dimension || 768,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
@@ -133,7 +133,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
             // Default for other Voyage models
             return {
                 dimension: dimension || 1024,
-                contextWidth: 4096,
+                contextWidth: 8192,
                 name: modelName,
                 type: 'float32'
             };
@@ -170,7 +170,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
         const modelInfo = await this.getModelInfo(modelName);

         // Trim text if it might exceed context window (rough character estimate)
-        const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token
+        const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
         const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;

         const response = await fetch(`${this.baseUrl}/embeddings`, {
diff --git a/src/services/llm/interfaces/model_capabilities.ts b/src/services/llm/interfaces/model_capabilities.ts
index 75dc4251b..ce340bd13 100644
--- a/src/services/llm/interfaces/model_capabilities.ts
+++ b/src/services/llm/interfaces/model_capabilities.ts
@@ -17,7 +17,7 @@ export interface ModelCapabilities {
  * Default model capabilities for unknown models
  */
 export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
-    contextWindowTokens: 4096,
+    contextWindowTokens: 8192,
     contextWindowChars: 16000, // ~4 chars per token estimate
     maxCompletionTokens: 1024,
     hasFunctionCalling: false,
@@ -32,7 +32,7 @@ export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
 export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
     // OpenAI models
     'gpt-3.5-turbo': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000,
         hasFunctionCalling: true
     },
@@ -95,7 +95,7 @@ export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
         contextWindowChars: 32000
     },
     'llama2': {
-        contextWindowTokens: 4096,
+        contextWindowTokens: 8192,
         contextWindowChars: 16000
     }
 };
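
Note for reviewers: below is a minimal sketch of the trimming heuristic these new defaults feed into, assuming a context width measured in tokens and the ~4 characters-per-token estimate used in ollama.ts and voyage.ts above; the helper name is hypothetical and not part of this patch.

// Hypothetical helper mirroring the heuristic in ollama.ts / voyage.ts:
// fall back to 8192 tokens when the model's context width is unknown,
// then approximate the character budget as ~4 characters per token.
function trimToContextWindow(text: string, contextWidthTokens?: number): string {
    const charLimit = (contextWidthTokens || 8192) * 4; // rough estimate: avg 4 chars per token
    return text.length > charLimit ? text.substring(0, charLimit) : text;
}

// Example: a 40,000-character note is cut to the 32,768-character budget
// implied by the new 8192-token default; the old 4096-token default
// allowed only 16,384 characters before truncation.
const truncated = trimToContextWindow("x".repeat(40_000));
console.log(truncated.length); // 32768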