Increase default context window size from 4096 to 8192 tokens

This commit is contained in:
perf3ct 2025-04-09 01:24:43 +00:00
parent c95b9f798c
commit 208267edb8
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 31 additions and 31 deletions

View File

@ -19,37 +19,37 @@ export const PROVIDER_CONSTANTS = {
id: 'claude-3-7-sonnet-20240620', id: 'claude-3-7-sonnet-20240620',
name: 'Claude 3.7 Sonnet', name: 'Claude 3.7 Sonnet',
description: 'Most intelligent model with hybrid reasoning capabilities', description: 'Most intelligent model with hybrid reasoning capabilities',
maxTokens: 4096 maxTokens: 8192
}, },
{ {
id: 'claude-3-5-haiku-20240307', id: 'claude-3-5-haiku-20240307',
name: 'Claude 3.5 Haiku', name: 'Claude 3.5 Haiku',
description: 'Improved version of Haiku with better performance', description: 'Improved version of Haiku with better performance',
maxTokens: 4096 maxTokens: 8192
}, },
{ {
id: 'claude-3-opus-20240229', id: 'claude-3-opus-20240229',
name: 'Claude 3 Opus', name: 'Claude 3 Opus',
description: 'Most capable model for highly complex tasks', description: 'Most capable model for highly complex tasks',
maxTokens: 4096 maxTokens: 8192
}, },
{ {
id: 'claude-3-sonnet-20240229', id: 'claude-3-sonnet-20240229',
name: 'Claude 3 Sonnet', name: 'Claude 3 Sonnet',
description: 'Ideal balance of intelligence and speed', description: 'Ideal balance of intelligence and speed',
maxTokens: 4096 maxTokens: 8192
}, },
{ {
id: 'claude-3-haiku-20240307', id: 'claude-3-haiku-20240307',
name: 'Claude 3 Haiku', name: 'Claude 3 Haiku',
description: 'Fastest and most compact model', description: 'Fastest and most compact model',
maxTokens: 4096 maxTokens: 8192
}, },
{ {
id: 'claude-2.1', id: 'claude-2.1',
name: 'Claude 2.1', name: 'Claude 2.1',
description: 'Previous generation model', description: 'Previous generation model',
maxTokens: 4096 maxTokens: 8192
} }
] ]
}, },
@ -86,7 +86,7 @@ export const PROVIDER_CONSTANTS = {
id: 'gpt-3.5-turbo', id: 'gpt-3.5-turbo',
name: 'GPT-3.5 Turbo', name: 'GPT-3.5 Turbo',
description: 'Fast and efficient model for most tasks', description: 'Fast and efficient model for most tasks',
maxTokens: 4096 maxTokens: 8192
} }
] ]
}, },
@ -100,14 +100,14 @@ export const PROVIDER_CONSTANTS = {
OVERLAP: 200 OVERLAP: 200
}, },
MODEL_DIMENSIONS: { MODEL_DIMENSIONS: {
default: 4096, default: 8192,
llama2: 4096, llama2: 8192,
mixtral: 4096, mixtral: 8192,
'mistral': 4096 'mistral': 8192
}, },
MODEL_CONTEXT_WINDOWS: { MODEL_CONTEXT_WINDOWS: {
default: 8192, default: 8192,
llama2: 4096, llama2: 8192,
mixtral: 8192, mixtral: 8192,
'mistral': 8192 'mistral': 8192
} }
@ -147,9 +147,9 @@ export const LLM_CONSTANTS = {
// Model-specific embedding dimensions for Ollama models // Model-specific embedding dimensions for Ollama models
OLLAMA_MODEL_DIMENSIONS: { OLLAMA_MODEL_DIMENSIONS: {
"llama3": 4096, "llama3": 8192,
"llama3.1": 4096, "llama3.1": 8192,
"mistral": 4096, "mistral": 8192,
"nomic": 768, "nomic": 768,
"mxbai": 1024, "mxbai": 1024,
"nomic-embed-text": 768, "nomic-embed-text": 768,
@ -167,7 +167,7 @@ export const LLM_CONSTANTS = {
"mxbai": 32768, "mxbai": 32768,
"nomic-embed-text": 32768, "nomic-embed-text": 32768,
"mxbai-embed-large": 32768, "mxbai-embed-large": 32768,
"default": 4096 "default": 8192
}, },
// Batch size configuration // Batch size configuration

View File

@ -215,7 +215,7 @@ export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
// Trim text if it might exceed context window (rough character estimate) // Trim text if it might exceed context window (rough character estimate)
// This is a simplistic approach - ideally we'd count tokens properly // This is a simplistic approach - ideally we'd count tokens properly
const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
const response = await fetch(`${this.baseUrl}/api/embeddings`, { const response = await fetch(`${this.baseUrl}/api/embeddings`, {

View File

@ -118,14 +118,14 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
if (modelName.includes('voyage-2')) { if (modelName.includes('voyage-2')) {
return { return {
dimension: dimension || 1024, dimension: dimension || 1024,
contextWidth: 4096, contextWidth: 8192,
name: modelName, name: modelName,
type: 'float32' type: 'float32'
}; };
} else if (modelName.includes('voyage-lite-02')) { } else if (modelName.includes('voyage-lite-02')) {
return { return {
dimension: dimension || 768, dimension: dimension || 768,
contextWidth: 4096, contextWidth: 8192,
name: modelName, name: modelName,
type: 'float32' type: 'float32'
}; };
@ -133,7 +133,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
// Default for other Voyage models // Default for other Voyage models
return { return {
dimension: dimension || 1024, dimension: dimension || 1024,
contextWidth: 4096, contextWidth: 8192,
name: modelName, name: modelName,
type: 'float32' type: 'float32'
}; };
@ -170,7 +170,7 @@ export class VoyageEmbeddingProvider extends BaseEmbeddingProvider {
const modelInfo = await this.getModelInfo(modelName); const modelInfo = await this.getModelInfo(modelName);
// Trim text if it might exceed context window (rough character estimate) // Trim text if it might exceed context window (rough character estimate)
const charLimit = (modelInfo.contextWidth || 4096) * 4; // Rough estimate: avg 4 chars per token const charLimit = (modelInfo.contextWidth || 8192) * 4; // Rough estimate: avg 4 chars per token
const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text; const trimmedText = text.length > charLimit ? text.substring(0, charLimit) : text;
const response = await fetch(`${this.baseUrl}/embeddings`, { const response = await fetch(`${this.baseUrl}/embeddings`, {

View File

@ -17,7 +17,7 @@ export interface ModelCapabilities {
* Default model capabilities for unknown models * Default model capabilities for unknown models
*/ */
export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = { export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
contextWindowTokens: 4096, contextWindowTokens: 8192,
contextWindowChars: 16000, // ~4 chars per token estimate contextWindowChars: 16000, // ~4 chars per token estimate
maxCompletionTokens: 1024, maxCompletionTokens: 1024,
hasFunctionCalling: false, hasFunctionCalling: false,
@ -32,7 +32,7 @@ export const DEFAULT_MODEL_CAPABILITIES: ModelCapabilities = {
export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = { export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
// OpenAI models // OpenAI models
'gpt-3.5-turbo': { 'gpt-3.5-turbo': {
contextWindowTokens: 4096, contextWindowTokens: 8192,
contextWindowChars: 16000, contextWindowChars: 16000,
hasFunctionCalling: true hasFunctionCalling: true
}, },
@ -95,7 +95,7 @@ export const MODEL_CAPABILITIES: Record<string, Partial<ModelCapabilities>> = {
contextWindowChars: 32000 contextWindowChars: 32000
}, },
'llama2': { 'llama2': {
contextWindowTokens: 4096, contextWindowTokens: 8192,
contextWindowChars: 16000 contextWindowChars: 16000
} }
}; };