diff --git a/src/services/llm/context/code_handlers.ts b/src/services/llm/context/code_handlers.ts
index 19cc24e0b..f4b1fca97 100644
--- a/src/services/llm/context/code_handlers.ts
+++ b/src/services/llm/context/code_handlers.ts
@@ -3,7 +3,7 @@
  */
 
 // Import highlight.js dynamically when needed
-let hljs: any = null;
+let hljs: object | null = null;
 
 /**
  * Attempt to detect the programming language from code content or note attributes
@@ -134,8 +134,9 @@ export function extractCodeStructure(content: string, language: string): string
                 structure += "First few code lines:\n" + firstCodeLines.join('\n');
             }
         }
-    } catch (e: any) {
-        return `Error extracting code structure: ${e.message}`;
+    } catch (e: unknown) {
+        const errorMessage = e instanceof Error ? e.message : String(e);
+        return `Error extracting code structure: ${errorMessage}`;
     }
 
     return structure;
diff --git a/src/services/llm/embeddings/chunking/chunking_interface.ts b/src/services/llm/embeddings/chunking/chunking_interface.ts
index c59752c6c..d2515ac02 100644
--- a/src/services/llm/embeddings/chunking/chunking_interface.ts
+++ b/src/services/llm/embeddings/chunking/chunking_interface.ts
@@ -1,4 +1,5 @@
 import type { NoteEmbeddingContext } from "../types.js";
+import type { EmbeddingProvider } from "../embeddings_interface.js";
 
 /**
  * Interface for chunking operations
@@ -9,7 +10,7 @@ export interface ChunkingOperations {
      */
    processNoteWithChunking(
        noteId: string,
-        provider: any,
+        provider: EmbeddingProvider,
        context: NoteEmbeddingContext
    ): Promise<void>;
 }
diff --git a/src/services/llm/embeddings/chunking/chunking_processor.ts b/src/services/llm/embeddings/chunking/chunking_processor.ts
index 47eeae52c..60e1267fa 100644
--- a/src/services/llm/embeddings/chunking/chunking_processor.ts
+++ b/src/services/llm/embeddings/chunking/chunking_processor.ts
@@ -4,6 +4,8 @@ import sql from "../../../sql.js";
 import becca from "../../../../becca/becca.js";
 import cls from "../../../../services/cls.js";
 import type { NoteEmbeddingContext } from "../types.js";
+import type { EmbeddingProvider } from "../embeddings_interface.js";
+import type { EmbeddingConfig } from "../embeddings_interface.js";
 import { LLM_CONSTANTS } from "../../../llm/constants/provider_constants.js";
 import { EMBEDDING_PROCESSING } from '../../constants/search_constants.js';
 
@@ -37,6 +39,15 @@ const MAX_CHUNK_RETRY_ATTEMPTS = EMBEDDING_PROCESSING.MAX_CHUNK_RETRY_ATTEMPTS;
 const DEFAULT_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.DEFAULT_MAX_CHUNK_PROCESSING_TIME;
 const OLLAMA_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.OLLAMA_MAX_CHUNK_PROCESSING_TIME;
 
+/**
+ * Interface for chunks from the chunking process
+ */
+interface ContentChunk {
+    content: string;
+    index: number;
+    metadata?: Record<string, unknown>;
+}
+
 /**
  * Categorize an error as temporary or permanent based on its message
  * @param errorMessage - The error message to categorize
@@ -70,9 +81,13 @@ function categorizeError(errorMessage: string): 'temporary' | 'permanent' | 'unk
  * @param timeoutMs - Timeout in milliseconds
  * @returns The generated embedding
  */
-async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: number): Promise<Float32Array> {
+async function processChunkWithTimeout(
+    provider: EmbeddingProvider,
+    chunk: { content: string },
+    timeoutMs: number
+): Promise<Float32Array> {
     // Create a promise that rejects after the timeout
-    const timeoutPromise = new Promise<any>((_, reject) => {
+    const timeoutPromise = new Promise<never>((_, reject) => {
         setTimeout(() => {
             reject(new Error(`Chunk processing timed out after ${timeoutMs}ms`));
         }, timeoutMs);
@@ -95,7 +110,7 @@ async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: num
  */
 export async function processNoteWithChunking(
     noteId: string,
-    provider: any,
+    provider: EmbeddingProvider,
     context: NoteEmbeddingContext
 ): Promise<void> {
     // Track the overall start time
@@ -407,3 +422,56 @@ export async function processNoteWithChunking(
         throw error;
     }
 }
+
+/**
+ * Process a chunk with retry logic to handle errors
+ * @param index - The chunk index for tracking
+ * @param chunk - The content chunk
+ * @param provider - The embedding provider
+ * @param noteId - ID of the note being processed
+ * @param config - Embedding configuration
+ * @param startTime - When the overall process started
+ * @param storage - The storage module
+ * @param maxTimePerChunk - Max time per chunk processing
+ * @param retryAttempt - Current retry attempt number
+ */
+async function processChunkWithRetry(
+    index: number,
+    chunk: ContentChunk,
+    provider: EmbeddingProvider,
+    noteId: string,
+    config: EmbeddingConfig,
+    startTime: number,
+    storage: typeof import('../storage.js'),
+    maxTimePerChunk: number,
+    retryAttempt = 0
+): Promise<boolean> {
+    try {
+        // Try to generate embedding with timeout
+        const embedding = await processChunkWithTimeout(provider, chunk, maxTimePerChunk);
+
+        // Store the embedding with the chunk ID
+        const chunkId = `${noteId}_chunk${index}`;
+        await storage.storeNoteEmbedding(chunkId, provider.name, config.model, embedding);
+
+        return true;
+    } catch (error: unknown) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        const category = categorizeError(errorMessage);
+
+        // If we haven't exceeded the retry limit and it's a temporary error, retry
+        if (retryAttempt < MAX_CHUNK_RETRY_ATTEMPTS && (category === 'temporary' || category === 'unknown')) {
+            // Exponential backoff
+            const delayMs = Math.min(1000 * Math.pow(2, retryAttempt), 15000);
+            log.info(`Retrying chunk ${index} after ${delayMs}ms (attempt ${retryAttempt + 1}/${MAX_CHUNK_RETRY_ATTEMPTS})`);
+            await new Promise(resolve => setTimeout(resolve, delayMs));
+
+            return processChunkWithRetry(
+                index, chunk, provider, noteId, config, startTime, storage, maxTimePerChunk, retryAttempt + 1
+            );
+        } else {
+            log.error(`Failed to process chunk ${index} after ${retryAttempt + 1} attempts: ${errorMessage}`);
+            return false;
+        }
+    }
+}
diff --git a/src/services/llm/pipeline/interfaces.ts b/src/services/llm/pipeline/interfaces.ts
index 311f25a56..13c39414e 100644
--- a/src/services/llm/pipeline/interfaces.ts
+++ b/src/services/llm/pipeline/interfaces.ts
@@ -5,7 +5,7 @@ import type { LLMServiceInterface } from '../interfaces/agent_tool_interfaces.js
  * Base interface for pipeline input
  */
 export interface PipelineInput {
-    [key: string]: any;
+    [key: string]: unknown;
 }
 
 /**
@@ -51,7 +51,7 @@ export interface StageMetrics {
  * @param isDone Whether this is the final chunk
  * @param originalChunk The original chunk with all metadata for custom handling
  */
-export type StreamCallback = (text: string, isDone: boolean, originalChunk?: any) => Promise<void> | void;
+export type StreamCallback = (text: string, isDone: boolean, originalChunk?: StreamChunk) => Promise<void> | void;
 
 /**
  * Common input for all chat-related pipeline stages
@@ -88,7 +88,7 @@ export interface VectorSearchInput extends PipelineInput {
  * Base interface for pipeline stage output
  */
 export interface PipelineOutput {
-    [key: string]: any;
+    [key: string]: unknown;
 }
 
 /**
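
Note on the retry logic added in chunking_processor.ts: processChunkWithRetry follows a conventional exponential-backoff pattern, doubling the delay on each attempt and capping it at 15 seconds before giving up after MAX_CHUNK_RETRY_ATTEMPTS. The standalone sketch below (not part of the diff) illustrates the same pattern; the withRetry helper, its parameters, and the sample task are hypothetical, introduced only for illustration.

// Minimal sketch of the exponential-backoff retry pattern used by processChunkWithRetry.
// withRetry, its options, and the sample task are hypothetical, not part of the diff.
async function withRetry<T>(
    task: () => Promise<T>,
    maxAttempts = 3,
    baseDelayMs = 1000,
    maxDelayMs = 15000
): Promise<T> {
    for (let attempt = 0; ; attempt++) {
        try {
            return await task();
        } catch (error) {
            // Give up once the attempt budget is exhausted.
            if (attempt + 1 >= maxAttempts) {
                throw error;
            }
            // Double the delay on each retry, capped at maxDelayMs.
            const delayMs = Math.min(baseDelayMs * Math.pow(2, attempt), maxDelayMs);
            await new Promise(resolve => setTimeout(resolve, delayMs));
        }
    }
}

// Example usage: retry a flaky async task up to 5 times.
// await withRetry(() => someFlakyCall(), 5);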