reduce the use of any, part 2

perf3ct 2025-04-16 17:20:36 +00:00
parent 64f2a93ac0
commit 4601e3bfdb
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 80 additions and 10 deletions

View File

@@ -3,7 +3,7 @@
  */

 // Import highlight.js dynamically when needed
-let hljs: any = null;
+let hljs: object | null = null;

 /**
  * Attempt to detect the programming language from code content or note attributes
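A caveat on the hljs change: `object | null` satisfies the no-any rule but erases the module's API, so call sites still need casts before invoking highlighter methods. Assuming highlight.js ships its own type definitions (recent versions do), a lazy-import type would keep those calls checked; a minimal sketch, not part of this commit:

    // Sketch: type the lazily-loaded module instead of widening to `object`.
    // Assumes highlight.js bundles type definitions.
    let hljs: typeof import('highlight.js').default | null = null;

    async function getHljs() {
        if (hljs === null) {
            hljs = (await import('highlight.js')).default;
        }
        return hljs; // e.g. (await getHljs()).highlightAuto(code) is fully typed
    }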
@@ -134,8 +134,9 @@ export function extractCodeStructure(content: string, language: string): string {
                 structure += "First few code lines:\n" + firstCodeLines.join('\n');
             }
         }
-    } catch (e: any) {
-        return `Error extracting code structure: ${e.message}`;
+    } catch (e: unknown) {
+        const errorMessage = e instanceof Error ? e.message : String(e);
+        return `Error extracting code structure: ${errorMessage}`;
     }

     return structure;
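The `e instanceof Error ? e.message : String(e)` narrowing is the standard companion to `catch (e: unknown)`, and it appears again in `processChunkWithRetry` below. If the pattern keeps spreading, a shared helper would cut the repetition; a hypothetical sketch (the helper is an invention, not in this commit):

    // Hypothetical helper: safely extract a message from an unknown thrown value.
    function getErrorMessage(e: unknown): string {
        return e instanceof Error ? e.message : String(e);
    }

    // At a catch site:
    // } catch (e: unknown) {
    //     return `Error extracting code structure: ${getErrorMessage(e)}`;
    // }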

View File

@@ -1,4 +1,5 @@
 import type { NoteEmbeddingContext } from "../types.js";
+import type { EmbeddingProvider } from "../embeddings_interface.js";

 /**
  * Interface for chunking operations
@@ -9,7 +10,7 @@ export interface ChunkingOperations {
      */
     processNoteWithChunking(
         noteId: string,
-        provider: any,
+        provider: EmbeddingProvider,
         context: NoteEmbeddingContext
     ): Promise<void>;
 }
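Typing `provider` here means a bad argument now fails at compile time instead of deep inside the pipeline. A hypothetical call site (the import paths and the `embedNote` function are illustrative, not from this commit):

    import type { ChunkingOperations } from './chunking_interface.js';
    import type { EmbeddingProvider } from './embeddings_interface.js';
    import type { NoteEmbeddingContext } from '../types.js';

    // Hypothetical wiring: the compiler now rejects anything that is not
    // an EmbeddingProvider where `any` used to let it through.
    async function embedNote(
        chunking: ChunkingOperations,
        provider: EmbeddingProvider,
        noteId: string,
        context: NoteEmbeddingContext
    ): Promise<void> {
        await chunking.processNoteWithChunking(noteId, provider, context);
    }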

View File

@@ -4,6 +4,8 @@ import sql from "../../../sql.js";
 import becca from "../../../../becca/becca.js";
 import cls from "../../../../services/cls.js";
 import type { NoteEmbeddingContext } from "../types.js";
+import type { EmbeddingProvider } from "../embeddings_interface.js";
+import type { EmbeddingConfig } from "../embeddings_interface.js";
 import { LLM_CONSTANTS } from "../../../llm/constants/provider_constants.js";
 import { EMBEDDING_PROCESSING } from '../../constants/search_constants.js';
@@ -37,6 +39,15 @@ const MAX_CHUNK_RETRY_ATTEMPTS = EMBEDDING_PROCESSING.MAX_CHUNK_RETRY_ATTEMPTS;
 const DEFAULT_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.DEFAULT_MAX_CHUNK_PROCESSING_TIME;
 const OLLAMA_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.OLLAMA_MAX_CHUNK_PROCESSING_TIME;

+/**
+ * Interface for chunks from the chunking process
+ */
+interface ContentChunk {
+    content: string;
+    index: number;
+    metadata?: Record<string, unknown>;
+}
+
 /**
  * Categorize an error as temporary or permanent based on its message
  * @param errorMessage - The error message to categorize
@@ -70,9 +81,13 @@ function categorizeError(errorMessage: string): 'temporary' | 'permanent' | 'unknown' {
  * @param timeoutMs - Timeout in milliseconds
  * @returns The generated embedding
  */
-async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: number): Promise<any> {
+async function processChunkWithTimeout(
+    provider: EmbeddingProvider,
+    chunk: { content: string },
+    timeoutMs: number
+): Promise<Float32Array> {
     // Create a promise that rejects after the timeout
-    const timeoutPromise = new Promise((_, reject) => {
+    const timeoutPromise = new Promise<never>((_, reject) => {
         setTimeout(() => {
             reject(new Error(`Chunk processing timed out after ${timeoutMs}ms`));
         }, timeoutMs);
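The hunk ends before the function body does, but the `Promise<never>` annotation is what makes the new `Promise<Float32Array>` return type check out: a `never` branch contributes nothing to the resolved type of a race. Presumably the remainder races the provider call against the timeout, roughly like this (the `generateEmbeddings` method name is an assumption; the diff does not show it):

    // Race the embedding call against the timeout. Since timeoutPromise is
    // Promise<never>, the race's type collapses to the provider's result type.
    return Promise.race([
        provider.generateEmbeddings(chunk.content), // assumed provider method
        timeoutPromise,
    ]);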
@@ -95,7 +110,7 @@ async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: number): Promise<any> {
  */
 export async function processNoteWithChunking(
     noteId: string,
-    provider: any,
+    provider: EmbeddingProvider,
     context: NoteEmbeddingContext
 ): Promise<void> {
     // Track the overall start time
@@ -407,3 +422,56 @@ export async function processNoteWithChunking(
         throw error;
     }
 }
+
+/**
+ * Process a chunk with retry logic to handle errors
+ * @param index - The chunk index for tracking
+ * @param chunk - The content chunk
+ * @param provider - The embedding provider
+ * @param noteId - ID of the note being processed
+ * @param config - Embedding configuration
+ * @param startTime - When the overall process started
+ * @param storage - The storage module
+ * @param maxTimePerChunk - Max time per chunk processing
+ * @param retryAttempt - Current retry attempt number
+ */
+async function processChunkWithRetry(
+    index: number,
+    chunk: ContentChunk,
+    provider: EmbeddingProvider,
+    noteId: string,
+    config: EmbeddingConfig,
+    startTime: number,
+    storage: typeof import('../storage.js'),
+    maxTimePerChunk: number,
+    retryAttempt = 0
+): Promise<boolean> {
+    try {
+        // Try to generate embedding with timeout
+        const embedding = await processChunkWithTimeout(provider, chunk, maxTimePerChunk);
+
+        // Store the embedding with the chunk ID
+        const chunkId = `${noteId}_chunk${index}`;
+        await storage.storeNoteEmbedding(chunkId, provider.name, config.model, embedding);
+
+        return true;
+    } catch (error: unknown) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        const category = categorizeError(errorMessage);
+
+        // If we haven't exceeded the retry limit and it's a temporary error, retry
+        if (retryAttempt < MAX_CHUNK_RETRY_ATTEMPTS && (category === 'temporary' || category === 'unknown')) {
+            // Exponential backoff
+            const delayMs = Math.min(1000 * Math.pow(2, retryAttempt), 15000);
+            log.info(`Retrying chunk ${index} after ${delayMs}ms (attempt ${retryAttempt + 1}/${MAX_CHUNK_RETRY_ATTEMPTS})`);
+            await new Promise(resolve => setTimeout(resolve, delayMs));
+
+            return processChunkWithRetry(
+                index, chunk, provider, noteId, config, startTime, storage, maxTimePerChunk, retryAttempt + 1
+            );
+        } else {
+            log.error(`Failed to process chunk ${index} after ${retryAttempt + 1} attempts: ${errorMessage}`);
+            return false;
+        }
+    }
+}
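Returning a boolean instead of rethrowing suggests the caller tallies failed chunks and keeps going rather than aborting the whole note. A hypothetical driver loop over the chunks (only `processAllChunks` itself is invented; every other name comes from this diff):

    // Hypothetical caller: embed every chunk, counting permanent failures.
    async function processAllChunks(
        chunks: ContentChunk[],
        provider: EmbeddingProvider,
        noteId: string,
        config: EmbeddingConfig,
        storage: typeof import('../storage.js')
    ): Promise<number> {
        const startTime = Date.now();
        let failures = 0;
        for (const chunk of chunks) {
            const ok = await processChunkWithRetry(
                chunk.index, chunk, provider, noteId, config,
                startTime, storage, DEFAULT_MAX_CHUNK_PROCESSING_TIME
            );
            if (!ok) failures++;
        }
        return failures;
    }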

View File

@@ -5,7 +5,7 @@ import type { LLMServiceInterface } from '../interfaces/agent_tool_interfaces.js';
  * Base interface for pipeline input
  */
 export interface PipelineInput {
-    [key: string]: any;
+    [key: string]: unknown;
 }

 /**
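The switch from `any` to `unknown` in the index signature is the part with teeth: reads from a `PipelineInput` are no longer silently assignable to anything, so each consumer has to narrow before use. A small illustration (the 'query' key is hypothetical):

    // With `[key: string]: unknown`, the compiler forces narrowing at use sites.
    function getQueryText(input: PipelineInput): string {
        const q = input['query'];              // q: unknown (previously any)
        return typeof q === 'string' ? q : ''; // explicit check required
    }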
@@ -51,7 +51,7 @@ export interface StageMetrics {
  * @param isDone Whether this is the final chunk
  * @param originalChunk The original chunk with all metadata for custom handling
  */
-export type StreamCallback = (text: string, isDone: boolean, originalChunk?: any) => Promise<void> | void;
+export type StreamCallback = (text: string, isDone: boolean, originalChunk?: StreamChunk) => Promise<void> | void;

 /**
  * Common input for all chat-related pipeline stages
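With `originalChunk` typed as `StreamChunk` (presumably defined elsewhere in this module), callback authors get checked property access instead of a silent `any`. A hypothetical consumer that relies only on the parameters shown here:

    // Hypothetical stream consumer: accumulate text, finish on the last chunk.
    let buffer = '';
    const onChunk: StreamCallback = (text, isDone, originalChunk) => {
        buffer += text;
        // originalChunk is StreamChunk | undefined now; any property read on it
        // is validated by the compiler rather than failing at runtime.
        if (isDone) {
            console.log(`stream complete: ${buffer.length} chars`);
        }
    };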
@@ -88,7 +88,7 @@ export interface VectorSearchInput extends PipelineInput {
  * Base interface for pipeline stage output
  */
 export interface PipelineOutput {
-    [key: string]: any;
+    [key: string]: unknown;
 }

 /**