Reduce the use of `any`, part 2

This commit is contained in:
perf3ct 2025-04-16 17:20:36 +00:00
parent 64f2a93ac0
commit 4601e3bfdb
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
4 changed files with 80 additions and 10 deletions

View File

@ -3,7 +3,7 @@
*/
// Import highlight.js dynamically when needed
let hljs: any = null;
let hljs: object | null = null;
/**
* Attempt to detect the programming language from code content or note attributes
@ -134,8 +134,9 @@ export function extractCodeStructure(content: string, language: string): string
structure += "First few code lines:\n" + firstCodeLines.join('\n');
}
}
} catch (e: any) {
return `Error extracting code structure: ${e.message}`;
} catch (e: unknown) {
const errorMessage = e instanceof Error ? e.message : String(e);
return `Error extracting code structure: ${errorMessage}`;
}
return structure;

View File

@ -1,4 +1,5 @@
import type { NoteEmbeddingContext } from "../types.js";
import type { EmbeddingProvider } from "../embeddings_interface.js";
/**
* Interface for chunking operations
@ -9,7 +10,7 @@ export interface ChunkingOperations {
*/
processNoteWithChunking(
noteId: string,
provider: any,
provider: EmbeddingProvider,
context: NoteEmbeddingContext
): Promise<void>;
}

View File

@ -4,6 +4,8 @@ import sql from "../../../sql.js";
import becca from "../../../../becca/becca.js";
import cls from "../../../../services/cls.js";
import type { NoteEmbeddingContext } from "../types.js";
import type { EmbeddingProvider } from "../embeddings_interface.js";
import type { EmbeddingConfig } from "../embeddings_interface.js";
import { LLM_CONSTANTS } from "../../../llm/constants/provider_constants.js";
import { EMBEDDING_PROCESSING } from '../../constants/search_constants.js';
@ -37,6 +39,15 @@ const MAX_CHUNK_RETRY_ATTEMPTS = EMBEDDING_PROCESSING.MAX_CHUNK_RETRY_ATTEMPTS;
const DEFAULT_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.DEFAULT_MAX_CHUNK_PROCESSING_TIME;
const OLLAMA_MAX_CHUNK_PROCESSING_TIME = EMBEDDING_PROCESSING.OLLAMA_MAX_CHUNK_PROCESSING_TIME;
/**
 * A chunk of note content produced by the chunking step and consumed by
 * the embedding pipeline.
 */
interface ContentChunk {
// The chunk's text, sent to the embedding provider.
content: string;
// Zero-based position of this chunk within the note; used to build the chunk's embedding ID.
index: number;
// Optional chunker-supplied metadata; shape is not constrained here.
metadata?: Record<string, unknown>;
}
/**
* Categorize an error as temporary or permanent based on its message
* @param errorMessage - The error message to categorize
@ -70,9 +81,13 @@ function categorizeError(errorMessage: string): 'temporary' | 'permanent' | 'unk
* @param timeoutMs - Timeout in milliseconds
* @returns The generated embedding
*/
async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: number): Promise<any> {
async function processChunkWithTimeout(
provider: EmbeddingProvider,
chunk: { content: string },
timeoutMs: number
): Promise<Float32Array> {
// Create a promise that rejects after the timeout
const timeoutPromise = new Promise((_, reject) => {
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => {
reject(new Error(`Chunk processing timed out after ${timeoutMs}ms`));
}, timeoutMs);
@ -95,7 +110,7 @@ async function processChunkWithTimeout(provider: any, chunk: any, timeoutMs: num
*/
export async function processNoteWithChunking(
noteId: string,
provider: any,
provider: EmbeddingProvider,
context: NoteEmbeddingContext
): Promise<void> {
// Track the overall start time
@ -407,3 +422,56 @@ export async function processNoteWithChunking(
throw error;
}
}
/**
 * Embed a single content chunk and store the result, retrying transient
 * failures with capped exponential backoff.
 *
 * @param index - The chunk index for tracking
 * @param chunk - The content chunk to embed
 * @param provider - The embedding provider
 * @param noteId - ID of the note being processed
 * @param config - Embedding configuration (supplies the model name)
 * @param startTime - When the overall process started
 *   NOTE(review): accepted but unused in this body — presumably intended
 *   for overall-time budgeting; confirm before removing.
 * @param storage - The storage module used to persist the embedding
 * @param maxTimePerChunk - Max time per chunk processing (ms)
 * @param retryAttempt - Current retry attempt number (0 on first call)
 * @returns true if the embedding was generated and stored, false if all
 *   attempts failed or the error was permanent
 */
async function processChunkWithRetry(
    index: number,
    chunk: ContentChunk,
    provider: EmbeddingProvider,
    noteId: string,
    config: EmbeddingConfig,
    startTime: number,
    storage: typeof import('../storage.js'),
    maxTimePerChunk: number,
    retryAttempt = 0
): Promise<boolean> {
    try {
        // Generate the embedding, bounded by the per-chunk timeout.
        const embedding = await processChunkWithTimeout(provider, chunk, maxTimePerChunk);

        // Persist under a chunk-scoped ID derived from the note ID.
        const chunkId = `${noteId}_chunk${index}`;
        await storage.storeNoteEmbedding(chunkId, provider.name, config.model, embedding);
        return true;
    } catch (error: unknown) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        const kind = categorizeError(errorMessage);
        const retryable = kind === 'temporary' || kind === 'unknown';

        // Give up on permanent errors or once the retry budget is spent.
        if (!retryable || retryAttempt >= MAX_CHUNK_RETRY_ATTEMPTS) {
            log.error(`Failed to process chunk ${index} after ${retryAttempt + 1} attempts: ${errorMessage}`);
            return false;
        }

        // Exponential backoff: 1s, 2s, 4s, ... capped at 15s.
        const delayMs = Math.min(1000 * Math.pow(2, retryAttempt), 15000);
        log.info(`Retrying chunk ${index} after ${delayMs}ms (attempt ${retryAttempt + 1}/${MAX_CHUNK_RETRY_ATTEMPTS})`);
        await new Promise(resolve => setTimeout(resolve, delayMs));

        return processChunkWithRetry(
            index, chunk, provider, noteId, config, startTime, storage, maxTimePerChunk, retryAttempt + 1
        );
    }
}

View File

@ -5,7 +5,7 @@ import type { LLMServiceInterface } from '../interfaces/agent_tool_interfaces.js
* Base interface for pipeline input
*/
export interface PipelineInput {
[key: string]: any;
[key: string]: unknown;
}
/**
@ -51,7 +51,7 @@ export interface StageMetrics {
* @param isDone Whether this is the final chunk
* @param originalChunk The original chunk with all metadata for custom handling
*/
export type StreamCallback = (text: string, isDone: boolean, originalChunk?: any) => Promise<void> | void;
export type StreamCallback = (text: string, isDone: boolean, originalChunk?: StreamChunk) => Promise<void> | void;
/**
* Common input for all chat-related pipeline stages
@ -88,7 +88,7 @@ export interface VectorSearchInput extends PipelineInput {
* Base interface for pipeline stage output
*/
export interface PipelineOutput {
[key: string]: any;
[key: string]: unknown;
}
/**