From 7062e51f2d73b1975ca488c3b17e7c2d344df10b Mon Sep 17 00:00:00 2001 From: perf3ct Date: Thu, 17 Apr 2025 17:19:52 +0000 Subject: [PATCH] well at least query decomposition is working..for now --- .../llm/context/services/context_service.ts | 568 ++++----- .../llm/context/services/query_processor.ts | 1014 ++++++++++------- .../query_decomposition_tool.ts | 182 +-- .../llm/interfaces/agent_tool_interfaces.ts | 7 - src/services/llm/pipeline/chat_pipeline.ts | 26 +- 5 files changed, 1002 insertions(+), 795 deletions(-) diff --git a/src/services/llm/context/services/context_service.ts b/src/services/llm/context/services/context_service.ts index 604cd1d3c..a227c3936 100644 --- a/src/services/llm/context/services/context_service.ts +++ b/src/services/llm/context/services/context_service.ts @@ -24,306 +24,324 @@ import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces // Options for context processing export interface ContextOptions { - // Content options - summarizeContent?: boolean; - maxResults?: number; - contextNoteId?: string | null; + // Content options + summarizeContent?: boolean; + maxResults?: number; + contextNoteId?: string | null; - // Processing options - useQueryEnhancement?: boolean; - useQueryDecomposition?: boolean; + // Processing options + useQueryEnhancement?: boolean; + useQueryDecomposition?: boolean; - // Debugging options - showThinking?: boolean; + // Debugging options + showThinking?: boolean; } export class ContextService { - private initialized = false; - private initPromise: Promise | null = null; - private contextExtractor: ContextExtractor; + private initialized = false; + private initPromise: Promise | null = null; + private contextExtractor: ContextExtractor; - constructor() { - this.contextExtractor = new ContextExtractor(); - } - - /** - * Initialize the service - */ - async initialize(): Promise { - if (this.initialized) return; - - // Use a promise to prevent multiple simultaneous initializations - if (this.initPromise) return this.initPromise; - - this.initPromise = (async () => { - try { - // Initialize provider - const provider = await providerManager.getPreferredEmbeddingProvider(); - if (!provider) { - throw new Error(`No embedding provider available. Could not initialize context service.`); - } - - // Agent tools are already initialized in the AIServiceManager constructor - // No need to initialize them again - - this.initialized = true; - log.info(`Context service initialized with provider: ${provider.name}`); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - log.error(`Failed to initialize context service: ${errorMessage}`); - throw error; - } finally { - this.initPromise = null; - } - })(); - - return this.initPromise; - } - - /** - * Process a user query to find relevant context in Trilium notes - * - * @param userQuestion - The user's query - * @param llmService - The LLM service to use - * @param options - Context processing options - * @returns Context information and relevant notes - */ - async processQuery( - userQuestion: string, - llmService: LLMServiceInterface, - options: ContextOptions = {} - ): Promise<{ - context: string; - sources: NoteSearchResult[]; - thinking?: string; - decomposedQuery?: any; - }> { - // Set default options - const { - summarizeContent = false, - maxResults = 10, - contextNoteId = null, - useQueryEnhancement = true, - useQueryDecomposition = false, - showThinking = false - } = options; - - log.info(`Processing query: "${userQuestion.substring(0, 50)}..."`); - log.info(`Options: summarize=${summarizeContent}, maxResults=${maxResults}, contextNoteId=${contextNoteId || 'global'}`); - log.info(`Processing: enhancement=${useQueryEnhancement}, decomposition=${useQueryDecomposition}, showThinking=${showThinking}`); - - if (!this.initialized) { - try { - await this.initialize(); - } catch (error) { - log.error(`Failed to initialize ContextService: ${error}`); - // Return a fallback response if initialization fails - return { - context: CONTEXT_PROMPTS.NO_NOTES_CONTEXT, - sources: [], - thinking: undefined - }; - } + constructor() { + this.contextExtractor = new ContextExtractor(); } - try { - let decomposedQuery; - let searchQueries: string[] = [userQuestion]; - let relevantNotes: NoteSearchResult[] = []; + /** + * Initialize the service + */ + async initialize(): Promise { + if (this.initialized) return; - // Step 1: Decompose query if requested - if (useQueryDecomposition) { - log.info(`Decomposing query for better understanding`); - decomposedQuery = queryProcessor.decomposeQuery(userQuestion); + // Use a promise to prevent multiple simultaneous initializations + if (this.initPromise) return this.initPromise; - // Extract sub-queries to use for search - if (decomposedQuery.subQueries.length > 0) { - searchQueries = decomposedQuery.subQueries - .map(sq => sq.text) - .filter(text => text !== userQuestion); // Remove the original query to avoid duplication + this.initPromise = (async () => { + try { + // Initialize provider + const provider = await providerManager.getPreferredEmbeddingProvider(); + if (!provider) { + throw new Error(`No embedding provider available. Could not initialize context service.`); + } - // Always include the original query - searchQueries.unshift(userQuestion); + // Agent tools are already initialized in the AIServiceManager constructor + // No need to initialize them again - log.info(`Query decomposed into ${searchQueries.length} search queries`); + this.initialized = true; + log.info(`Context service initialized with provider: ${provider.name}`); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + log.error(`Failed to initialize context service: ${errorMessage}`); + throw error; + } finally { + this.initPromise = null; + } + })(); + + return this.initPromise; + } + + /** + * Process a user query to find relevant context in Trilium notes + * + * @param userQuestion - The user's query + * @param llmService - The LLM service to use + * @param options - Context processing options + * @returns Context information and relevant notes + */ + async processQuery( + userQuestion: string, + llmService: LLMServiceInterface, + options: ContextOptions = {} + ): Promise<{ + context: string; + sources: NoteSearchResult[]; + thinking?: string; + decomposedQuery?: any; + }> { + // Set default options + const { + summarizeContent = false, + maxResults = 10, + contextNoteId = null, + useQueryEnhancement = true, + useQueryDecomposition = false, + showThinking = false + } = options; + + log.info(`Processing query: "${userQuestion.substring(0, 50)}..."`); + log.info(`Options: summarize=${summarizeContent}, maxResults=${maxResults}, contextNoteId=${contextNoteId || 'global'}`); + log.info(`Processing: enhancement=${useQueryEnhancement}, decomposition=${useQueryDecomposition}, showThinking=${showThinking}`); + + if (!this.initialized) { + try { + await this.initialize(); + } catch (error) { + log.error(`Failed to initialize ContextService: ${error}`); + // Return a fallback response if initialization fails + return { + context: CONTEXT_PROMPTS.NO_NOTES_CONTEXT, + sources: [], + thinking: undefined + }; + } } - } - // Step 2: Or enhance query if requested - else if (useQueryEnhancement) { + try { - log.info(`Enhancing query for better semantic matching`); - searchQueries = await queryProcessor.generateSearchQueries(userQuestion, llmService); - log.info(`Generated ${searchQueries.length} enhanced search queries`); + let decomposedQuery; + let searchQueries: string[] = [userQuestion]; + let relevantNotes: NoteSearchResult[] = []; + + // Step 1: Decompose query if requested + if (useQueryDecomposition) { + log.info(`Decomposing query for better understanding`); + try { + // Use the async version with the LLM service + decomposedQuery = await queryProcessor.decomposeQuery(userQuestion, undefined, llmService); + log.info(`Successfully decomposed query complexity: ${decomposedQuery.complexity}/10 with ${decomposedQuery.subQueries.length} sub-queries`); + } catch (error) { + log.error(`Error in query decomposition, using fallback: ${error}`); + // Fallback to simpler decomposition + decomposedQuery = { + originalQuery: userQuestion, + subQueries: [{ + id: `sq_fallback_${Date.now()}`, + text: userQuestion, + reason: "Fallback to original query due to decomposition error", + isAnswered: false + }], + status: 'pending', + complexity: 1 + }; + } + + // Extract sub-queries to use for search + if (decomposedQuery.subQueries.length > 0) { + searchQueries = decomposedQuery.subQueries + .map(sq => sq.text) + .filter(text => text !== userQuestion); // Remove the original query to avoid duplication + + // Always include the original query + searchQueries.unshift(userQuestion); + + log.info(`Query decomposed into ${searchQueries.length} search queries`); + } + } + // Step 2: Or enhance query if requested + else if (useQueryEnhancement) { + try { + log.info(`Enhancing query for better semantic matching`); + searchQueries = await queryProcessor.generateSearchQueries(userQuestion, llmService); + log.info(`Generated ${searchQueries.length} enhanced search queries`); + } catch (error) { + log.error(`Error generating search queries, using fallback: ${error}`); + searchQueries = [userQuestion]; // Fallback to using the original question + } + } + + // Step 3: Find relevant notes using vector search + const allResults = new Map(); + + for (const query of searchQueries) { + try { + log.info(`Searching for: "${query.substring(0, 50)}..."`); + + // Use the unified vector search service + const results = await vectorSearchService.findRelevantNotes( + query, + contextNoteId, + { + maxResults: maxResults, + summarizeContent: summarizeContent, + llmService: summarizeContent ? llmService : null + } + ); + + log.info(`Found ${results.length} results for query "${query.substring(0, 30)}..."`); + + // Combine results, avoiding duplicates + for (const result of results) { + if (!allResults.has(result.noteId)) { + allResults.set(result.noteId, result); + } else { + // If note already exists, update similarity to max of both values + const existing = allResults.get(result.noteId); + if (existing && result.similarity > existing.similarity) { + existing.similarity = result.similarity; + allResults.set(result.noteId, existing); + } + } + } + } catch (error) { + log.error(`Error searching for query "${query}": ${error}`); + } + } + + // Convert to array and sort by similarity + relevantNotes = Array.from(allResults.values()) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, maxResults); + + log.info(`Final combined results: ${relevantNotes.length} relevant notes`); + + // Step 4: Build context from the notes + const provider = await providerManager.getPreferredEmbeddingProvider(); + const providerId = provider?.name || 'default'; + + const context = await contextFormatter.buildContextFromNotes( + relevantNotes, + userQuestion, + providerId + ); + + // Step 5: Add agent tools context if requested + let enhancedContext = context; + let thinkingProcess: string | undefined = undefined; + + if (showThinking) { + thinkingProcess = this.generateThinkingProcess( + userQuestion, + searchQueries, + relevantNotes, + decomposedQuery + ); + } + + return { + context: enhancedContext, + sources: relevantNotes, + thinking: thinkingProcess, + decomposedQuery + }; } catch (error) { - log.error(`Error generating search queries, using fallback: ${error}`); - searchQueries = [userQuestion]; // Fallback to using the original question + log.error(`Error processing query: ${error}`); + return { + context: CONTEXT_PROMPTS.NO_NOTES_CONTEXT, + sources: [], + thinking: undefined + }; } - } + } - // Step 3: Find relevant notes using vector search - const allResults = new Map(); + /** + * Generate a thinking process for debugging and transparency + */ + private generateThinkingProcess( + originalQuery: string, + searchQueries: string[], + relevantNotes: NoteSearchResult[], + decomposedQuery?: any + ): string { + let thinking = `## Query Processing\n\n`; + thinking += `Original query: "${originalQuery}"\n\n`; - for (const query of searchQueries) { - try { - log.info(`Searching for: "${query.substring(0, 50)}..."`); + // Add decomposition analysis if available + if (decomposedQuery) { + thinking += `Query complexity: ${decomposedQuery.complexity}/10\n\n`; + thinking += `### Decomposed into ${decomposedQuery.subQueries.length} sub-queries:\n`; - // Use the unified vector search service - const results = await vectorSearchService.findRelevantNotes( + decomposedQuery.subQueries.forEach((sq: any, i: number) => { + thinking += `${i + 1}. ${sq.text}\n Reason: ${sq.reason}\n\n`; + }); + } + + // Add search queries + thinking += `### Search Queries Used:\n`; + searchQueries.forEach((q, i) => { + thinking += `${i + 1}. "${q}"\n`; + }); + + // Add found sources + thinking += `\n## Sources Retrieved (${relevantNotes.length})\n\n`; + + relevantNotes.slice(0, 5).forEach((note, i) => { + thinking += `${i + 1}. "${note.title}" (Score: ${Math.round(note.similarity * 100)}%)\n`; + thinking += ` ID: ${note.noteId}\n`; + + // Check if parentPath exists before using it + if ('parentPath' in note && note.parentPath) { + thinking += ` Path: ${note.parentPath}\n`; + } + + if (note.content) { + const contentPreview = note.content.length > 100 + ? note.content.substring(0, 100) + '...' + : note.content; + thinking += ` Preview: ${contentPreview}\n`; + } + + thinking += '\n'; + }); + + if (relevantNotes.length > 5) { + thinking += `... and ${relevantNotes.length - 5} more sources\n`; + } + + return thinking; + } + + /** + * Find notes semantically related to a query + * (Shorthand method that directly uses vectorSearchService) + */ + async findRelevantNotes( + query: string, + contextNoteId: string | null = null, + options: { + maxResults?: number, + summarize?: boolean, + llmService?: LLMServiceInterface | null + } = {} + ): Promise { + return vectorSearchService.findRelevantNotes( query, contextNoteId, { - maxResults: maxResults, - summarizeContent: summarizeContent, - llmService: summarizeContent ? llmService : null + maxResults: options.maxResults, + summarizeContent: options.summarize, + llmService: options.llmService } - ); - - log.info(`Found ${results.length} results for query "${query.substring(0, 30)}..."`); - - // Combine results, avoiding duplicates - for (const result of results) { - if (!allResults.has(result.noteId)) { - allResults.set(result.noteId, result); - } else { - // If note already exists, update similarity to max of both values - const existing = allResults.get(result.noteId); - if (existing && result.similarity > existing.similarity) { - existing.similarity = result.similarity; - allResults.set(result.noteId, existing); - } - } - } - } catch (error) { - log.error(`Error searching for query "${query}": ${error}`); - } - } - - // Convert to array and sort by similarity - relevantNotes = Array.from(allResults.values()) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, maxResults); - - log.info(`Final combined results: ${relevantNotes.length} relevant notes`); - - // Step 4: Build context from the notes - const provider = await providerManager.getPreferredEmbeddingProvider(); - const providerId = provider?.name || 'default'; - - const context = await contextFormatter.buildContextFromNotes( - relevantNotes, - userQuestion, - providerId - ); - - // Step 5: Add agent tools context if requested - let enhancedContext = context; - let thinkingProcess: string | undefined = undefined; - - if (showThinking) { - thinkingProcess = this.generateThinkingProcess( - userQuestion, - searchQueries, - relevantNotes, - decomposedQuery ); - } - - return { - context: enhancedContext, - sources: relevantNotes, - thinking: thinkingProcess, - decomposedQuery - }; - } catch (error) { - log.error(`Error processing query: ${error}`); - return { - context: CONTEXT_PROMPTS.NO_NOTES_CONTEXT, - sources: [], - thinking: undefined - }; } - } - - /** - * Generate a thinking process for debugging and transparency - */ - private generateThinkingProcess( - originalQuery: string, - searchQueries: string[], - relevantNotes: NoteSearchResult[], - decomposedQuery?: any - ): string { - let thinking = `## Query Processing\n\n`; - thinking += `Original query: "${originalQuery}"\n\n`; - - // Add decomposition analysis if available - if (decomposedQuery) { - thinking += `Query complexity: ${decomposedQuery.complexity}/10\n\n`; - thinking += `### Decomposed into ${decomposedQuery.subQueries.length} sub-queries:\n`; - - decomposedQuery.subQueries.forEach((sq: any, i: number) => { - thinking += `${i+1}. ${sq.text}\n Reason: ${sq.reason}\n\n`; - }); - } - - // Add search queries - thinking += `### Search Queries Used:\n`; - searchQueries.forEach((q, i) => { - thinking += `${i+1}. "${q}"\n`; - }); - - // Add found sources - thinking += `\n## Sources Retrieved (${relevantNotes.length})\n\n`; - - relevantNotes.slice(0, 5).forEach((note, i) => { - thinking += `${i+1}. "${note.title}" (Score: ${Math.round(note.similarity * 100)}%)\n`; - thinking += ` ID: ${note.noteId}\n`; - - // Check if parentPath exists before using it - if ('parentPath' in note && note.parentPath) { - thinking += ` Path: ${note.parentPath}\n`; - } - - if (note.content) { - const contentPreview = note.content.length > 100 - ? note.content.substring(0, 100) + '...' - : note.content; - thinking += ` Preview: ${contentPreview}\n`; - } - - thinking += '\n'; - }); - - if (relevantNotes.length > 5) { - thinking += `... and ${relevantNotes.length - 5} more sources\n`; - } - - return thinking; - } - - /** - * Find notes semantically related to a query - * (Shorthand method that directly uses vectorSearchService) - */ - async findRelevantNotes( - query: string, - contextNoteId: string | null = null, - options: { - maxResults?: number, - summarize?: boolean, - llmService?: LLMServiceInterface | null - } = {} - ): Promise { - return vectorSearchService.findRelevantNotes( - query, - contextNoteId, - { - maxResults: options.maxResults, - summarizeContent: options.summarize, - llmService: options.llmService - } - ); - } } // Export a singleton instance diff --git a/src/services/llm/context/services/query_processor.ts b/src/services/llm/context/services/query_processor.ts index 86478e5ed..d62561aac 100644 --- a/src/services/llm/context/services/query_processor.ts +++ b/src/services/llm/context/services/query_processor.ts @@ -16,501 +16,639 @@ import { QUERY_DECOMPOSITION_STRINGS } from '../../constants/query_decomposition import JsonExtractor from '../../utils/json_extractor.js'; import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js'; import { SEARCH_CONSTANTS } from '../../constants/search_constants.js'; +import aiServiceManager from '../../ai_service_manager.js'; // Interfaces export interface SubQuery { - id: string; - text: string; - reason: string; - isAnswered: boolean; - answer?: string; + id: string; + text: string; + reason: string; + isAnswered: boolean; + answer?: string; } export interface DecomposedQuery { - originalQuery: string; - subQueries: SubQuery[]; - status: 'pending' | 'in_progress' | 'completed'; - complexity: number; + originalQuery: string; + subQueries: SubQuery[]; + status: 'pending' | 'in_progress' | 'completed'; + complexity: number; } export class QueryProcessor { - private static queryCounter: number = 0; + private static queryCounter: number = 0; - // Prompt templates - private enhancerPrompt = CONTEXT_PROMPTS.QUERY_ENHANCER; + // Prompt templates + private enhancerPrompt = CONTEXT_PROMPTS.QUERY_ENHANCER; - /** - * Generate enhanced search queries for better semantic matching - * - * @param userQuestion - The user's question - * @param llmService - The LLM service to use for generating queries - * @returns Array of search queries - */ - async generateSearchQueries( - userQuestion: string, - llmService: LLMServiceInterface - ): Promise { - if (!userQuestion || userQuestion.trim() === '') { - return []; // Return empty array for empty input + /** + * Get a valid LLM service or null if none available + * + * @returns Available LLM service or null + */ + private async getLLMService(): Promise { + try { + // Get the service from the AI service manager + return aiServiceManager.getService(); + } catch (error: any) { + log.error(`Error getting LLM service: ${error.message || String(error)}`); + return null; + } } - try { - // Check cache - const cacheKey = `searchQueries:${userQuestion}`; - const cached = cacheManager.getQueryResults(cacheKey); - if (cached && Array.isArray(cached)) { - return cached; - } + /** + * Generate enhanced search queries for better semantic matching + * + * @param userQuestion - The user's question + * @param llmService - The LLM service to use for generating queries, or null to auto-detect + * @returns Array of search queries + */ + async generateSearchQueries( + userQuestion: string, + llmService?: LLMServiceInterface + ): Promise { + if (!userQuestion || userQuestion.trim() === '') { + return []; // Return empty array for empty input + } - // Prepare the prompt with JSON formatting instructions - const enhancedPrompt = `${this.enhancerPrompt} + try { + // Check cache + const cacheKey = `searchQueries:${userQuestion}`; + const cached = cacheManager.getQueryResults(cacheKey); + if (cached && Array.isArray(cached)) { + return cached; + } + + // Get LLM service if not provided + const service = llmService || await this.getLLMService(); + if (!service) { + log.info(`No LLM service available for query enhancement, using original query`); + return [userQuestion]; + } + + // Prepare the prompt with JSON formatting instructions + const enhancedPrompt = `${this.enhancerPrompt} IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements. Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`; - const messages = [ - { role: "system" as const, content: enhancedPrompt }, - { role: "user" as const, content: userQuestion } - ]; + const messages = [ + { role: "system" as const, content: enhancedPrompt }, + { role: "user" as const, content: userQuestion } + ]; - const options = { - temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR, - maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, - bypassFormatter: true, - expectsJsonResponse: true, - _bypassContextProcessing: true // Prevent recursive calls - }; + const options = { + temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR, + maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, + bypassFormatter: true, + expectsJsonResponse: true, + _bypassContextProcessing: true // Prevent recursive calls + }; - // Get the response from the LLM - const response = await llmService.generateChatCompletion(messages, options); - const responseText = response.text; + // Get the response from the LLM + const response = await service.generateChatCompletion(messages, options); + const responseText = response.text; - // Use the JsonExtractor to parse the response - const queries = JsonExtractor.extract(responseText, { - extractArrays: true, - minStringLength: 3, - applyFixes: true, - useFallbacks: true - }); + // Use the JsonExtractor to parse the response + const queries = JsonExtractor.extract(responseText, { + extractArrays: true, + minStringLength: 3, + applyFixes: true, + useFallbacks: true + }); - if (queries && queries.length > 0) { - log.info(`Extracted ${queries.length} queries using JsonExtractor`); - cacheManager.storeQueryResults(cacheKey, queries); - return queries; - } - - // Fallback to original question - const fallback = [userQuestion]; - log.info(`No queries extracted, using fallback: "${userQuestion}"`); - cacheManager.storeQueryResults(cacheKey, fallback); - return fallback; - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - log.error(`Error generating search queries: ${errorMessage}`); - return [userQuestion]; - } - } - - /** - * Break down a complex query into smaller, more manageable sub-queries - * - * @param query The original user query - * @param context Optional context about the current note being viewed - * @returns A decomposed query object with sub-queries - */ - decomposeQuery(query: string, context?: string): DecomposedQuery { - try { - // Log the decomposition attempt - log.info(`Decomposing query: "${query}"`); - - if (!query || query.trim().length === 0) { - log.info(`Query is empty, skipping decomposition`); - return { - originalQuery: query, - subQueries: [], - status: 'pending', - complexity: 0 - }; - } - - // Assess query complexity - const complexity = this.assessQueryComplexity(query); - log.info(`Query complexity assessment: ${complexity}/10`); - - // For simple queries, just return the original as a single sub-query - if (complexity < 3) { - log.info(`Simple query detected (complexity: ${complexity}), using direct approach`); - - const mainSubQuery = { - id: this.generateSubQueryId(), - text: query, - reason: "Direct question that can be answered without decomposition", - isAnswered: false - }; - - // Add a generic exploration query for context - const genericQuery = { - id: this.generateSubQueryId(), - text: `What information is related to ${query}?`, - reason: "General exploration to find related content", - isAnswered: false - }; - - return { - originalQuery: query, - subQueries: [mainSubQuery, genericQuery], - status: 'pending', - complexity - }; - } - - // For complex queries, break it down into sub-queries - const subQueries = this.createSubQueries(query, context); - log.info(`Decomposed query into ${subQueries.length} sub-queries`); - - return { - originalQuery: query, - subQueries, - status: 'pending', - complexity - }; - } catch (error: any) { - log.error(`Error decomposing query: ${error.message}`); - - // Fallback to treating it as a simple query - return { - originalQuery: query, - subQueries: [{ - id: this.generateSubQueryId(), - text: query, - reason: "Error occurred during decomposition, using original query", - isAnswered: false - }], - status: 'pending', - complexity: 1 - }; - } - } - - /** - * Create sub-queries from a complex query - * - * @param query The original complex query - * @param context Optional context to help with decomposition - * @returns Array of sub-queries - */ - private createSubQueries(query: string, context?: string): SubQuery[] { - // Analyze the query to identify potential aspects to explore - const questionParts = this.identifyQuestionParts(query); - const subQueries: SubQuery[] = []; - - // Add the main query as the first sub-query - subQueries.push({ - id: this.generateSubQueryId(), - text: query, - reason: "Main question (for direct matching)", - isAnswered: false - }); - - // Add sub-queries for each identified question part - for (const part of questionParts) { - subQueries.push({ - id: this.generateSubQueryId(), - text: part, - reason: "Sub-aspect of the main question", - isAnswered: false - }); - } - - // Add a generic exploration query to find related information - subQueries.push({ - id: this.generateSubQueryId(), - text: `What information is related to ${query}?`, - reason: "General exploration to find related content", - isAnswered: false - }); - - // If we have context, add a specific query for that context - if (context) { - subQueries.push({ - id: this.generateSubQueryId(), - text: `How does "${context}" relate to ${query}?`, - reason: "Contextual relationship exploration", - isAnswered: false - }); - } - - return subQueries; - } - - /** - * Identify parts of a complex question that could be individual sub-questions - * - * @param query The complex query to analyze - * @returns Array of potential sub-questions - */ - private identifyQuestionParts(query: string): string[] { - const parts: string[] = []; - - // Check for multiple question marks - const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?')); - if (questionSentences.length > 1) { - // Multiple explicit questions detected - return questionSentences.map(s => s.trim()); - } - - // Check for conjunctions that might separate multiple questions - const conjunctions = ['and', 'or', 'but', 'plus', 'also']; - for (const conjunction of conjunctions) { - const pattern = new RegExp(`\\b${conjunction}\\b`, 'i'); - if (pattern.test(query)) { - // Split by conjunction and check if each part could be a question - const splitParts = query.split(pattern); - for (const part of splitParts) { - const trimmed = part.trim(); - if (trimmed.length > 10) { // Avoid tiny fragments - parts.push(trimmed); - } - } - if (parts.length > 0) { - return parts; - } - } - } - - // Check for comparison indicators - const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs']; - for (const term of comparisonTerms) { - if (query.toLowerCase().includes(term)) { - // This is likely a comparison question, extract the items being compared - const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i')); - if (beforeAfter.length === 2) { - // Try to extract compared items - const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]); - if (aspects.length > 0) { - for (const aspect of aspects) { - parts.push(`What are the key points about ${aspect}?`); + if (queries && queries.length > 0) { + log.info(`Extracted ${queries.length} queries using JsonExtractor`); + cacheManager.storeQueryResults(cacheKey, queries); + return queries; } - parts.push(`What are the differences between ${aspects.join(' and ')}?`); - return parts; - } + + // Fallback to original question + const fallback = [userQuestion]; + log.info(`No queries extracted, using fallback: "${userQuestion}"`); + cacheManager.storeQueryResults(cacheKey, fallback); + return fallback; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + log.error(`Error generating search queries: ${errorMessage}`); + return [userQuestion]; } - } } - // Check for "multiple aspects" questions - const aspectPatterns = [ - /what (?:are|is) the (\w+) (?:of|about|for|in) /i, - /how (?:to|do|does|can) .+ (\w+)/i - ]; + /** + * Break down a complex query into smaller, more manageable sub-queries + * + * @param query The original user query + * @param context Optional context about the current note being viewed + * @param llmService Optional LLM service to use for advanced decomposition + * @returns A decomposed query object with sub-queries + */ + async decomposeQuery( + query: string, + context?: string, + llmService?: LLMServiceInterface + ): Promise { + try { + // Log the decomposition attempt + log.info(`Decomposing query: "${query}"`); - for (const pattern of aspectPatterns) { - const match = query.match(pattern); - if (match && match[1]) { - const aspect = match[1]; - parts.push(`What is the ${aspect}?`); - parts.push(`How does ${aspect} relate to the main topic?`); - } + if (!query || query.trim().length === 0) { + log.info(`Query is empty, skipping decomposition`); + return { + originalQuery: query, + subQueries: [], + status: 'pending', + complexity: 0 + }; + } + + // Assess query complexity + const complexity = this.assessQueryComplexity(query); + log.info(`Query complexity assessment: ${complexity}/10`); + + // Try to get LLM service if not provided + const service = llmService || await this.getLLMService(); + + // For when no LLM service is available, use the basic approach + if (!service) { + if (!service) { + log.info(`No LLM service available for query decomposition, using original query`); + } + + log.info(`Using basic decomposition approach (complexity: ${complexity})`); + + const mainSubQuery = { + id: this.generateSubQueryId(), + text: query, + reason: "Direct question that can be answered without decomposition", + isAnswered: false + }; + + // Add a generic exploration query for context + const genericQuery = { + id: this.generateSubQueryId(), + text: `What information is related to ${query}?`, + reason: "General exploration to find related content", + isAnswered: false + }; + + return { + originalQuery: query, + subQueries: [mainSubQuery, genericQuery], + status: 'pending', + complexity + }; + } + + // For when the LLM available, we can use more advanced decomposition + if (service) { + try { + // Try to use LLM for advanced decomposition + log.info(`Using advanced LLM-based decomposition for complex query (complexity: ${complexity})`); + const enhancedSubQueries = await this.createLLMSubQueries(query, context, service); + + if (enhancedSubQueries && enhancedSubQueries.length > 0) { + log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries: ${JSON.stringify(enhancedSubQueries)}`); + return { + originalQuery: query, + subQueries: enhancedSubQueries, + status: 'pending', + complexity + }; + } + } catch (error: any) { + log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`); + // Continue to fallback with basic decomposition + } + } + + // Fallback to basic decomposition + const subQueries = this.createSubQueries(query, context); + log.info(`Decomposed query into ${subQueries.length} sub-queries`); + + return { + originalQuery: query, + subQueries, + status: 'pending', + complexity + }; + } catch (error: any) { + log.error(`Error decomposing query: ${error.message}`); + + // Fallback to treating it as a simple query + return { + originalQuery: query, + subQueries: [{ + id: this.generateSubQueryId(), + text: query, + reason: "Error occurred during decomposition, using original query", + isAnswered: false + }], + status: 'pending', + complexity: 1 + }; + } } - return parts; - } + /** + * Use LLM to create advanced sub-queries from a complex query + * + * @param query The original complex query + * @param context Optional context to help with decomposition + * @param llmService LLM service to use for advanced decomposition + * @returns Array of sub-queries + */ + private async createLLMSubQueries( + query: string, + context?: string, + llmService?: LLMServiceInterface + ): Promise { + // If no LLM service, use basic decomposition + if (!llmService) { + return this.createSubQueries(query, context); + } - /** - * Extract items being compared from a comparison question - * - * @param before Text before the comparison term - * @param after Text after the comparison term - * @returns Array of items being compared - */ - private extractComparisonAspects(before: string, after: string): string[] { - const aspects: string[] = []; + try { + // Build a prompt from existing templates in the constants + const contextPart = context ? `\nContext: ${context}` : ''; - // Look for "between A and B" pattern - const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i); - if (betweenMatch) { - aspects.push(betweenMatch[1].trim()); - aspects.push(betweenMatch[2].trim()); - return aspects; + // Use existing templates from QUERY_DECOMPOSITION_STRINGS to build the prompt + const prompt = `I need to break down a complex query into sub-queries. +Query: ${query}${contextPart} + +Please analyze this query and identify the key aspects that need to be addressed.`; + + const messages = [ + { role: "system" as const, content: prompt } + ]; + + const options = { + temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR, + maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, + bypassFormatter: true, + expectsJsonResponse: true, + _bypassContextProcessing: true // Prevent recursive calls + }; + + // Get the response from the LLM + const response = await llmService.generateChatCompletion(messages, options); + const responseText = response.text; + + // Try to extract structured sub-queries from the response + try { + // Expected format is an array of objects with "text" and "reason" keys + interface RawSubQuery { + text: string; + reason?: string; + } + + // Extract JSON from the response + const extractedData = JsonExtractor.extract(responseText, { + extractArrays: true, + applyFixes: true, + useFallbacks: true + }); + + if (Array.isArray(extractedData) && extractedData.length > 0) { + // Convert the raw data to SubQuery objects + return extractedData.map(item => ({ + id: this.generateSubQueryId(), + text: item.text, + reason: item.reason || "Sub-aspect of the main question", + isAnswered: false + })); + } + } catch (error: any) { + log.error(`Error extracting sub-queries from LLM response: ${error.message}`); + // Fall through to traditional decomposition + } + + // Fallback to traditional decomposition + return this.createSubQueries(query, context); + } catch (error: any) { + log.error(`Error in createLLMSubQueries: ${error.message}`); + return this.createSubQueries(query, context); + } } - // Look for A vs B pattern - const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i); - if (directComparison) { - aspects.push(directComparison[1].trim()); - aspects.push(directComparison[2].trim()); - return aspects; + /** + * Create sub-queries from a complex query + * + * @param query The original complex query + * @param context Optional context to help with decomposition + * @returns Array of sub-queries + */ + private createSubQueries(query: string, context?: string): SubQuery[] { + // Analyze the query to identify potential aspects to explore + const questionParts = this.identifyQuestionParts(query); + const subQueries: SubQuery[] = []; + + // Add the main query as the first sub-query + subQueries.push({ + id: this.generateSubQueryId(), + text: query, + reason: "Main question (for direct matching)", + isAnswered: false + }); + + // Add sub-queries for each identified question part + for (const part of questionParts) { + subQueries.push({ + id: this.generateSubQueryId(), + text: part, + reason: "Sub-aspect of the main question", + isAnswered: false + }); + } + + // Add a generic exploration query to find related information + subQueries.push({ + id: this.generateSubQueryId(), + text: `What information is related to ${query}?`, + reason: "General exploration to find related content", + isAnswered: false + }); + + // If we have context, add a specific query for that context + if (context) { + subQueries.push({ + id: this.generateSubQueryId(), + text: `How does "${context}" relate to ${query}?`, + reason: "Contextual relationship exploration", + isAnswered: false + }); + } + + return subQueries; } - // Fall back to looking for named entities or key terms in both parts - const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || []; - const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || []; + /** + * Identify parts of a complex question that could be individual sub-questions + * + * @param query The complex query to analyze + * @returns Array of potential sub-questions + */ + private identifyQuestionParts(query: string): string[] { + const parts: string[] = []; - // Look for substantial terms (longer than 3 chars) - const candidateTerms = [...beforeTerms, ...afterTerms] - .filter(term => term.length > 3) - .map(term => term.trim()); + // Check for multiple question marks + const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?')); + if (questionSentences.length > 1) { + // Multiple explicit questions detected + return questionSentences.map(s => s.trim()); + } - // Take up to 2 distinct terms - return [...new Set(candidateTerms)].slice(0, 2); - } + // Check for conjunctions that might separate multiple questions + const conjunctions = ['and', 'or', 'but', 'plus', 'also']; + for (const conjunction of conjunctions) { + const pattern = new RegExp(`\\b${conjunction}\\b`, 'i'); + if (pattern.test(query)) { + // Split by conjunction and check if each part could be a question + const splitParts = query.split(pattern); + for (const part of splitParts) { + const trimmed = part.trim(); + if (trimmed.length > 10) { // Avoid tiny fragments + parts.push(trimmed); + } + } + if (parts.length > 0) { + return parts; + } + } + } - /** - * Generate a unique ID for a sub-query - * - * @returns A unique sub-query ID - */ - private generateSubQueryId(): string { - QueryProcessor.queryCounter++; - return `sq_${Date.now()}_${QueryProcessor.queryCounter}`; - } + // Check for comparison indicators + const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs']; + for (const term of comparisonTerms) { + if (query.toLowerCase().includes(term)) { + // This is likely a comparison question, extract the items being compared + const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i')); + if (beforeAfter.length === 2) { + // Try to extract compared items + const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]); + if (aspects.length > 0) { + for (const aspect of aspects) { + parts.push(`What are the key points about ${aspect}?`); + } + parts.push(`What are the differences between ${aspects.join(' and ')}?`); + return parts; + } + } + } + } - /** - * Assess the complexity of a query on a scale of 1-10 - * This helps determine if decomposition is needed - * - * @param query The query to assess - * @returns A complexity score from 1-10 - */ - assessQueryComplexity(query: string): number { - let score = 0; + // Check for "multiple aspects" questions + const aspectPatterns = [ + /what (?:are|is) the (\w+) (?:of|about|for|in) /i, + /how (?:to|do|does|can) .+ (\w+)/i + ]; - // Factor 1: Length - longer queries tend to be more complex - // 0-1.5 points for length - const lengthScore = Math.min(query.length / 100, 1.5); - score += lengthScore; + for (const pattern of aspectPatterns) { + const match = query.match(pattern); + if (match && match[1]) { + const aspect = match[1]; + parts.push(`What is the ${aspect}?`); + parts.push(`How does ${aspect} relate to the main topic?`); + } + } - // Factor 2: Question marks - multiple questions are more complex - // 0-2 points for question marks - const questionMarkCount = (query.match(/\?/g) || []).length; - score += Math.min(questionMarkCount * 0.8, 2); - - // Factor 3: Question words - multiple "wh" questions indicate complexity - // 0-2 points for question words - const questionWords = ['what', 'why', 'how', 'when', 'where', 'who', 'which']; - let questionWordCount = 0; - - for (const word of questionWords) { - const regex = new RegExp(`\\b${word}\\b`, 'gi'); - questionWordCount += (query.match(regex) || []).length; + return parts; } - score += Math.min(questionWordCount * 0.5, 2); + /** + * Extract items being compared from a comparison question + * + * @param before Text before the comparison term + * @param after Text after the comparison term + * @returns Array of items being compared + */ + private extractComparisonAspects(before: string, after: string): string[] { + const aspects: string[] = []; - // Factor 4: Conjunctions - linking multiple concepts increases complexity - // 0-1.5 points for conjunctions - const conjunctions = ['and', 'or', 'but', 'however', 'although', 'nevertheless', 'despite', 'whereas']; - let conjunctionCount = 0; + // Look for "between A and B" pattern + const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i); + if (betweenMatch) { + aspects.push(betweenMatch[1].trim()); + aspects.push(betweenMatch[2].trim()); + return aspects; + } - for (const conj of conjunctions) { - const regex = new RegExp(`\\b${conj}\\b`, 'gi'); - conjunctionCount += (query.match(regex) || []).length; + // Look for A vs B pattern + const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i); + if (directComparison) { + aspects.push(directComparison[1].trim()); + aspects.push(directComparison[2].trim()); + return aspects; + } + + // Fall back to looking for named entities or key terms in both parts + const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || []; + const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || []; + + // Look for substantial terms (longer than 3 chars) + const candidateTerms = [...beforeTerms, ...afterTerms] + .filter(term => term.length > 3) + .map(term => term.trim()); + + // Take up to 2 distinct terms + return [...new Set(candidateTerms)].slice(0, 2); } - score += Math.min(conjunctionCount * 0.3, 1.5); - - // Factor 5: Comparison terms - comparisons are complex - // 0-1.5 points for comparison terms - const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs', 'similarities', 'better', 'worse']; - let comparisonCount = 0; - - for (const term of comparisonTerms) { - const regex = new RegExp(`\\b${term}\\b`, 'gi'); - comparisonCount += (query.match(regex) || []).length; + /** + * Generate a unique ID for a sub-query + * + * @returns A unique sub-query ID + */ + private generateSubQueryId(): string { + QueryProcessor.queryCounter++; + return `sq_${Date.now()}_${QueryProcessor.queryCounter}`; } - score += Math.min(comparisonCount * 0.7, 1.5); + /** + * Assess the complexity of a query on a scale of 1-10 + * This helps determine if decomposition is needed + * + * @param query The query to assess + * @returns A complexity score from 1-10 + */ + assessQueryComplexity(query: string): number { + let score = 0; - // Factor 6: Technical terms and depth indicators - // 0-1.5 points for depth indicators - const depthTerms = ['explain', 'detail', 'elaborate', 'in-depth', 'comprehensive', 'thoroughly', 'analysis']; - let depthCount = 0; + // Factor 1: Length - longer queries tend to be more complex + // 0-1.5 points for length + const lengthScore = Math.min(query.length / 100, 1.5); + score += lengthScore; - for (const term of depthTerms) { - const regex = new RegExp(`\\b${term}\\b`, 'gi'); - depthCount += (query.match(regex) || []).length; + // Factor 2: Question marks - multiple questions are more complex + // 0-2 points for question marks + const questionMarkCount = (query.match(/\?/g) || []).length; + score += Math.min(questionMarkCount * 0.8, 2); + + // Factor 3: Question words - multiple "wh" questions indicate complexity + // 0-2 points for question words + const questionWords = ['what', 'why', 'how', 'when', 'where', 'who', 'which']; + let questionWordCount = 0; + + for (const word of questionWords) { + const regex = new RegExp(`\\b${word}\\b`, 'gi'); + questionWordCount += (query.match(regex) || []).length; + } + + score += Math.min(questionWordCount * 0.5, 2); + + // Factor 4: Conjunctions - linking multiple concepts increases complexity + // 0-1.5 points for conjunctions + const conjunctions = ['and', 'or', 'but', 'however', 'although', 'nevertheless', 'despite', 'whereas']; + let conjunctionCount = 0; + + for (const conj of conjunctions) { + const regex = new RegExp(`\\b${conj}\\b`, 'gi'); + conjunctionCount += (query.match(regex) || []).length; + } + + score += Math.min(conjunctionCount * 0.3, 1.5); + + // Factor 5: Comparison terms - comparisons are complex + // 0-1.5 points for comparison terms + const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs', 'similarities', 'better', 'worse']; + let comparisonCount = 0; + + for (const term of comparisonTerms) { + const regex = new RegExp(`\\b${term}\\b`, 'gi'); + comparisonCount += (query.match(regex) || []).length; + } + + score += Math.min(comparisonCount * 0.7, 1.5); + + // Factor 6: Technical terms and depth indicators + // 0-1.5 points for depth indicators + const depthTerms = ['explain', 'detail', 'elaborate', 'in-depth', 'comprehensive', 'thoroughly', 'analysis']; + let depthCount = 0; + + for (const term of depthTerms) { + const regex = new RegExp(`\\b${term}\\b`, 'gi'); + depthCount += (query.match(regex) || []).length; + } + + score += Math.min(depthCount * 0.5, 1.5); + + // Return final score, capped at 10 + return Math.min(Math.round(score), 10); } - score += Math.min(depthCount * 0.5, 1.5); + /** + * Update a sub-query with its answer + * + * @param decomposedQuery The decomposed query object + * @param subQueryId The ID of the sub-query to update + * @param answer The answer to the sub-query + * @returns The updated decomposed query + */ + updateSubQueryAnswer( + decomposedQuery: DecomposedQuery, + subQueryId: string, + answer: string + ): DecomposedQuery { + const updatedSubQueries = decomposedQuery.subQueries.map(sq => { + if (sq.id === subQueryId) { + return { + ...sq, + answer, + isAnswered: true + }; + } + return sq; + }); - // Return final score, capped at 10 - return Math.min(Math.round(score), 10); - } + // Check if all sub-queries are answered + const allAnswered = updatedSubQueries.every(sq => sq.isAnswered); - /** - * Update a sub-query with its answer - * - * @param decomposedQuery The decomposed query object - * @param subQueryId The ID of the sub-query to update - * @param answer The answer to the sub-query - * @returns The updated decomposed query - */ - updateSubQueryAnswer( - decomposedQuery: DecomposedQuery, - subQueryId: string, - answer: string - ): DecomposedQuery { - const updatedSubQueries = decomposedQuery.subQueries.map(sq => { - if (sq.id === subQueryId) { return { - ...sq, - answer, - isAnswered: true + ...decomposedQuery, + subQueries: updatedSubQueries, + status: allAnswered ? 'completed' : 'in_progress' }; - } - return sq; - }); - - // Check if all sub-queries are answered - const allAnswered = updatedSubQueries.every(sq => sq.isAnswered); - - return { - ...decomposedQuery, - subQueries: updatedSubQueries, - status: allAnswered ? 'completed' : 'in_progress' - }; - } - - /** - * Synthesize all sub-query answers into a comprehensive response - * - * @param decomposedQuery The decomposed query with all sub-queries answered - * @returns A synthesized answer to the original query - */ - synthesizeAnswer(decomposedQuery: DecomposedQuery): string { - try { - // Ensure all sub-queries are answered - if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) { - return "Cannot synthesize answer until all sub-queries are answered."; - } - - // For simple queries with just one sub-query, return the answer directly - if (decomposedQuery.subQueries.length === 1) { - return decomposedQuery.subQueries[0].answer || ""; - } - - // For complex queries, build a structured response - let synthesized = `Answer to: ${decomposedQuery.originalQuery}\n\n`; - - // Group by themes if there are many sub-queries - if (decomposedQuery.subQueries.length > 3) { - synthesized += "Based on the information gathered:\n\n"; - - for (const sq of decomposedQuery.subQueries) { - synthesized += `${sq.answer}\n\n`; - } - } else { - // For fewer sub-queries, present each one with its question - for (const sq of decomposedQuery.subQueries) { - synthesized += `${sq.answer}\n\n`; - } - } - - return synthesized.trim(); - } catch (error: any) { - log.error(`Error synthesizing answer: ${error.message}`); - return "An error occurred while synthesizing the answer."; } - } + + /** + * Synthesize all sub-query answers into a comprehensive response + * + * @param decomposedQuery The decomposed query with all sub-queries answered + * @returns A synthesized answer to the original query + */ + synthesizeAnswer(decomposedQuery: DecomposedQuery): string { + try { + // Ensure all sub-queries are answered + if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) { + return "Cannot synthesize answer until all sub-queries are answered."; + } + + // For simple queries with just one sub-query, return the answer directly + if (decomposedQuery.subQueries.length === 1) { + return decomposedQuery.subQueries[0].answer || ""; + } + + // For complex queries, build a structured response + let synthesized = `Answer to: ${decomposedQuery.originalQuery}\n\n`; + + // Group by themes if there are many sub-queries + if (decomposedQuery.subQueries.length > 3) { + synthesized += "Based on the information gathered:\n\n"; + + for (const sq of decomposedQuery.subQueries) { + synthesized += `${sq.answer}\n\n`; + } + } else { + // For fewer sub-queries, present each one with its question + for (const sq of decomposedQuery.subQueries) { + synthesized += `${sq.answer}\n\n`; + } + } + + return synthesized.trim(); + } catch (error: any) { + log.error(`Error synthesizing answer: ${error.message}`); + return "An error occurred while synthesizing the answer."; + } + } } // Export a singleton instance diff --git a/src/services/llm/context_extractors/query_decomposition_tool.ts b/src/services/llm/context_extractors/query_decomposition_tool.ts index 3338caea7..a213a090d 100644 --- a/src/services/llm/context_extractors/query_decomposition_tool.ts +++ b/src/services/llm/context_extractors/query_decomposition_tool.ts @@ -12,83 +12,129 @@ import type { SubQuery, DecomposedQuery } from '../context/services/query_proces export type { SubQuery, DecomposedQuery }; export class QueryDecompositionTool { - /** - * Break down a complex query into smaller, more manageable sub-queries - * - * @param query The original user query - * @param context Optional context about the current note being viewed - * @returns A decomposed query object with sub-queries - */ - decomposeQuery(query: string, context?: string): DecomposedQuery { - log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery'); - return queryProcessor.decomposeQuery(query, context); - } + /** + * Break down a complex query into smaller, more manageable sub-queries + * + * @param query The original user query + * @param context Optional context about the current note being viewed + * @returns A decomposed query object with sub-queries + */ + decomposeQuery(query: string, context?: string): DecomposedQuery { + log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery'); - /** - * Update a sub-query with its answer - * - * @param decomposedQuery The decomposed query object - * @param subQueryId The ID of the sub-query to update - * @param answer The answer to the sub-query - * @returns The updated decomposed query - */ - updateSubQueryAnswer( - decomposedQuery: DecomposedQuery, - subQueryId: string, - answer: string - ): DecomposedQuery { - log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer'); - return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer); - } + // Since the main implementation is now async but we need to maintain a sync interface, + // we'll use a simpler approach that doesn't require LLM - /** - * Synthesize all sub-query answers into a comprehensive response - * - * @param decomposedQuery The decomposed query with all sub-queries answered - * @returns A synthesized answer to the original query - */ - synthesizeAnswer(decomposedQuery: DecomposedQuery): string { - log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer'); - return queryProcessor.synthesizeAnswer(decomposedQuery); - } + // Get the complexity to determine approach + const complexity = queryProcessor.assessQueryComplexity(query); - /** - * Generate a status report on the progress of answering a complex query - * - * @param decomposedQuery The decomposed query - * @returns A status report string - */ - getQueryStatus(decomposedQuery: DecomposedQuery): string { - log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus'); - // This method doesn't exist directly in the new implementation - // We'll implement a simple fallback + if (!query || query.trim().length === 0) { + return { + originalQuery: query, + subQueries: [], + status: 'pending', + complexity: 0 + }; + } - const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length; - const totalCount = decomposedQuery.subQueries.length; + // Create a baseline decomposed query + let subQueries = []; - let status = `Progress: ${answeredCount}/${totalCount} sub-queries answered\n\n`; + // For compatibility, we'll use the basic SubQuery generation + // This avoids the async LLM call which would break the sync interface + const mainSubQuery = { + id: `sq_${Date.now()}_sync_0`, + text: query, + reason: "Main question (for direct matching)", + isAnswered: false + }; - for (const sq of decomposedQuery.subQueries) { - status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`; - if (sq.isAnswered && sq.answer) { - status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`; - } - status += '\n'; + subQueries.push(mainSubQuery); + + // Add a generic exploration query for context + const genericQuery = { + id: `sq_${Date.now()}_sync_1`, + text: `What information is related to ${query}?`, + reason: "General exploration to find related content", + isAnswered: false + }; + + subQueries.push(genericQuery); + + // Simplified implementation that doesn't require async/LLM calls + return { + originalQuery: query, + subQueries: subQueries, + status: 'pending', + complexity + }; } - return status; - } + /** + * Update a sub-query with its answer + * + * @param decomposedQuery The decomposed query object + * @param subQueryId The ID of the sub-query to update + * @param answer The answer to the sub-query + * @returns The updated decomposed query + */ + updateSubQueryAnswer( + decomposedQuery: DecomposedQuery, + subQueryId: string, + answer: string + ): DecomposedQuery { + log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer'); + return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer); + } - /** - * Assess the complexity of a query on a scale of 1-10 - * - * @param query The query to assess - * @returns A complexity score from 1-10 - */ - assessQueryComplexity(query: string): number { - log.info('Using compatibility layer for QueryDecompositionTool.assessQueryComplexity'); - return queryProcessor.assessQueryComplexity(query); - } + /** + * Synthesize all sub-query answers into a comprehensive response + * + * @param decomposedQuery The decomposed query with all sub-queries answered + * @returns A synthesized answer to the original query + */ + synthesizeAnswer(decomposedQuery: DecomposedQuery): string { + log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer'); + return queryProcessor.synthesizeAnswer(decomposedQuery); + } + + /** + * Generate a status report on the progress of answering a complex query + * + * @param decomposedQuery The decomposed query + * @returns A status report string + */ + getQueryStatus(decomposedQuery: DecomposedQuery): string { + log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus'); + // This method doesn't exist directly in the new implementation + // We'll implement a simple fallback + + const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length; + const totalCount = decomposedQuery.subQueries.length; + + let status = `Progress: ${answeredCount}/${totalCount} sub-queries answered\n\n`; + + for (const sq of decomposedQuery.subQueries) { + status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`; + if (sq.isAnswered && sq.answer) { + status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`; + } + status += '\n'; + } + + return status; + } + + /** + * Assess the complexity of a query on a scale of 1-10 + * + * @param query The query to assess + * @returns A complexity score from 1-10 + */ + assessQueryComplexity(query: string): number { + log.info('Using compatibility layer for QueryDecompositionTool.assessQueryComplexity'); + return queryProcessor.assessQueryComplexity(query); + } } // Export default instance for compatibility diff --git a/src/services/llm/interfaces/agent_tool_interfaces.ts b/src/services/llm/interfaces/agent_tool_interfaces.ts index dcf990827..deb4bfd5e 100644 --- a/src/services/llm/interfaces/agent_tool_interfaces.ts +++ b/src/services/llm/interfaces/agent_tool_interfaces.ts @@ -19,13 +19,6 @@ export interface LLMServiceInterface { stream?: boolean; systemPrompt?: string; }): Promise; - - /** - * Generate search queries by decomposing a complex query into simpler ones - * @param query The original user query to decompose - * @returns An array of decomposed search queries - */ - generateSearchQueries?(query: string): Promise; } /** diff --git a/src/services/llm/pipeline/chat_pipeline.ts b/src/services/llm/pipeline/chat_pipeline.ts index fc9f7bc8e..888511f92 100644 --- a/src/services/llm/pipeline/chat_pipeline.ts +++ b/src/services/llm/pipeline/chat_pipeline.ts @@ -168,16 +168,28 @@ export class ChatPipeline { log.info(`========== STAGE 2: QUERY DECOMPOSITION ==========`); log.info('Performing query decomposition to generate effective search queries'); const llmService = await this.getLLMService(); - let searchQueries = [userQuery]; // Default to original query + let searchQueries = [userQuery]; - if (llmService && llmService.generateSearchQueries) { + if (llmService) { try { - const decompositionResult = await llmService.generateSearchQueries(userQuery); - if (decompositionResult && decompositionResult.length > 0) { - searchQueries = decompositionResult; - log.info(`Generated ${searchQueries.length} search queries: ${JSON.stringify(searchQueries)}`); + // Import the query processor and use its decomposeQuery method + const queryProcessor = (await import('../context/services/query_processor.js')).default; + + // Use the enhanced query processor with the LLM service + const decomposedQuery = await queryProcessor.decomposeQuery(userQuery, undefined, llmService); + + if (decomposedQuery && decomposedQuery.subQueries && decomposedQuery.subQueries.length > 0) { + // Extract search queries from the decomposed query + searchQueries = decomposedQuery.subQueries.map(sq => sq.text); + + // Always include the original query if it's not already included + if (!searchQueries.includes(userQuery)) { + searchQueries.unshift(userQuery); + } + + log.info(`Query decomposed with complexity ${decomposedQuery.complexity}/10 into ${searchQueries.length} search queries`); } else { - log.info('Query decomposition returned no results, using original query'); + log.info('Query decomposition returned no sub-queries, using original query'); } } catch (error: any) { log.error(`Error in query decomposition: ${error.message || String(error)}`);