diff --git a/src/services/llm/context/services/query_processor.ts b/src/services/llm/context/services/query_processor.ts index 9ca3880de..53a0e8b12 100644 --- a/src/services/llm/context/services/query_processor.ts +++ b/src/services/llm/context/services/query_processor.ts @@ -54,82 +54,126 @@ export class QueryProcessor { return null; } } - /** - * Generate enhanced search queries for better semantic matching + * Generate search queries to find relevant information for the user question * * @param userQuestion - The user's question - * @param llmService - The LLM service to use for generating queries, or null to auto-detect + * @param llmService - The LLM service to use for generating queries * @returns Array of search queries */ - async generateSearchQueries( - userQuestion: string, - llmService?: LLMServiceInterface - ): Promise { - if (!userQuestion || userQuestion.trim() === '') { - return []; // Return empty array for empty input - } - + async generateSearchQueries(userQuestion: string, llmService: any): Promise { try { - // Check cache - const cacheKey = `searchQueries:${userQuestion}`; - const cached = cacheManager.getQueryResults(cacheKey); - if (cached && Array.isArray(cached)) { - return cached; + // Check cache first + const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`); + + const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question. +Given the user's question, generate 3-5 specific search queries that would help find relevant information. +Each query should be focused on a different aspect of the question. +Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question. +Format your answer as a JSON array of strings, with each string being a search query. +Example: ["exact topic mentioned", "related concept 1", "related concept 2"]` + + interface Message { + role: 'user' | 'assistant' | 'system'; + content: string; } - // Get LLM service if not provided - const service = llmService || await this.getLLMService(); - if (!service) { - log.info(`No LLM service available for query enhancement, using original query`); - return [userQuestion]; - } - - // Prepare the prompt with JSON formatting instructions - const enhancedPrompt = `${this.enhancerPrompt} -IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements. -Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`; - - const messages = [ - { role: "system" as const, content: enhancedPrompt }, - { role: "user" as const, content: userQuestion } + const messages: Message[] = [ + { role: "system", content: PROMPT }, + { role: "user", content: userQuestion } ]; const options = { - temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR, - maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, - bypassFormatter: true, - expectsJsonResponse: true, - _bypassContextProcessing: true, // Prevent recursive calls - enableTools: false // Explicitly disable tools for this request + temperature: 0.3, + maxTokens: 300 }; // Get the response from the LLM - const response = await service.generateChatCompletion(messages, options); - const responseText = response.text; + const response = await llmService.generateChatCompletion(messages, options); + const responseText = response.text; // Extract the text from the response object - // Use the JsonExtractor to parse the response - const queries = JsonExtractor.extract(responseText, { - extractArrays: true, - minStringLength: 3, - applyFixes: true, - useFallbacks: true - }); + try { + // Remove code blocks, quotes, and clean up the response text + let jsonStr = responseText + .replace(/```(?:json)?|```/g, '') // Remove code block markers + .replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes + .trim(); - if (queries && queries.length > 0) { - log.info(`Extracted ${queries.length} queries using JsonExtractor`); - cacheManager.storeQueryResults(cacheKey, queries); - return queries; + // Check if the text might contain a JSON array (has square brackets) + if (jsonStr.includes('[') && jsonStr.includes(']')) { + // Extract just the array part if there's explanatory text + const arrayMatch = jsonStr.match(/\[[\s\S]*\]/); + if (arrayMatch) { + jsonStr = arrayMatch[0]; + } + + // Try to parse the JSON + try { + const queries = JSON.parse(jsonStr); + if (Array.isArray(queries) && queries.length > 0) { + const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean); + cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); + return result; + } + } catch (innerError) { + // If parsing fails, log it and continue to the fallback + log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`); + } + } + + // Fallback 1: Try to extract an array manually by splitting on commas between quotes + if (jsonStr.includes('[') && jsonStr.includes(']')) { + const arrayContent = jsonStr.substring( + jsonStr.indexOf('[') + 1, + jsonStr.lastIndexOf(']') + ); + + // Use regex to match quoted strings, handling escaped quotes + const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g); + if (stringMatches && stringMatches.length > 0) { + const result = stringMatches + .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes + .filter((s: string) => s.length > 0); + cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); + return result; + } + } + + // Fallback 2: Extract queries line by line + const lines = responseText.split('\n') + .map((line: string) => line.trim()) + .filter((line: string) => + line.length > 0 && + !line.startsWith('```') && + !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone + !line.match(/^\[|\]$/) // Skip lines that are just brackets + ); + + if (lines.length > 0) { + // Remove numbering, quotes and other list markers from each line + const result = lines.map((line: string) => { + return line + .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc) + .replace(/^[-*•]\s*/, '') // Remove bullet list markers + .replace(/^["']|["']$/g, '') // Remove surrounding quotes + .trim(); + }).filter((s: string) => s.length > 0); + + cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result); + return result; + } + } catch (parseError) { + log.error(`Error parsing search queries: ${parseError}`); } - // Fallback to original question + // If all else fails, just use the original question const fallback = [userQuestion]; - log.info(`No queries extracted, using fallback: "${userQuestion}"`); - cacheManager.storeQueryResults(cacheKey, fallback); + cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback); return fallback; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); log.error(`Error generating search queries: ${errorMessage}`); + // Fallback to just using the original question return [userQuestion]; } } @@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this }; } - // Assess query complexity - const complexity = this.assessQueryComplexity(query); - log.info(`Query complexity assessment: ${complexity}/10`); + // Simple assessment of query complexity + const complexity = query.length > 100 ? 5 : 3; - // Try to get LLM service if not provided + // Get LLM service if not provided const service = llmService || await this.getLLMService(); - // If no LLM service is available, use basic decomposition + // If no LLM service is available, use original query if (!service) { log.info(`No LLM service available for query decomposition, using original query`); - return this.createBasicDecomposition(query, complexity); + return { + originalQuery: query, + subQueries: [{ + id: this.generateSubQueryId(), + text: query, + reason: "Original query", + isAnswered: false + }], + status: 'pending', + complexity + }; } - // With LLM service available, always use advanced decomposition regardless of complexity - try { - log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`); - const enhancedSubQueries = await this.createLLMSubQueries(query, context, service); + // Make a simple request to decompose the query + const result = await this.simpleQueryDecomposition(query, service, context); - if (enhancedSubQueries && enhancedSubQueries.length > 0) { - log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`); - return { - originalQuery: query, - subQueries: enhancedSubQueries, - status: 'pending', - complexity - }; - } - } catch (error: any) { - log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`); - // Fall through to basic decomposition - } - - // Fallback to basic decomposition - return this.createBasicDecomposition(query, complexity); + // Return the result + return { + originalQuery: query, + subQueries: result, + status: 'pending', + complexity + }; } catch (error: any) { log.error(`Error decomposing query: ${error.message}`); @@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this } /** - * Create a basic decomposition of a query without using LLM + * Simple LLM-based query decomposition * - * @param query The original query - * @param complexity The assessed complexity - * @returns A basic decomposed query - */ - private createBasicDecomposition(query: string, complexity: number): DecomposedQuery { - log.info(`Using basic decomposition approach (complexity: ${complexity})`); - - const mainSubQuery = { - id: this.generateSubQueryId(), - text: query, - reason: "Direct question that can be answered without decomposition", - isAnswered: false - }; - - // Add a generic exploration query for context - const genericQuery = { - id: this.generateSubQueryId(), - text: `What information is related to ${query}?`, - reason: "General exploration to find related content", - isAnswered: false - }; - - return { - originalQuery: query, - subQueries: [mainSubQuery, genericQuery], - status: 'pending', - complexity - }; - } - - /** - * Use LLM to create advanced sub-queries from a complex query - * - * @param query The original complex query + * @param query The original query to decompose + * @param llmService LLM service to use * @param context Optional context to help with decomposition - * @param llmService LLM service to use for advanced decomposition * @returns Array of sub-queries */ - private async createLLMSubQueries( + private async simpleQueryDecomposition( query: string, - context?: string, - llmService?: LLMServiceInterface + llmService: LLMServiceInterface, + context?: string ): Promise { - // If no LLM service, use basic decomposition - if (!llmService) { - return this.createSubQueries(query, context); - } - try { - // Create a much better prompt for more effective query decomposition - const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information. + // Create a simple prompt for query decomposition + const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search. -Your task is to identify the main concepts and break them down into specific, targeted search queries. +Your goal is to help find comprehensive information by breaking down the query into multiple search terms. -DO NOT simply rephrase the original query or create a generic "what's related to X" pattern. -DO create specific queries that explore different aspects of the topic. +IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects. -For example: -If the query is "How does Docker compare to Kubernetes?", good sub-queries would be: -- "Docker container architecture and features" -- "Kubernetes container orchestration capabilities" -- "Docker vs Kubernetes performance comparison" -- "When to use Docker versus Kubernetes" +For example, if the query is "What are Docker containers?", good sub-queries would be: +1. "Docker container architecture and components" +2. "Docker vs virtual machines differences" +3. "Docker container use cases and benefits" +4. "Docker container deployment best practices" Format your response as a JSON array of objects with 'text' and 'reason' properties. Example: [ - {"text": "Docker container architecture", "reason": "Understanding Docker's core technology"}, - {"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"} + {"text": "Docker container architecture", "reason": "Understanding the technical structure"}, + {"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"}, + {"text": "Docker container benefits", "reason": "Understanding advantages and use cases"}, + {"text": "Docker deployment best practices", "reason": "Learning practical implementation"} ] ${context ? `\nContext: ${context}` : ''} Query: ${query}`; + log.info(`Sending decomposition prompt to LLM for query: "${query}"`); + const messages = [ { role: "system" as const, content: prompt } ]; const options = { - temperature: 0.7, // Higher temperature for more creative decomposition + temperature: 0.7, maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS, bypassFormatter: true, expectsJsonResponse: true, - _bypassContextProcessing: true, // Prevent recursive calls - enableTools: false // Explicitly disable tools for this request + _bypassContextProcessing: true, + enableTools: false }; // Get the response from the LLM const response = await llmService.generateChatCompletion(messages, options); const responseText = response.text; - // Try to extract structured sub-queries from the response + log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`); + + // Try to parse the response as JSON + let subQueries: SubQuery[] = []; try { - // Expected format is an array of objects with "text" and "reason" keys - interface RawSubQuery { - text: string; - reason?: string; - } - - // Log the response for debugging - log.info(`Received response from LLM for query decomposition, extracting JSON...`); - - log.info(`Response: ${responseText}`); - - // Extract JSON from the response - const extractedData = JsonExtractor.extract(responseText, { + // Extract the JSON from the response + const extractedJson = JsonExtractor.extract(responseText, { extractArrays: true, applyFixes: true, useFallbacks: true }); - // Validate the extracted data - if (!Array.isArray(extractedData)) { - log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`); - return this.createSubQueries(query, context); + log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`); + + if (Array.isArray(extractedJson) && extractedJson.length > 0) { + // Convert the extracted data to SubQuery objects + subQueries = extractedJson + .filter(item => item && typeof item === 'object' && item.text) + .map(item => ({ + id: this.generateSubQueryId(), + text: item.text, + reason: item.reason || "Sub-aspect of the main question", + isAnswered: false + })); + + log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`); + } else { + log.info(`Failed to extract array of sub-queries from LLM response`); } + } catch (error) { + log.error(`Error parsing LLM response: ${error}`); + } - if (extractedData.length === 0) { - log.error(`Extracted array is empty, falling back to basic decomposition`); - return this.createSubQueries(query, context); - } - - log.info(`Successfully extracted ${extractedData.length} items using regex pattern`); - - // Validate each sub-query to ensure it has a text property - const validSubQueries = extractedData.filter(item => { - if (!item || typeof item !== 'object') { - log.error(`Invalid sub-query item: ${JSON.stringify(item)}`); - return false; - } - - if (!item.text || typeof item.text !== 'string') { - log.error(`Sub-query missing text property: ${JSON.stringify(item)}`); - return false; - } - - return true; - }); - - if (validSubQueries.length === 0) { - log.error(`No valid sub-queries found after validation, falling back to basic decomposition`); - return this.createSubQueries(query, context); - } - - if (validSubQueries.length < extractedData.length) { - log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`); - } - - // Convert the raw data to SubQuery objects - let subQueries = validSubQueries.map(item => ({ + // Always include the original query + const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase()); + if (!hasOriginal) { + subQueries.push({ id: this.generateSubQueryId(), - text: item.text, - reason: item.reason || "Sub-aspect of the main question", + text: query, + reason: "Original query", isAnswered: false - })); - - // Make sure we have at least the original query - const hasOriginalQuery = subQueries.some(sq => { - // Check if either sq.text or query is null/undefined before using toLowerCase - if (!sq.text) return false; - const sqText = sq.text.toLowerCase(); - const originalQuery = query.toLowerCase(); - - return sqText.includes(originalQuery) || originalQuery.includes(sqText); }); + log.info(`Added original query to sub-queries list`); + } - if (!hasOriginalQuery) { - subQueries.unshift({ + // Ensure we have at least 3 queries for better search coverage + if (subQueries.length < 3) { + // Create some generic variants of the original query + const genericVariants = [ + { text: `${query} examples and use cases`, reason: "Practical applications" }, + { text: `${query} concepts and definitions`, reason: "Conceptual understanding" }, + { text: `${query} best practices`, reason: "Implementation guidance" } + ]; + + // Add variants until we have at least 3 queries + for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) { + subQueries.push({ id: this.generateSubQueryId(), - text: query, - reason: "Original query", + text: genericVariants[i].text, + reason: genericVariants[i].reason, isAnswered: false }); } - // Log the extracted sub-queries for debugging - log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`); - - return subQueries; - } catch (error: any) { - log.error(`Error extracting sub-queries from LLM response: ${error.message}`); - // Fall through to traditional decomposition + log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`); } - // Fallback to traditional decomposition - return this.createSubQueries(query, context); - } catch (error: any) { - log.error(`Error in createLLMSubQueries: ${error.message}`); - return this.createSubQueries(query, context); - } - } + log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`); + return subQueries; + } catch (error) { + log.error(`Error in simpleQueryDecomposition: ${error}`); - /** - * Create sub-queries from a complex query - * - * @param query The original complex query - * @param context Optional context to help with decomposition - * @returns Array of sub-queries - */ - private createSubQueries(query: string, context?: string): SubQuery[] { - // Analyze the query to identify potential aspects to explore - const questionParts = this.identifyQuestionParts(query); - const subQueries: SubQuery[] = []; - - // Add the main query as the first sub-query - subQueries.push({ - id: this.generateSubQueryId(), - text: query, - reason: "Main question (for direct matching)", - isAnswered: false - }); - - // Add sub-queries for each identified question part - for (const part of questionParts) { - subQueries.push({ - id: this.generateSubQueryId(), - text: part, - reason: "Sub-aspect of the main question", - isAnswered: false - }); - } - - // Add a generic exploration query to find related information - subQueries.push({ - id: this.generateSubQueryId(), - text: `What information is related to ${query}?`, - reason: "General exploration to find related content", - isAnswered: false - }); - - // If we have context, add a specific query for that context - if (context) { - subQueries.push({ - id: this.generateSubQueryId(), - text: `How does "${context}" relate to ${query}?`, - reason: "Contextual relationship exploration", - isAnswered: false - }); - } - - return subQueries; - } - - /** - * Identify parts of a complex question that could be individual sub-questions - * - * @param query The complex query to analyze - * @returns Array of potential sub-questions - */ - private identifyQuestionParts(query: string): string[] { - const parts: string[] = []; - - // Check for multiple question marks - const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?')); - if (questionSentences.length > 1) { - // Multiple explicit questions detected - return questionSentences.map(s => s.trim()); - } - - // Check for conjunctions that might separate multiple questions - const conjunctions = ['and', 'or', 'but', 'plus', 'also']; - for (const conjunction of conjunctions) { - const pattern = new RegExp(`\\b${conjunction}\\b`, 'i'); - if (pattern.test(query)) { - // Split by conjunction and check if each part could be a question - const splitParts = query.split(pattern); - for (const part of splitParts) { - const trimmed = part.trim(); - if (trimmed.length > 10) { // Avoid tiny fragments - parts.push(trimmed); - } + // Return the original query plus some variants as fallback + const fallbackQueries = [ + { + id: this.generateSubQueryId(), + text: query, + reason: "Original query", + isAnswered: false + }, + { + id: this.generateSubQueryId(), + text: `${query} overview`, + reason: "General information", + isAnswered: false + }, + { + id: this.generateSubQueryId(), + text: `${query} examples`, + reason: "Practical examples", + isAnswered: false } - if (parts.length > 0) { - return parts; - } - } + ]; + + log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`); + return fallbackQueries; } - - // Check for comparison indicators - const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs']; - for (const term of comparisonTerms) { - if (query.toLowerCase().includes(term)) { - // This is likely a comparison question, extract the items being compared - const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i')); - if (beforeAfter.length === 2) { - // Try to extract compared items - const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]); - if (aspects.length > 0) { - for (const aspect of aspects) { - parts.push(`What are the key points about ${aspect}?`); - } - parts.push(`What are the differences between ${aspects.join(' and ')}?`); - return parts; - } - } - } - } - - // Check for "multiple aspects" questions - const aspectPatterns = [ - /what (?:are|is) the (\w+) (?:of|about|for|in) /i, - /how (?:to|do|does|can) .+ (\w+)/i - ]; - - for (const pattern of aspectPatterns) { - const match = query.match(pattern); - if (match && match[1]) { - const aspect = match[1]; - parts.push(`What is the ${aspect}?`); - parts.push(`How does ${aspect} relate to the main topic?`); - } - } - - return parts; - } - - /** - * Extract items being compared from a comparison question - * - * @param before Text before the comparison term - * @param after Text after the comparison term - * @returns Array of items being compared - */ - private extractComparisonAspects(before: string, after: string): string[] { - const aspects: string[] = []; - - // Look for "between A and B" pattern - const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i); - if (betweenMatch) { - aspects.push(betweenMatch[1].trim()); - aspects.push(betweenMatch[2].trim()); - return aspects; - } - - // Look for A vs B pattern - const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i); - if (directComparison) { - aspects.push(directComparison[1].trim()); - aspects.push(directComparison[2].trim()); - return aspects; - } - - // Fall back to looking for named entities or key terms in both parts - const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || []; - const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || []; - - // Look for substantial terms (longer than 3 chars) - const candidateTerms = [...beforeTerms, ...afterTerms] - .filter(term => term.length > 3) - .map(term => term.trim()); - - // Take up to 2 distinct terms - return [...new Set(candidateTerms)].slice(0, 2); } /** diff --git a/src/services/llm/context_extractors/query_decomposition_tool.ts b/src/services/llm/context_extractors/query_decomposition_tool.ts index a213a090d..2e65ada76 100644 --- a/src/services/llm/context_extractors/query_decomposition_tool.ts +++ b/src/services/llm/context_extractors/query_decomposition_tool.ts @@ -1,17 +1,38 @@ /** - * Query Decomposition Tool - Compatibility Layer + * Query Decomposition Tool * - * This file provides backward compatibility with the new consolidated - * query_processor.js implementation. + * This tool helps the LLM agent break down complex user queries into + * sub-questions that can be answered individually and then synthesized + * into a comprehensive response. + * + * Features: + * - Analyze query complexity + * - Extract multiple intents from a single question + * - Create a multi-stage research plan + * - Track progress through complex information gathering */ import log from '../../log.js'; -import queryProcessor from '../context/services/query_processor.js'; -import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js'; +import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js'; -export type { SubQuery, DecomposedQuery }; +export interface SubQuery { + id: string; + text: string; + reason: string; + isAnswered: boolean; + answer?: string; +} + +export interface DecomposedQuery { + originalQuery: string; + subQueries: SubQuery[]; + status: 'pending' | 'in_progress' | 'completed'; + complexity: number; +} export class QueryDecompositionTool { + private static queryCounter: number = 0; + /** * Break down a complex query into smaller, more manageable sub-queries * @@ -20,54 +41,83 @@ export class QueryDecompositionTool { * @returns A decomposed query object with sub-queries */ decomposeQuery(query: string, context?: string): DecomposedQuery { - log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery'); + try { + // Log the decomposition attempt for tracking + log.info(`Decomposing query: "${query.substring(0, 100)}..."`); - // Since the main implementation is now async but we need to maintain a sync interface, - // we'll use a simpler approach that doesn't require LLM + if (!query || query.trim().length === 0) { + log.info("Query decomposition called with empty query"); + return { + originalQuery: query, + subQueries: [], + status: 'pending', + complexity: 0 + }; + } - // Get the complexity to determine approach - const complexity = queryProcessor.assessQueryComplexity(query); + // Assess query complexity to determine if decomposition is needed + const complexity = this.assessQueryComplexity(query); + log.info(`Query complexity assessment: ${complexity}/10`); + + // For simple queries, just return the original as a single sub-query + // Use a lower threshold (2 instead of 3) to decompose more queries + if (complexity < 2) { + log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`); + + const mainSubQuery = { + id: this.generateSubQueryId(), + text: query, + reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT, + isAnswered: false + }; + + // Still add a generic exploration query to get some related content + const genericQuery = { + id: this.generateSubQueryId(), + text: `Information related to ${query}`, + reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC, + isAnswered: false + }; + + return { + originalQuery: query, + subQueries: [mainSubQuery, genericQuery], + status: 'pending', + complexity + }; + } + + // For complex queries, perform decomposition + const subQueries = this.createSubQueries(query, context); + log.info(`Decomposed query into ${subQueries.length} sub-queries`); + + // Log the sub-queries for better visibility + subQueries.forEach((sq, index) => { + log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`); + }); - if (!query || query.trim().length === 0) { return { originalQuery: query, - subQueries: [], + subQueries, status: 'pending', - complexity: 0 + complexity + }; + } catch (error: any) { + log.error(`Error decomposing query: ${error.message}`); + + // Fallback to treating it as a simple query + return { + originalQuery: query, + subQueries: [{ + id: this.generateSubQueryId(), + text: query, + reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR, + isAnswered: false + }], + status: 'pending', + complexity: 1 }; } - - // Create a baseline decomposed query - let subQueries = []; - - // For compatibility, we'll use the basic SubQuery generation - // This avoids the async LLM call which would break the sync interface - const mainSubQuery = { - id: `sq_${Date.now()}_sync_0`, - text: query, - reason: "Main question (for direct matching)", - isAnswered: false - }; - - subQueries.push(mainSubQuery); - - // Add a generic exploration query for context - const genericQuery = { - id: `sq_${Date.now()}_sync_1`, - text: `What information is related to ${query}?`, - reason: "General exploration to find related content", - isAnswered: false - }; - - subQueries.push(genericQuery); - - // Simplified implementation that doesn't require async/LLM calls - return { - originalQuery: query, - subQueries: subQueries, - status: 'pending', - complexity - }; } /** @@ -83,8 +133,25 @@ export class QueryDecompositionTool { subQueryId: string, answer: string ): DecomposedQuery { - log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer'); - return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer); + const updatedSubQueries = decomposedQuery.subQueries.map(sq => { + if (sq.id === subQueryId) { + return { + ...sq, + answer, + isAnswered: true + }; + } + return sq; + }); + + // Check if all sub-queries are answered + const allAnswered = updatedSubQueries.every(sq => sq.isAnswered); + + return { + ...decomposedQuery, + subQueries: updatedSubQueries, + status: allAnswered ? 'completed' : 'in_progress' + }; } /** @@ -94,8 +161,40 @@ export class QueryDecompositionTool { * @returns A synthesized answer to the original query */ synthesizeAnswer(decomposedQuery: DecomposedQuery): string { - log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer'); - return queryProcessor.synthesizeAnswer(decomposedQuery); + try { + // Ensure all sub-queries are answered + if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) { + return "Cannot synthesize answer - not all sub-queries have been answered."; + } + + // For simple queries with just one sub-query, return the answer directly + if (decomposedQuery.subQueries.length === 1) { + return decomposedQuery.subQueries[0].answer || ""; + } + + // For complex queries, build a structured response that references each sub-answer + let synthesized = `Answer to: "${decomposedQuery.originalQuery}"\n\n`; + + // Group by themes if there are many sub-queries + if (decomposedQuery.subQueries.length > 3) { + // Here we would ideally group related sub-queries, but for now we'll just present them in order + synthesized += "Based on the information gathered:\n\n"; + + for (const sq of decomposedQuery.subQueries) { + synthesized += `${sq.answer}\n\n`; + } + } else { + // For fewer sub-queries, present each one with its question + for (const sq of decomposedQuery.subQueries) { + synthesized += `${sq.answer}\n\n`; + } + } + + return synthesized.trim(); + } catch (error: any) { + log.error(`Error synthesizing answer: ${error.message}`); + return "Error synthesizing the final answer."; + } } /** @@ -105,10 +204,6 @@ export class QueryDecompositionTool { * @returns A status report string */ getQueryStatus(decomposedQuery: DecomposedQuery): string { - log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus'); - // This method doesn't exist directly in the new implementation - // We'll implement a simple fallback - const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length; const totalCount = decomposedQuery.subQueries.length; @@ -116,10 +211,9 @@ export class QueryDecompositionTool { for (const sq of decomposedQuery.subQueries) { status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`; - if (sq.isAnswered && sq.answer) { - status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`; + if (sq.isAnswered) { + status += ` Answer: ${this.truncateText(sq.answer || "", 100)}\n`; } - status += '\n'; } return status; @@ -127,15 +221,302 @@ export class QueryDecompositionTool { /** * Assess the complexity of a query on a scale of 1-10 + * This helps determine how many sub-queries are needed * * @param query The query to assess * @returns A complexity score from 1-10 */ assessQueryComplexity(query: string): number { - log.info('Using compatibility layer for QueryDecompositionTool.assessQueryComplexity'); - return queryProcessor.assessQueryComplexity(query); + // Count the number of question marks as a basic indicator + const questionMarkCount = (query.match(/\?/g) || []).length; + + // Count potential sub-questions based on question words + const questionWords = ['what', 'how', 'why', 'where', 'when', 'who', 'which']; + const questionWordMatches = questionWords.map(word => { + const regex = new RegExp(`\\b${word}\\b`, 'gi'); + return (query.match(regex) || []).length; + }); + + const questionWordCount = questionWordMatches.reduce((sum, count) => sum + count, 0); + + // Look for conjunctions which might join multiple questions + const conjunctionCount = (query.match(/\b(and|or|but|as well as)\b/gi) || []).length; + + // Look for complex requirements + const comparisonCount = (query.match(/\b(compare|versus|vs|difference|similarities?)\b/gi) || []).length; + const analysisCount = (query.match(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi) || []).length; + + // Calculate base complexity + let complexity = 1; + + // Add for multiple questions + complexity += Math.min(2, questionMarkCount); + + // Add for question words beyond the first one + complexity += Math.min(2, Math.max(0, questionWordCount - 1)); + + // Add for conjunctions that might join questions + complexity += Math.min(2, conjunctionCount); + + // Add for comparative/analytical requirements + complexity += Math.min(2, comparisonCount + analysisCount); + + // Add for overall length/complexity + if (query.length > 100) complexity += 1; + if (query.length > 200) complexity += 1; + + // Ensure we stay in the 1-10 range + return Math.max(1, Math.min(10, complexity)); + } + + /** + * Generate a unique ID for a sub-query + */ + generateSubQueryId(): string { + return `sq_${Date.now()}_${Math.floor(Math.random() * 10000)}`; + } + + /** + * Create sub-queries based on the original query + */ + createSubQueries(query: string, context?: string): SubQuery[] { + // Simple rules to create sub-queries based on query content + const subQueries: SubQuery[] = []; + + // Avoid creating subqueries that start with "Provide details about" or similar + // as these have been causing recursive loops + if (query.toLowerCase().includes("provide details about") || + query.toLowerCase().includes("information related to")) { + log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`); + return [{ + id: this.generateSubQueryId(), + text: query, + reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS, + isAnswered: false + }]; + } + + // First, add the original query as a sub-query (always) + subQueries.push({ + id: this.generateSubQueryId(), + text: query, + reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY, + isAnswered: false + }); + + // Check for "compare", "difference", "versus" to identify comparison questions + if ( + query.toLowerCase().includes('compare') || + query.toLowerCase().includes('difference between') || + query.toLowerCase().includes(' vs ') || + query.toLowerCase().includes('versus') + ) { + // Extract entities to compare (simplified approach) + const entities = this.extractEntitiesForComparison(query); + + if (entities.length >= 2) { + // Add sub-queries for each entity + entities.forEach(entity => { + subQueries.push({ + id: this.generateSubQueryId(), + text: `What are the key characteristics of ${entity}?`, + reason: `Getting details about "${entity}" for comparison`, + isAnswered: false + }); + }); + + // Add explicit comparison sub-query + subQueries.push({ + id: this.generateSubQueryId(), + text: `How do ${entities.join(' and ')} compare in terms of their primary features?`, + reason: 'Direct comparison of the entities', + isAnswered: false + }); + } + } + // Check for "how to" questions + else if (query.toLowerCase().includes('how to ')) { + const topic = query.replace(/how to /i, '').trim(); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `What are the steps to ${topic}?`, + reason: 'Finding procedural information', + isAnswered: false + }); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `What are common challenges or pitfalls when trying to ${topic}?`, + reason: 'Identifying potential difficulties', + isAnswered: false + }); + } + // Check for "why" questions + else if (query.toLowerCase().startsWith('why ')) { + const topic = query.replace(/why /i, '').trim(); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `What are the causes of ${topic}?`, + reason: 'Identifying causes', + isAnswered: false + }); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `What evidence supports explanations for ${topic}?`, + reason: 'Finding supporting evidence', + isAnswered: false + }); + } + // Handle "what is" questions + else if (query.toLowerCase().startsWith('what is ') || query.toLowerCase().startsWith('what are ')) { + const topic = query.replace(/what (is|are) /i, '').trim().replace(/\?$/, ''); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `Definition of ${topic}`, + reason: 'Getting basic definition', + isAnswered: false + }); + + subQueries.push({ + id: this.generateSubQueryId(), + text: `Examples of ${topic}`, + reason: 'Finding examples', + isAnswered: false + }); + } + + // If no specific sub-queries were added (beyond the original), + // generate generic exploratory sub-queries + if (subQueries.length <= 1) { + // Extract main entities/concepts from the query + const concepts = this.extractMainConcepts(query); + + concepts.forEach(concept => { + // Don't create recursive or self-referential queries + if (!concept.toLowerCase().includes('provide details') && + !concept.toLowerCase().includes('information related')) { + subQueries.push({ + id: this.generateSubQueryId(), + text: `Key information about ${concept}`, + reason: `Finding information about "${concept}"`, + isAnswered: false + }); + } + }); + } + + return subQueries; + } + + /** + * Truncate text to a maximum length with ellipsis + */ + private truncateText(text: string, maxLength: number): string { + if (text.length <= maxLength) return text; + return text.substring(0, maxLength - 3) + '...'; + } + + /** + * Extract entities for comparison from a query + * + * @param query The query to extract entities from + * @returns Array of entity strings + */ + extractEntitiesForComparison(query: string): string[] { + // Try to match patterns like "compare X and Y" or "difference between X and Y" + const comparePattern = /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i; + const vsPattern = /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i; + + let match = query.match(comparePattern) || query.match(vsPattern); + + if (match) { + return [match[1].trim(), match[2].trim()]; + } + + // If no pattern match, try to extract noun phrases + const words = query.split(/\s+/); + const potentialEntities = []; + let currentPhrase = ''; + + for (const word of words) { + // Skip common words that are unlikely to be part of entity names + if (/^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i.test(word)) { + if (currentPhrase.trim()) { + potentialEntities.push(currentPhrase.trim()); + currentPhrase = ''; + } + continue; + } + + currentPhrase += word + ' '; + } + + if (currentPhrase.trim()) { + potentialEntities.push(currentPhrase.trim()); + } + + return potentialEntities.slice(0, 2); // Return at most 2 entities + } + + /** + * Extract main concepts from a query + * + * @param query The query to extract concepts from + * @returns Array of concept strings + */ + extractMainConcepts(query: string): string[] { + // Remove question words and common stop words + const cleanedQuery = query.replace(/what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by/gi, ' '); + + // Split into words and filter out short words + const words = cleanedQuery.split(/\s+/).filter(word => word.length > 3); + + // Count word frequency + const wordCounts: Record = {}; + for (const word of words) { + wordCounts[word.toLowerCase()] = (wordCounts[word.toLowerCase()] || 0) + 1; + } + + // Sort by frequency + const sortedWords = Object.entries(wordCounts) + .sort((a, b) => b[1] - a[1]) + .map(entry => entry[0]); + + // Try to build meaningful phrases around top words + const conceptPhrases: string[] = []; + + if (sortedWords.length === 0) { + // Fallback if no significant words found + return [query.trim()]; + } + + // Use the top 2-3 words to form concepts + for (let i = 0; i < Math.min(sortedWords.length, 3); i++) { + const word = sortedWords[i]; + + // Try to find the word in the original query and extract a small phrase around it + const wordIndex = query.toLowerCase().indexOf(word); + if (wordIndex >= 0) { + // Extract a window of text around the word (3 words before and after) + const start = Math.max(0, query.lastIndexOf(' ', wordIndex - 15) + 1); + const end = Math.min(query.length, query.indexOf(' ', wordIndex + word.length + 15)); + + if (end > start) { + conceptPhrases.push(query.substring(start, end).trim()); + } else { + conceptPhrases.push(word); + } + } else { + conceptPhrases.push(word); + } + } + + return conceptPhrases; } } -// Export default instance for compatibility -export default new QueryDecompositionTool(); +export default QueryDecompositionTool;