maybe

2025-10-27 18:22:45 +08:00 · 2025-04-17 18:19:40 +00:00 · 2025-04-17 18:19:40 +00:00 · daa56b10e8
commit daa56b10e8
parent d83cce88cb
2 changed files with 662 additions and 440 deletions
--- a/src/services/llm/context/services/query_processor.ts
+++ b/src/services/llm/context/services/query_processor.ts
@ -54,82 +54,126 @@ export class QueryProcessor {
            return null;
        }
    }
    /**
-     * Generate enhanced search queries for better semantic matching
+     * Generate search queries to find relevant information for the user question
     *
     * @param userQuestion - The user's question
-     * @param llmService - The LLM service to use for generating queries, or null to auto-detect
+     * @param llmService - The LLM service to use for generating queries
     * @returns Array of search queries
     */
-    async generateSearchQueries(
+    async generateSearchQueries(userQuestion: string, llmService: any): Promise<string[]> {
        userQuestion: string,
        llmService?: LLMServiceInterface
    ): Promise<string[]> {
        if (!userQuestion || userQuestion.trim() === '') {
            return []; // Return empty array for empty input
        }
        try {
-            // Check cache
+            // Check cache first
-            const cacheKey = `searchQueries:${userQuestion}`;
+            const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`);
-            const cached = cacheManager.getQueryResults<string[]>(cacheKey);
+
-            if (cached && Array.isArray(cached)) {
+            const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
-                return cached;
+Given the user's question, generate 3-5 specific search queries that would help find relevant information.
 Each query should be focused on a different aspect of the question.
 Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
 Format your answer as a JSON array of strings, with each string being a search query.
 Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
            interface Message {
                role: 'user' | 'assistant' | 'system';
                content: string;
            }
-            // Get LLM service if not provided
+            const messages: Message[] = [
-            const service = llmService || await this.getLLMService();
+                { role: "system", content: PROMPT },
-            if (!service) {
+                { role: "user", content: userQuestion }
                log.info(`No LLM service available for query enhancement, using original query`);
                return [userQuestion];
            }
            // Prepare the prompt with JSON formatting instructions
            const enhancedPrompt = `${this.enhancerPrompt}
 IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
 Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
            const messages = [
                { role: "system" as const, content: enhancedPrompt },
                { role: "user" as const, content: userQuestion }
            ];
            const options = {
-                temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR,
+                temperature: 0.3,
-                maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
+                maxTokens: 300
                bypassFormatter: true,
                expectsJsonResponse: true,
                _bypassContextProcessing: true, // Prevent recursive calls
                enableTools: false // Explicitly disable tools for this request
            };
            // Get the response from the LLM
-            const response = await service.generateChatCompletion(messages, options);
+            const response = await llmService.generateChatCompletion(messages, options);
-            const responseText = response.text;
+            const responseText = response.text; // Extract the text from the response object
-            // Use the JsonExtractor to parse the response
+            try {
-            const queries = JsonExtractor.extract<string[]>(responseText, {
+                // Remove code blocks, quotes, and clean up the response text
-                extractArrays: true,
+                let jsonStr = responseText
-                minStringLength: 3,
+                    .replace(/```(?:json)?|```/g, '') // Remove code block markers
-                applyFixes: true,
+                    .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes
-                useFallbacks: true
+                    .trim();
            });
-            if (queries && queries.length > 0) {
+                // Check if the text might contain a JSON array (has square brackets)
-                log.info(`Extracted ${queries.length} queries using JsonExtractor`);
+                if (jsonStr.includes('[') && jsonStr.includes(']')) {
-                cacheManager.storeQueryResults(cacheKey, queries);
+                    // Extract just the array part if there's explanatory text
-                return queries;
+                    const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
                    if (arrayMatch) {
                        jsonStr = arrayMatch[0];
                    }
                    // Try to parse the JSON
                    try {
                        const queries = JSON.parse(jsonStr);
                        if (Array.isArray(queries) && queries.length > 0) {
                            const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
                            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                            return result;
                        }
                    } catch (innerError) {
                        // If parsing fails, log it and continue to the fallback
                        log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
                    }
                }
                // Fallback 1: Try to extract an array manually by splitting on commas between quotes
                if (jsonStr.includes('[') && jsonStr.includes(']')) {
                    const arrayContent = jsonStr.substring(
                        jsonStr.indexOf('[') + 1,
                        jsonStr.lastIndexOf(']')
                    );
                    // Use regex to match quoted strings, handling escaped quotes
                    const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
                    if (stringMatches && stringMatches.length > 0) {
                        const result = stringMatches
                            .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
                            .filter((s: string) => s.length > 0);
                        cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                        return result;
                    }
                }
                // Fallback 2: Extract queries line by line
                const lines = responseText.split('\n')
                    .map((line: string) => line.trim())
                    .filter((line: string) =>
                        line.length > 0 &&
                        !line.startsWith('```') &&
                        !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
                        !line.match(/^\[|\]$/) // Skip lines that are just brackets
                    );
                if (lines.length > 0) {
                    // Remove numbering, quotes and other list markers from each line
                    const result = lines.map((line: string) => {
                        return line
                            .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
                            .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
                            .replace(/^["']|["']$/g, '') // Remove surrounding quotes
                            .trim();
                    }).filter((s: string) => s.length > 0);
                    cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                    return result;
                }
            } catch (parseError) {
                log.error(`Error parsing search queries: ${parseError}`);
            }
-            // Fallback to original question
+            // If all else fails, just use the original question
            const fallback = [userQuestion];
-            log.info(`No queries extracted, using fallback: "${userQuestion}"`);
+            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
            cacheManager.storeQueryResults(cacheKey, fallback);
            return fallback;
        } catch (error: unknown) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            log.error(`Error generating search queries: ${errorMessage}`);
            // Fallback to just using the original question
            return [userQuestion];
        }
    }
@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this
                };
            }
-            // Assess query complexity
+            // Simple assessment of query complexity
-            const complexity = this.assessQueryComplexity(query);
+            const complexity = query.length > 100 ? 5 : 3;
            log.info(`Query complexity assessment: ${complexity}/10`);
-            // Try to get LLM service if not provided
+            // Get LLM service if not provided
            const service = llmService || await this.getLLMService();
-            // If no LLM service is available, use basic decomposition
+            // If no LLM service is available, use original query
            if (!service) {
                log.info(`No LLM service available for query decomposition, using original query`);
-                return this.createBasicDecomposition(query, complexity);
+                return {
                    originalQuery: query,
                    subQueries: [{
                        id: this.generateSubQueryId(),
                        text: query,
                        reason: "Original query",
                        isAnswered: false
                    }],
                    status: 'pending',
                    complexity
                };
            }
-            // With LLM service available, always use advanced decomposition regardless of complexity
+            // Make a simple request to decompose the query
-            try {
+            const result = await this.simpleQueryDecomposition(query, service, context);
                log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`);
                const enhancedSubQueries = await this.createLLMSubQueries(query, context, service);
-                if (enhancedSubQueries && enhancedSubQueries.length > 0) {
+            // Return the result
-                    log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`);
+            return {
-                    return {
+                originalQuery: query,
-                        originalQuery: query,
+                subQueries: result,
-                        subQueries: enhancedSubQueries,
+                status: 'pending',
-                        status: 'pending',
+                complexity
-                        complexity
+            };
                    };
                }
            } catch (error: any) {
                log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`);
                // Fall through to basic decomposition
            }
            // Fallback to basic decomposition
            return this.createBasicDecomposition(query, complexity);
        } catch (error: any) {
            log.error(`Error decomposing query: ${error.message}`);
@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this
    }
    /**
-     * Create a basic decomposition of a query without using LLM
+     * Simple LLM-based query decomposition
     *
-     * @param query The original query
+     * @param query The original query to decompose
-     * @param complexity The assessed complexity
+     * @param llmService LLM service to use
     * @returns A basic decomposed query
     */
    private createBasicDecomposition(query: string, complexity: number): DecomposedQuery {
        log.info(`Using basic decomposition approach (complexity: ${complexity})`);

        // Every sub-query produced here starts unanswered and gets a fresh id.
        const toSubQuery = (text: string, reason: string) => ({
            id: this.generateSubQueryId(),
            text,
            reason,
            isAnswered: false
        });

        // Order matters for id generation: the verbatim question comes first,
        // followed by a generic exploration query that widens the search.
        const subQueries = [
            toSubQuery(query, "Direct question that can be answered without decomposition"),
            toSubQuery(`What information is related to ${query}?`, "General exploration to find related content")
        ];

        return { originalQuery: query, subQueries, status: 'pending', complexity };
    }
    /**
     * Use LLM to create advanced sub-queries from a complex query
     *
     * @param query The original complex query
     * @param context Optional context to help with decomposition
     * @param llmService LLM service to use for advanced decomposition
     * @returns Array of sub-queries
     */
-    private async createLLMSubQueries(
+    private async simpleQueryDecomposition(
        query: string,
-        context?: string,
+        llmService: LLMServiceInterface,
-        llmService?: LLMServiceInterface
+        context?: string
    ): Promise<SubQuery[]> {
        // If no LLM service, use basic decomposition
        if (!llmService) {
            return this.createSubQueries(query, context);
        }
        try {
-            // Create a much better prompt for more effective query decomposition
+            // Create a simple prompt for query decomposition
-            const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information.
+            const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.
-Your task is to identify the main concepts and break them down into specific, targeted search queries.
+Your goal is to help find comprehensive information by breaking down the query into multiple search terms.
-DO NOT simply rephrase the original query or create a generic "what's related to X" pattern.
+IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.
 DO create specific queries that explore different aspects of the topic.
-For example:
+For example, if the query is "What are Docker containers?", good sub-queries would be:
-If the query is "How does Docker compare to Kubernetes?", good sub-queries would be:
+1. "Docker container architecture and components"
- "Docker container architecture and features"
+2. "Docker vs virtual machines differences"
- "Kubernetes container orchestration capabilities"
+3. "Docker container use cases and benefits"
- "Docker vs Kubernetes performance comparison"
+4. "Docker container deployment best practices"
 - "When to use Docker versus Kubernetes"
 Format your response as a JSON array of objects with 'text' and 'reason' properties.
 Example: [
-  {"text": "Docker container architecture", "reason": "Understanding Docker's core technology"},
+  {"text": "Docker container architecture", "reason": "Understanding the technical structure"},
-  {"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"}
+  {"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
  {"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
  {"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
 ]
 ${context ? `\nContext: ${context}` : ''}
 Query: ${query}`;
            log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
            const messages = [
                { role: "system" as const, content: prompt }
            ];
            const options = {
-                temperature: 0.7,  // Higher temperature for more creative decomposition
+                temperature: 0.7,
                maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
                bypassFormatter: true,
                expectsJsonResponse: true,
-                _bypassContextProcessing: true, // Prevent recursive calls
+                _bypassContextProcessing: true,
-                enableTools: false // Explicitly disable tools for this request
+                enableTools: false
            };
            // Get the response from the LLM
            const response = await llmService.generateChatCompletion(messages, options);
            const responseText = response.text;
-            // Try to extract structured sub-queries from the response
+            log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
            // Try to parse the response as JSON
            let subQueries: SubQuery[] = [];
            try {
-                // Expected format is an array of objects with "text" and "reason" keys
+                // Extract the JSON from the response
-                interface RawSubQuery {
+                const extractedJson = JsonExtractor.extract(responseText, {
                    text: string;
                    reason?: string;
                }
                // Log the response for debugging
                log.info(`Received response from LLM for query decomposition, extracting JSON...`);
                log.info(`Response: ${responseText}`);
                // Extract JSON from the response
                const extractedData = JsonExtractor.extract<RawSubQuery[]>(responseText, {
                    extractArrays: true,
                    applyFixes: true,
                    useFallbacks: true
                });
-                // Validate the extracted data
+                log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
-                if (!Array.isArray(extractedData)) {
+
-                    log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`);
+                if (Array.isArray(extractedJson) && extractedJson.length > 0) {
-                    return this.createSubQueries(query, context);
+                    // Convert the extracted data to SubQuery objects
                    subQueries = extractedJson
                        .filter(item => item && typeof item === 'object' && item.text)
                        .map(item => ({
                            id: this.generateSubQueryId(),
                            text: item.text,
                            reason: item.reason || "Sub-aspect of the main question",
                            isAnswered: false
                        }));
                    log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
                } else {
                    log.info(`Failed to extract array of sub-queries from LLM response`);
                }
            } catch (error) {
                log.error(`Error parsing LLM response: ${error}`);
            }
-                if (extractedData.length === 0) {
+            // Always include the original query
-                    log.error(`Extracted array is empty, falling back to basic decomposition`);
+            const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
-                    return this.createSubQueries(query, context);
+            if (!hasOriginal) {
-                }
+                subQueries.push({
                log.info(`Successfully extracted ${extractedData.length} items using regex pattern`);
                // Validate each sub-query to ensure it has a text property
                const validSubQueries = extractedData.filter(item => {
                    if (!item || typeof item !== 'object') {
                        log.error(`Invalid sub-query item: ${JSON.stringify(item)}`);
                        return false;
                    }
                    if (!item.text || typeof item.text !== 'string') {
                        log.error(`Sub-query missing text property: ${JSON.stringify(item)}`);
                        return false;
                    }
                    return true;
                });
                if (validSubQueries.length === 0) {
                    log.error(`No valid sub-queries found after validation, falling back to basic decomposition`);
                    return this.createSubQueries(query, context);
                }
                if (validSubQueries.length < extractedData.length) {
                    log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`);
                }
                // Convert the raw data to SubQuery objects
                let subQueries = validSubQueries.map(item => ({
                    id: this.generateSubQueryId(),
-                    text: item.text,
+                    text: query,
-                    reason: item.reason || "Sub-aspect of the main question",
+                    reason: "Original query",
                    isAnswered: false
                }));
                // Make sure we have at least the original query
                const hasOriginalQuery = subQueries.some(sq => {
                    // Check if either sq.text or query is null/undefined before using toLowerCase
                    if (!sq.text) return false;
                    const sqText = sq.text.toLowerCase();
                    const originalQuery = query.toLowerCase();
                    return sqText.includes(originalQuery) || originalQuery.includes(sqText);
                });
                log.info(`Added original query to sub-queries list`);
            }
-                if (!hasOriginalQuery) {
+            // Ensure we have at least 3 queries for better search coverage
-                    subQueries.unshift({
+            if (subQueries.length < 3) {
                // Create some generic variants of the original query
                const genericVariants = [
                    { text: `${query} examples and use cases`, reason: "Practical applications" },
                    { text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
                    { text: `${query} best practices`, reason: "Implementation guidance" }
                ];
                // Add variants until we have at least 3 queries
                for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
                    subQueries.push({
                        id: this.generateSubQueryId(),
-                        text: query,
+                        text: genericVariants[i].text,
-                        reason: "Original query",
+                        reason: genericVariants[i].reason,
                        isAnswered: false
                    });
                }
-                // Log the extracted sub-queries for debugging
+                log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
                log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`);
                return subQueries;
            } catch (error: any) {
                log.error(`Error extracting sub-queries from LLM response: ${error.message}`);
                // Fall through to traditional decomposition
            }
-            // Fallback to traditional decomposition
+            log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
-            return this.createSubQueries(query, context);
+            return subQueries;
-        } catch (error: any) {
+        } catch (error) {
-            log.error(`Error in createLLMSubQueries: ${error.message}`);
+            log.error(`Error in simpleQueryDecomposition: ${error}`);
            return this.createSubQueries(query, context);
        }
    }
-    /**
+            // Return the original query plus some variants as fallback
-     * Create sub-queries from a complex query
+            const fallbackQueries = [
-     *
+                {
-     * @param query The original complex query
+                    id: this.generateSubQueryId(),
-     * @param context Optional context to help with decomposition
+                    text: query,
-     * @returns Array of sub-queries
+                    reason: "Original query",
-     */
+                    isAnswered: false
-    private createSubQueries(query: string, context?: string): SubQuery[] {
+                },
-        // Analyze the query to identify potential aspects to explore
+                {
-        const questionParts = this.identifyQuestionParts(query);
+                    id: this.generateSubQueryId(),
-        const subQueries: SubQuery[] = [];
+                    text: `${query} overview`,
-
+                    reason: "General information",
-        // Add the main query as the first sub-query
+                    isAnswered: false
-        subQueries.push({
+                },
-            id: this.generateSubQueryId(),
+                {
-            text: query,
+                    id: this.generateSubQueryId(),
-            reason: "Main question (for direct matching)",
+                    text: `${query} examples`,
-            isAnswered: false
+                    reason: "Practical examples",
-        });
+                    isAnswered: false
        // Add sub-queries for each identified question part
        for (const part of questionParts) {
            subQueries.push({
                id: this.generateSubQueryId(),
                text: part,
                reason: "Sub-aspect of the main question",
                isAnswered: false
            });
        }
        // Add a generic exploration query to find related information
        subQueries.push({
            id: this.generateSubQueryId(),
            text: `What information is related to ${query}?`,
            reason: "General exploration to find related content",
            isAnswered: false
        });
        // If we have context, add a specific query for that context
        if (context) {
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `How does "${context}" relate to ${query}?`,
                reason: "Contextual relationship exploration",
                isAnswered: false
            });
        }
        return subQueries;
    }
    /**
     * Identify parts of a complex question that could be individual sub-questions
     *
     * @param query The complex query to analyze
     * @returns Array of potential sub-questions
     */
    private identifyQuestionParts(query: string): string[] {
        const parts: string[] = [];
        // Check for multiple question marks
        const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?'));
        if (questionSentences.length > 1) {
            // Multiple explicit questions detected
            return questionSentences.map(s => s.trim());
        }
        // Check for conjunctions that might separate multiple questions
        const conjunctions = ['and', 'or', 'but', 'plus', 'also'];
        for (const conjunction of conjunctions) {
            const pattern = new RegExp(`\\b${conjunction}\\b`, 'i');
            if (pattern.test(query)) {
                // Split by conjunction and check if each part could be a question
                const splitParts = query.split(pattern);
                for (const part of splitParts) {
                    const trimmed = part.trim();
                    if (trimmed.length > 10) { // Avoid tiny fragments
                        parts.push(trimmed);
                    }
                }
-                if (parts.length > 0) {
+            ];
-                    return parts;
+
-                }
+            log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
-            }
+            return fallbackQueries;
        }
        // Check for comparison indicators
        const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs'];
        for (const term of comparisonTerms) {
            if (query.toLowerCase().includes(term)) {
                // This is likely a comparison question, extract the items being compared
                const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i'));
                if (beforeAfter.length === 2) {
                    // Try to extract compared items
                    const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]);
                    if (aspects.length > 0) {
                        for (const aspect of aspects) {
                            parts.push(`What are the key points about ${aspect}?`);
                        }
                        parts.push(`What are the differences between ${aspects.join(' and ')}?`);
                        return parts;
                    }
                }
            }
        }
        // Check for "multiple aspects" questions
        const aspectPatterns = [
            /what (?:are|is) the (\w+) (?:of|about|for|in) /i,
            /how (?:to|do|does|can) .+ (\w+)/i
        ];
        for (const pattern of aspectPatterns) {
            const match = query.match(pattern);
            if (match && match[1]) {
                const aspect = match[1];
                parts.push(`What is the ${aspect}?`);
                parts.push(`How does ${aspect} relate to the main topic?`);
            }
        }
        return parts;
    }
    /**
     * Work out which items a comparison question is contrasting.
     *
     * Explicit phrasings in `after` are tried first ("between A and B", then
     * "A and/vs/versus B"). If neither matches, short word runs (one to three
     * words, longer than 3 characters) are collected from both sides instead.
     *
     * @param before Text before the comparison term
     * @param after Text after the comparison term
     * @returns Up to two compared items; may be empty when nothing usable is found
     */
    private extractComparisonAspects(before: string, after: string): string[] {
        // Explicit phrasings, listed in priority order; the first one that matches wins.
        const explicitPatterns = [
            /between (.+?) and (.+?)(?:\?|$)/i,
            /(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i
        ];
        for (const pattern of explicitPatterns) {
            const found = after.match(pattern);
            if (found) {
                return [found[1].trim(), found[2].trim()];
            }
        }

        // No explicit phrasing: gather candidate terms from both sides,
        // keeping first-seen order and dropping duplicates.
        const distinctTerms = new Set<string>();
        for (const segment of [before, after]) {
            const wordRuns = segment.match(/(\w+(?:\s+\w+){0,2})/g) || [];
            for (const run of wordRuns) {
                // Only substantial terms (longer than 3 chars) are kept
                if (run.length > 3) {
                    distinctTerms.add(run.trim());
                }
            }
        }

        // At most two distinct terms are returned
        return Array.from(distinctTerms).slice(0, 2);
    }
    /**
--- a/src/services/llm/context_extractors/query_decomposition_tool.ts
+++ b/src/services/llm/context_extractors/query_decomposition_tool.ts
@ -1,17 +1,38 @@
 /**
- * Query Decomposition Tool - Compatibility Layer
+ * Query Decomposition Tool
 *
- * This file provides backward compatibility with the new consolidated
+ * This tool helps the LLM agent break down complex user queries into
- * query_processor.js implementation.
+ * sub-questions that can be answered individually and then synthesized
 * into a comprehensive response.
 *
 * Features:
 * - Analyze query complexity
 * - Extract multiple intents from a single question
 * - Create a multi-stage research plan
 * - Track progress through complex information gathering
 */
 import log from '../../log.js';
-import queryProcessor from '../context/services/query_processor.js';
+import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';
 import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js';
-export type { SubQuery, DecomposedQuery };
+export interface SubQuery {
    /** Identifier used by updateSubQueryAnswer to locate this sub-query. */
    id: string;
    /** The sub-question text itself. */
    text: string;
    /** Short explanation of why this sub-query was generated. */
    reason: string;
    /** Set to true by updateSubQueryAnswer once an answer has been recorded. */
    isAnswered: boolean;
    /** The recorded answer; absent until the sub-query has been answered. */
    answer?: string;
 }
 export interface DecomposedQuery {
    /** The query exactly as it was passed to decomposeQuery. */
    originalQuery: string;
    /** Sub-queries derived from the original query (empty when the query was blank). */
    subQueries: SubQuery[];
    /** 'pending' on creation; updateSubQueryAnswer switches it to 'in_progress' or 'completed'. */
    status: 'pending' | 'in_progress' | 'completed';
    /** Score from assessQueryComplexity; decomposeQuery uses 0 for a blank query and 1 in its error fallback. */
    complexity: number;
 }
 export class QueryDecompositionTool {
    private static queryCounter: number = 0;
    /**
     * Break down a complex query into smaller, more manageable sub-queries
     *
@ -20,54 +41,83 @@ export class QueryDecompositionTool {
     * @returns A decomposed query object with sub-queries
     */
    decomposeQuery(query: string, context?: string): DecomposedQuery {
        // Overview of the '+' / unmarked path below:
        //   - blank query            -> no sub-queries, complexity 0
        //   - complexity below 2     -> the query itself plus one generic "Information related to ..." sub-query
        //   - otherwise              -> whatever createSubQueries(query, context) produces
        //   - any exception          -> a single sub-query holding the original query, complexity 1
        // NOTE(review): this span interleaves removed ('-') and added ('+') diff lines with unmarked
        // lines, so it is not valid TypeScript as shown — confirm against the real file.
-        log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery');
+        try {
            // Log the decomposition attempt for tracking
            // NOTE(review): this runs before the empty-query guard, so a null/undefined query throws
            // here and ends up in the catch fallback instead of the empty-query return below.
            log.info(`Decomposing query: "${query.substring(0, 100)}..."`);
-        // Since the main implementation is now async but we need to maintain a sync interface,
+            if (!query || query.trim().length === 0) {
-        // we'll use a simpler approach that doesn't require LLM
+                log.info("Query decomposition called with empty query");
                return {
                    originalQuery: query,
                    subQueries: [],
                    status: 'pending',
                    complexity: 0
                };
            }
-        // Get the complexity to determine approach
+            // Assess query complexity to determine if decomposition is needed
-        const complexity = queryProcessor.assessQueryComplexity(query);
+            const complexity = this.assessQueryComplexity(query);
            log.info(`Query complexity assessment: ${complexity}/10`);
            // For simple queries, just return the original as a single sub-query
            // Use a lower threshold (2 instead of 3) to decompose more queries
            if (complexity < 2) {
                log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`);
                const mainSubQuery = {
                    id: this.generateSubQueryId(),
                    text: query,
                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT,
                    isAnswered: false
                };
                // Still add a generic exploration query to get some related content
                const genericQuery = {
                    id: this.generateSubQueryId(),
                    text: `Information related to ${query}`,
                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC,
                    isAnswered: false
                };
                return {
                    originalQuery: query,
                    subQueries: [mainSubQuery, genericQuery],
                    status: 'pending',
                    complexity
                };
            }
            // For complex queries, perform decomposition
            const subQueries = this.createSubQueries(query, context);
            log.info(`Decomposed query into ${subQueries.length} sub-queries`);
            // Log the sub-queries for better visibility
            subQueries.forEach((sq, index) => {
                log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`);
            });
        // NOTE(review): the `if` on the next line looks like a stray line from the old implementation;
        // the return that follows carries the decomposed sub-queries, not the empty-query result — confirm.
        if (!query || query.trim().length === 0) {
            return {
                originalQuery: query,
-                subQueries: [],
+                subQueries,
                status: 'pending',
-                complexity: 0
+                complexity
            };
        } catch (error: any) {
            log.error(`Error decomposing query: ${error.message}`);
            // Fallback to treating it as a simple query
            return {
                originalQuery: query,
                subQueries: [{
                    id: this.generateSubQueryId(),
                    text: query,
                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR,
                    isAnswered: false
                }],
                status: 'pending',
                complexity: 1
            };
        }
        // NOTE(review): every path through the try/catch above returns, so the statements from here
        // to the end look unreachable, and `complexity` used in the final return is declared inside
        // the try block. Presumably leftover lines of the old compatibility implementation — confirm.
        // Create a baseline decomposed query
        let subQueries = [];
        // For compatibility, we'll use the basic SubQuery generation
        // This avoids the async LLM call which would break the sync interface
        const mainSubQuery = {
            id: `sq_${Date.now()}_sync_0`,
            text: query,
            reason: "Main question (for direct matching)",
            isAnswered: false
        };
        subQueries.push(mainSubQuery);
        // Add a generic exploration query for context
        const genericQuery = {
            id: `sq_${Date.now()}_sync_1`,
            text: `What information is related to ${query}?`,
            reason: "General exploration to find related content",
            isAnswered: false
        };
        subQueries.push(genericQuery);
        // Simplified implementation that doesn't require async/LLM calls
        return {
            originalQuery: query,
            subQueries: subQueries,
            status: 'pending',
            complexity
        };
    }
    /**
@ -83,8 +133,25 @@ export class QueryDecompositionTool {
        subQueryId: string,
        answer: string
    ): DecomposedQuery {
-        log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer');
+        const updatedSubQueries = decomposedQuery.subQueries.map(sq => {
-        return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer);
+            if (sq.id === subQueryId) {
                return {
                    ...sq,
                    answer,
                    isAnswered: true
                };
            }
            return sq;
        });
        // Check if all sub-queries are answered
        const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);
        return {
            ...decomposedQuery,
            subQueries: updatedSubQueries,
            status: allAnswered ? 'completed' : 'in_progress'
        };
    }
    /**
@ -94,8 +161,40 @@ export class QueryDecompositionTool {
     * @returns A synthesized answer to the original query
     */
    synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
        // Combines the recorded sub-query answers into one string. Returns a fixed refusal message
        // while any sub-query is unanswered, and a fixed error message if anything throws.
-        log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer');
+        try {
-        return queryProcessor.synthesizeAnswer(decomposedQuery);
+            // Ensure all sub-queries are answered
            // (every() is true for an empty list, so a query with no sub-queries falls through
            // to the header-only result built below)
            if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) {
                return "Cannot synthesize answer - not all sub-queries have been answered.";
            }
            // For simple queries with just one sub-query, return the answer directly
            if (decomposedQuery.subQueries.length === 1) {
                // A missing or empty answer is returned as the empty string
                return decomposedQuery.subQueries[0].answer || "";
            }
            // For complex queries, build a structured response that references each sub-answer
            let synthesized = `Answer to: "${decomposedQuery.originalQuery}"\n\n`;
            // Group by themes if there are many sub-queries
            if (decomposedQuery.subQueries.length > 3) {
                // Here we would ideally group related sub-queries, but for now we'll just present them in order
                synthesized += "Based on the information gathered:\n\n";
                for (const sq of decomposedQuery.subQueries) {
                    // An answered sub-query without an `answer` value is interpolated as "undefined"
                    synthesized += `${sq.answer}\n\n`;
                }
            } else {
                // For fewer sub-queries, present each one with its question
                // NOTE(review): despite the comment above, this loop emits only the answers, exactly
                // like the branch for more than 3 sub-queries; sq.text is never written.
                for (const sq of decomposedQuery.subQueries) {
                    synthesized += `${sq.answer}\n\n`;
                }
            }
            // Drop the trailing blank lines left by the last appended answer
            return synthesized.trim();
        } catch (error: any) {
            log.error(`Error synthesizing answer: ${error.message}`);
            return "Error synthesizing the final answer.";
        }
    }
    /**
@ -105,10 +204,6 @@ export class QueryDecompositionTool {
     * @returns A status report string
     */
    getQueryStatus(decomposedQuery: DecomposedQuery): string {
        log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus');
        // This method doesn't exist directly in the new implementation
        // We'll implement a simple fallback
        const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
        const totalCount = decomposedQuery.subQueries.length;
@ -116,10 +211,9 @@ export class QueryDecompositionTool {
        for (const sq of decomposedQuery.subQueries) {
            status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
-            if (sq.isAnswered && sq.answer) {
+            if (sq.isAnswered) {
-                status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`;
+                status += `   Answer: ${this.truncateText(sq.answer || "", 100)}\n`;
            }
            status += '\n';
        }
        return status;
@ -127,15 +221,302 @@ export class QueryDecompositionTool {
    /**
     * Assess the complexity of a query on a scale of 1-10
     * This helps determine how many sub-queries are needed
     *
     * @param query The query to assess
     * @returns A complexity score from 1-10
     */
    assessQueryComplexity(query: string): number {
        // Heuristic score: a base of 1 plus four text signals capped at 2 points each, plus up to
        // 2 points for length. The raw total can reach 11 and is clamped to 10 at the end.
-        log.info('Using compatibility layer for QueryDecompositionTool.assessQueryComplexity');
+        // Count the number of question marks as a basic indicator
-        return queryProcessor.assessQueryComplexity(query);
+        const questionMarkCount = (query.match(/\?/g) || []).length;
        // Count potential sub-questions based on question words
        const questionWords = ['what', 'how', 'why', 'where', 'when', 'who', 'which'];
        const questionWordMatches = questionWords.map(word => {
            // Whole-word, case-insensitive occurrences of this question word
            const regex = new RegExp(`\\b${word}\\b`, 'gi');
            return (query.match(regex) || []).length;
        });
        const questionWordCount = questionWordMatches.reduce((sum, count) => sum + count, 0);
        // Look for conjunctions which might join multiple questions
        const conjunctionCount = (query.match(/\b(and|or|but|as well as)\b/gi) || []).length;
        // Look for complex requirements
        const comparisonCount = (query.match(/\b(compare|versus|vs|difference|similarities?)\b/gi) || []).length;
        const analysisCount = (query.match(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi) || []).length;
        // Calculate base complexity
        let complexity = 1;
        // Add for multiple questions
        // (a single '?' already contributes 1 point, so any query containing one scores at least 2)
        complexity += Math.min(2, questionMarkCount);
        // Add for question words beyond the first one
        complexity += Math.min(2, Math.max(0, questionWordCount - 1));
        // Add for conjunctions that might join questions
        complexity += Math.min(2, conjunctionCount);
        // Add for comparative/analytical requirements
        // (comparison and analysis terms share one cap of 2 points)
        complexity += Math.min(2, comparisonCount + analysisCount);
        // Add for overall length/complexity
        // (the two checks are cumulative: more than 200 characters earns both points)
        if (query.length > 100) complexity += 1;
        if (query.length > 200) complexity += 1;
        // Ensure we stay in the 1-10 range
        return Math.max(1, Math.min(10, complexity));
    }
    /**
     * Generate a unique ID for a sub-query
     */
    generateSubQueryId(): string {
        // "sq_<epoch millis>_<random integer in 0..9999>"
        const timestamp = Date.now();
        const randomSuffix = Math.floor(Math.random() * 10000);
        return ['sq', timestamp, randomSuffix].join('_');
    }
    /**
     * Create sub-queries based on the original query
     */
    createSubQueries(query: string, context?: string): SubQuery[] {
        // Rule-based decomposition. The original query is always the first sub-query; then at most
        // one of the comparison / "how to" / "why" / "what is|are" rules adds follow-ups, and if none
        // of them added anything, generic per-concept sub-queries are generated instead.
        // `context` is accepted for interface compatibility but is not consulted by these rules.
        const subQueries: SubQuery[] = [];
        const lowerQuery = query.toLowerCase();

        // Templates below append their own "?", so the user's trailing question mark(s) are removed
        // from the topic first; otherwise the generated text would end in "??".
        const toTopic = (text: string): string => text.trim().replace(/\s*\?+$/, '');

        // Avoid creating subqueries that start with "Provide details about" or similar
        // as these have been causing recursive loops
        if (lowerQuery.includes("provide details about") ||
            lowerQuery.includes("information related to")) {
            log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`);
            return [{
                id: this.generateSubQueryId(),
                text: query,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS,
                isAnswered: false
            }];
        }

        // First, add the original query as a sub-query (always)
        subQueries.push({
            id: this.generateSubQueryId(),
            text: query,
            reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY,
            isAnswered: false
        });

        // Check for "compare", "difference", "versus" to identify comparison questions
        if (
            lowerQuery.includes('compare') ||
            lowerQuery.includes('difference between') ||
            lowerQuery.includes(' vs ') ||
            lowerQuery.includes('versus')
        ) {
            // Extract entities to compare (simplified approach)
            const entities = this.extractEntitiesForComparison(query);
            if (entities.length >= 2) {
                // Add sub-queries for each entity
                entities.forEach(entity => {
                    subQueries.push({
                        id: this.generateSubQueryId(),
                        text: `What are the key characteristics of ${entity}?`,
                        reason: `Getting details about "${entity}" for comparison`,
                        isAnswered: false
                    });
                });
                // Add explicit comparison sub-query
                subQueries.push({
                    id: this.generateSubQueryId(),
                    text: `How do ${entities.join(' and ')} compare in terms of their primary features?`,
                    reason: 'Direct comparison of the entities',
                    isAnswered: false
                });
            }
            // With fewer than two entities nothing is added here and the generic
            // concept fallback at the end takes over.
        }
        // Check for "how to" questions
        else if (lowerQuery.includes('how to ')) {
            const topic = toTopic(query.replace(/how to /i, ''));
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `What are the steps to ${topic}?`,
                reason: 'Finding procedural information',
                isAnswered: false
            });
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `What are common challenges or pitfalls when trying to ${topic}?`,
                reason: 'Identifying potential difficulties',
                isAnswered: false
            });
        }
        // Check for "why" questions
        else if (lowerQuery.startsWith('why ')) {
            const topic = toTopic(query.replace(/why /i, ''));
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `What are the causes of ${topic}?`,
                reason: 'Identifying causes',
                isAnswered: false
            });
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `What evidence supports explanations for ${topic}?`,
                reason: 'Finding supporting evidence',
                isAnswered: false
            });
        }
        // Handle "what is" questions
        else if (lowerQuery.startsWith('what is ') || lowerQuery.startsWith('what are ')) {
            const topic = toTopic(query.replace(/what (is|are) /i, ''));
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `Definition of ${topic}`,
                reason: 'Getting basic definition',
                isAnswered: false
            });
            subQueries.push({
                id: this.generateSubQueryId(),
                text: `Examples of ${topic}`,
                reason: 'Finding examples',
                isAnswered: false
            });
        }

        // If no specific sub-queries were added (beyond the original),
        // generate generic exploratory sub-queries
        if (subQueries.length <= 1) {
            // Extract main entities/concepts from the query
            const concepts = this.extractMainConcepts(query);
            concepts.forEach(concept => {
                const lowerConcept = concept.toLowerCase();
                // Don't create recursive or self-referential queries
                if (!lowerConcept.includes('provide details') &&
                    !lowerConcept.includes('information related')) {
                    subQueries.push({
                        id: this.generateSubQueryId(),
                        text: `Key information about ${concept}`,
                        reason: `Finding information about "${concept}"`,
                        isAnswered: false
                    });
                }
            });
        }
        return subQueries;
    }
    /**
     * Truncate text to a maximum length with ellipsis
     */
    private truncateText(text: string, maxLength: number): string {
        // Text that already fits is returned untouched.
        if (text.length <= maxLength) return text;

        const ellipsis = '...';
        // A budget smaller than the ellipsis cannot hold it: substring() would clamp the negative
        // end index to 0 and the bare "..." would exceed maxLength. Hard-cut the text instead.
        if (maxLength < ellipsis.length) {
            return text.substring(0, Math.max(0, maxLength));
        }
        // Reserve room for the ellipsis so the result is exactly maxLength characters long.
        return text.substring(0, maxLength - ellipsis.length) + ellipsis;
    }
    /**
     * Extract entities for comparison from a query
     *
     * @param query The query to extract entities from
     * @returns Array of entity strings
     */
    extractEntitiesForComparison(query: string): string[] {
        // Explicit phrasings, tried in order: "compare X and Y" / "difference between X and Y" /
        // "similarities between X and Y", then "X vs Y" / "X versus Y".
        const explicitPatterns = [
            /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i,
            /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i
        ];
        for (const pattern of explicitPatterns) {
            const hit = query.match(pattern);
            if (hit) {
                return [hit[1].trim(), hit[2].trim()];
            }
        }

        // Fallback: treat common filler words as separators and collect the word runs between them.
        const fillerWord = /^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i;
        const phrases: string[] = [];
        let pendingWords: string[] = [];
        // Emits the run gathered so far, if it contains any visible text.
        const flushPending = (): void => {
            const phrase = pendingWords.join(' ').trim();
            if (phrase) {
                phrases.push(phrase);
                pendingWords = [];
            }
        };
        for (const token of query.split(/\s+/)) {
            if (fillerWord.test(token)) {
                flushPending();
            } else {
                pendingWords.push(token);
            }
        }
        flushPending();

        // Return at most 2 entities
        return phrases.slice(0, 2);
    }
    /**
     * Extract main concepts from a query
     *
     * @param query The query to extract concepts from
     * @returns Array of concept strings
     */
    extractMainConcepts(query: string): string[] {
        // Blank out question words and common stop words. The \b anchors restrict the match to
        // whole words; without them the letters are cut out of longer terms as well
        // ("python" -> "pyth", "window" -> "w dow"), corrupting or losing real concepts.
        const stopWords = /\b(?:what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by)\b/gi;
        const cleanedQuery = query.replace(stopWords, ' ');

        // Split into words and filter out short words
        const words = cleanedQuery.split(/\s+/).filter(word => word.length > 3);

        // Count word frequency (case-insensitive). A Map is used rather than a plain object so that
        // words such as "constructor" do not collide with properties inherited from Object.prototype.
        const wordCounts = new Map<string, number>();
        for (const word of words) {
            const key = word.toLowerCase();
            wordCounts.set(key, (wordCounts.get(key) || 0) + 1);
        }

        // Most frequent first; ties keep first-seen order
        const sortedWords = Array.from(wordCounts.entries())
            .sort((a, b) => b[1] - a[1])
            .map(entry => entry[0]);

        if (sortedWords.length === 0) {
            // Fallback if no significant words found
            return [query.trim()];
        }

        // Use up to the top 3 words to form concepts
        const conceptPhrases: string[] = [];
        const lowerQuery = query.toLowerCase();
        for (const word of sortedWords.slice(0, 3)) {
            // Locate the word in the original query and extract a small phrase around it
            const wordIndex = lowerQuery.indexOf(word);
            if (wordIndex < 0) {
                conceptPhrases.push(word);
                continue;
            }
            // Window of roughly 15 characters on each side, snapped outward to the nearest space.
            // When no space exists beyond the right edge, indexOf yields -1, `end` is not past
            // `start`, and the bare word is used instead of a phrase.
            const start = Math.max(0, query.lastIndexOf(' ', wordIndex - 15) + 1);
            const end = Math.min(query.length, query.indexOf(' ', wordIndex + word.length + 15));
            conceptPhrases.push(end > start ? query.substring(start, end).trim() : word);
        }

        // Neighbouring words often produce the same window; report each phrase once
        return Array.from(new Set(conceptPhrases));
    }
 }
-// Export default instance for compatibility
+export default QueryDecompositionTool;
 export default new QueryDecompositionTool();