maybe

2025-10-29 03:23:25 +08:00 · 2025-04-17 18:19:40 +00:00 · 2025-04-17 18:19:40 +00:00 · daa56b10e8
commit daa56b10e8
parent d83cce88cb
2 changed files with 662 additions and 440 deletions
--- a/src/services/llm/context/services/query_processor.ts
+++ b/src/services/llm/context/services/query_processor.ts
@ -54,82 +54,126 @@ export class QueryProcessor {
            return null;
        }
    }
-
    /**
-     * Generate enhanced search queries for better semantic matching
+     * Generate search queries to find relevant information for the user question
     *
     * @param userQuestion - The user's question
-     * @param llmService - The LLM service to use for generating queries, or null to auto-detect
+     * @param llmService - The LLM service to use for generating queries
     * @returns Array of search queries
     */
-    async generateSearchQueries(
-        userQuestion: string,
-        llmService?: LLMServiceInterface
-    ): Promise<string[]> {
-        if (!userQuestion || userQuestion.trim() === '') {
-            return []; // Return empty array for empty input
-        }
-
+    async generateSearchQueries(userQuestion: string, llmService: any): Promise<string[]> {
        try {
-            // Check cache
-            const cacheKey = `searchQueries:${userQuestion}`;
-            const cached = cacheManager.getQueryResults<string[]>(cacheKey);
-            if (cached && Array.isArray(cached)) {
-                return cached;
+            // Look up previously cached queries (note: the lookup result is not consulted in this version)
+            const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`);
+
+            const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
+Given the user's question, generate 3-5 specific search queries that would help find relevant information.
+Each query should be focused on a different aspect of the question.
+Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
+Format your answer as a JSON array of strings, with each string being a search query.
+Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
+
+            interface Message {
+                role: 'user' | 'assistant' | 'system';
+                content: string;
            }

-            // Get LLM service if not provided
-            const service = llmService || await this.getLLMService();
-            if (!service) {
-                log.info(`No LLM service available for query enhancement, using original query`);
-                return [userQuestion];
-            }
-
-            // Prepare the prompt with JSON formatting instructions
-            const enhancedPrompt = `${this.enhancerPrompt}
-IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
-Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
-
-            const messages = [
-                { role: "system" as const, content: enhancedPrompt },
-                { role: "user" as const, content: userQuestion }
+            const messages: Message[] = [
+                { role: "system", content: PROMPT },
+                { role: "user", content: userQuestion }
            ];

            const options = {
-                temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR,
-                maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
-                bypassFormatter: true,
-                expectsJsonResponse: true,
-                _bypassContextProcessing: true, // Prevent recursive calls
-                enableTools: false // Explicitly disable tools for this request
+                temperature: 0.3,
+                maxTokens: 300
            };

            // Get the response from the LLM
-            const response = await service.generateChatCompletion(messages, options);
-            const responseText = response.text;
+            const response = await llmService.generateChatCompletion(messages, options);
+            const responseText = response.text; // Extract the text from the response object

-            // Use the JsonExtractor to parse the response
-            const queries = JsonExtractor.extract<string[]>(responseText, {
-                extractArrays: true,
-                minStringLength: 3,
-                applyFixes: true,
-                useFallbacks: true
-            });
+            try {
+                // Remove code blocks, quotes, and clean up the response text
+                let jsonStr = responseText
+                    .replace(/```(?:json)?|```/g, '') // Remove code block markers
+                    .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes
+                    .trim();

-            if (queries && queries.length > 0) {
-                log.info(`Extracted ${queries.length} queries using JsonExtractor`);
-                cacheManager.storeQueryResults(cacheKey, queries);
-                return queries;
+                // Check if the text might contain a JSON array (has square brackets)
+                if (jsonStr.includes('[') && jsonStr.includes(']')) {
+                    // Extract just the array part if there's explanatory text
+                    const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
+                    if (arrayMatch) {
+                        jsonStr = arrayMatch[0];
+                    }
+
+                    // Try to parse the JSON
+                    try {
+                        const queries = JSON.parse(jsonStr);
+                        if (Array.isArray(queries) && queries.length > 0) {
+                            const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
+                            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
+                            return result;
+                        }
+                    } catch (innerError) {
+                        // If parsing fails, log it and continue to the fallback
+                        log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
+                    }
+                }
+
+                // Fallback 1: Try to extract the array manually by matching each quoted string inside the brackets
+                if (jsonStr.includes('[') && jsonStr.includes(']')) {
+                    const arrayContent = jsonStr.substring(
+                        jsonStr.indexOf('[') + 1,
+                        jsonStr.lastIndexOf(']')
+                    );
+
+                    // Use regex to match quoted strings, handling escaped quotes
+                    const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
+                    if (stringMatches && stringMatches.length > 0) {
+                        const result = stringMatches
+                            .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
+                            .filter((s: string) => s.length > 0);
+                        cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
+                        return result;
+                    }
+                }
+
+                // Fallback 2: Extract queries line by line
+                const lines = responseText.split('\n')
+                    .map((line: string) => line.trim())
+                    .filter((line: string) =>
+                        line.length > 0 &&
+                        !line.startsWith('```') &&
+                        !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
+                        !line.match(/^\[|\]$/) // Skip lines that are just brackets
+                    );
+
+                if (lines.length > 0) {
+                    // Remove numbering, quotes and other list markers from each line
+                    const result = lines.map((line: string) => {
+                        return line
+                            .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
+                            .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
+                            .replace(/^["']|["']$/g, '') // Remove surrounding quotes
+                            .trim();
+                    }).filter((s: string) => s.length > 0);
+
+                    cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
+                    return result;
+                }
+            } catch (parseError) {
+                log.error(`Error parsing search queries: ${parseError}`);
            }

-            // Fallback to original question
+            // If all else fails, just use the original question
            const fallback = [userQuestion];
-            log.info(`No queries extracted, using fallback: "${userQuestion}"`);
-            cacheManager.storeQueryResults(cacheKey, fallback);
+            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
            return fallback;
        } catch (error: unknown) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            log.error(`Error generating search queries: ${errorMessage}`);
+            // Fallback to just using the original question
            return [userQuestion];
        }
    }
@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this
                };
            }

-            // Assess query complexity
-            const complexity = this.assessQueryComplexity(query);
-            log.info(`Query complexity assessment: ${complexity}/10`);
+            // Simple assessment of query complexity
+            const complexity = query.length > 100 ? 5 : 3;

-            // Try to get LLM service if not provided
+            // Get LLM service if not provided
            const service = llmService || await this.getLLMService();

-            // If no LLM service is available, use basic decomposition
+            // If no LLM service is available, use original query
            if (!service) {
                log.info(`No LLM service available for query decomposition, using original query`);
-                return this.createBasicDecomposition(query, complexity);
+                return {
+                    originalQuery: query,
+                    subQueries: [{
+                        id: this.generateSubQueryId(),
+                        text: query,
+                        reason: "Original query",
+                        isAnswered: false
+                    }],
+                    status: 'pending',
+                    complexity
+                };
            }

-            // With LLM service available, always use advanced decomposition regardless of complexity
-            try {
-                log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`);
-                const enhancedSubQueries = await this.createLLMSubQueries(query, context, service);
+            // Make a simple request to decompose the query
+            const result = await this.simpleQueryDecomposition(query, service, context);

-                if (enhancedSubQueries && enhancedSubQueries.length > 0) {
-                    log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`);
-                    return {
-                        originalQuery: query,
-                        subQueries: enhancedSubQueries,
-                        status: 'pending',
-                        complexity
-                    };
-                }
-            } catch (error: any) {
-                log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`);
-                // Fall through to basic decomposition
-            }
-
-            // Fallback to basic decomposition
-            return this.createBasicDecomposition(query, complexity);
+            // Return the result
+            return {
+                originalQuery: query,
+                subQueries: result,
+                status: 'pending',
+                complexity
+            };
        } catch (error: any) {
            log.error(`Error decomposing query: ${error.message}`);

@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this
    }

    /**
-     * Create a basic decomposition of a query without using LLM
+     * Simple LLM-based query decomposition
     *
-     * @param query The original query
-     * @param complexity The assessed complexity
-     * @returns A basic decomposed query
-     */
-    private createBasicDecomposition(query: string, complexity: number): DecomposedQuery {
-        log.info(`Using basic decomposition approach (complexity: ${complexity})`);
-
-        const mainSubQuery = {
-            id: this.generateSubQueryId(),
-            text: query,
-            reason: "Direct question that can be answered without decomposition",
-            isAnswered: false
-        };
-
-        // Add a generic exploration query for context
-        const genericQuery = {
-            id: this.generateSubQueryId(),
-            text: `What information is related to ${query}?`,
-            reason: "General exploration to find related content",
-            isAnswered: false
-        };
-
-        return {
-            originalQuery: query,
-            subQueries: [mainSubQuery, genericQuery],
-            status: 'pending',
-            complexity
-        };
-    }
-
-    /**
-     * Use LLM to create advanced sub-queries from a complex query
-     *
-     * @param query The original complex query
+     * @param query The original query to decompose
+     * @param llmService LLM service to use
     * @param context Optional context to help with decomposition
-     * @param llmService LLM service to use for advanced decomposition
     * @returns Array of sub-queries
     */
-    private async createLLMSubQueries(
+    private async simpleQueryDecomposition(
        query: string,
-        context?: string,
-        llmService?: LLMServiceInterface
+        llmService: LLMServiceInterface,
+        context?: string
    ): Promise<SubQuery[]> {
-        // If no LLM service, use basic decomposition
-        if (!llmService) {
-            return this.createSubQueries(query, context);
-        }
-
        try {
-            // Create a much better prompt for more effective query decomposition
-            const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information.
+            // Create a simple prompt for query decomposition
+            const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.

-Your task is to identify the main concepts and break them down into specific, targeted search queries.
+Your goal is to help find comprehensive information by breaking down the query into multiple search terms.

-DO NOT simply rephrase the original query or create a generic "what's related to X" pattern.
-DO create specific queries that explore different aspects of the topic.
+IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.

-For example:
-If the query is "How does Docker compare to Kubernetes?", good sub-queries would be:
- "Docker container architecture and features"
- "Kubernetes container orchestration capabilities"
- "Docker vs Kubernetes performance comparison"
- "When to use Docker versus Kubernetes"
+For example, if the query is "What are Docker containers?", good sub-queries would be:
+1. "Docker container architecture and components"
+2. "Docker vs virtual machines differences"
+3. "Docker container use cases and benefits"
+4. "Docker container deployment best practices"

 Format your response as a JSON array of objects with 'text' and 'reason' properties.
 Example: [
-  {"text": "Docker container architecture", "reason": "Understanding Docker's core technology"},
-  {"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"}
+  {"text": "Docker container architecture", "reason": "Understanding the technical structure"},
+  {"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
+  {"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
+  {"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
 ]

 ${context ? `\nContext: ${context}` : ''}

 Query: ${query}`;

+            log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
+
            const messages = [
                { role: "system" as const, content: prompt }
            ];

            const options = {
-                temperature: 0.7,  // Higher temperature for more creative decomposition
+                temperature: 0.7,
                maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
                bypassFormatter: true,
                expectsJsonResponse: true,
-                _bypassContextProcessing: true, // Prevent recursive calls
-                enableTools: false // Explicitly disable tools for this request
+                _bypassContextProcessing: true,
+                enableTools: false
            };

            // Get the response from the LLM
            const response = await llmService.generateChatCompletion(messages, options);
            const responseText = response.text;

-            // Try to extract structured sub-queries from the response
+            log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
+
+            // Try to parse the response as JSON
+            let subQueries: SubQuery[] = [];
            try {
-                // Expected format is an array of objects with "text" and "reason" keys
-                interface RawSubQuery {
-                    text: string;
-                    reason?: string;
-                }
-
-                // Log the response for debugging
-                log.info(`Received response from LLM for query decomposition, extracting JSON...`);
-
-                log.info(`Response: ${responseText}`);
-
-                // Extract JSON from the response
-                const extractedData = JsonExtractor.extract<RawSubQuery[]>(responseText, {
+                // Extract the JSON from the response
+                const extractedJson = JsonExtractor.extract(responseText, {
                    extractArrays: true,
                    applyFixes: true,
                    useFallbacks: true
                });

-                // Validate the extracted data
-                if (!Array.isArray(extractedData)) {
-                    log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`);
-                    return this.createSubQueries(query, context);
+                log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
+
+                if (Array.isArray(extractedJson) && extractedJson.length > 0) {
+                    // Convert the extracted data to SubQuery objects
+                    subQueries = extractedJson
+                        .filter(item => item && typeof item === 'object' && item.text)
+                        .map(item => ({
+                            id: this.generateSubQueryId(),
+                            text: item.text,
+                            reason: item.reason || "Sub-aspect of the main question",
+                            isAnswered: false
+                        }));
+
+                    log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
+                } else {
+                    log.info(`Failed to extract array of sub-queries from LLM response`);
                }
+            } catch (error) {
+                log.error(`Error parsing LLM response: ${error}`);
+            }

-                if (extractedData.length === 0) {
-                    log.error(`Extracted array is empty, falling back to basic decomposition`);
-                    return this.createSubQueries(query, context);
-                }
-
-                log.info(`Successfully extracted ${extractedData.length} items using regex pattern`);
-
-                // Validate each sub-query to ensure it has a text property
-                const validSubQueries = extractedData.filter(item => {
-                    if (!item || typeof item !== 'object') {
-                        log.error(`Invalid sub-query item: ${JSON.stringify(item)}`);
-                        return false;
-                    }
-
-                    if (!item.text || typeof item.text !== 'string') {
-                        log.error(`Sub-query missing text property: ${JSON.stringify(item)}`);
-                        return false;
-                    }
-
-                    return true;
-                });
-
-                if (validSubQueries.length === 0) {
-                    log.error(`No valid sub-queries found after validation, falling back to basic decomposition`);
-                    return this.createSubQueries(query, context);
-                }
-
-                if (validSubQueries.length < extractedData.length) {
-                    log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`);
-                }
-
-                // Convert the raw data to SubQuery objects
-                let subQueries = validSubQueries.map(item => ({
+            // Always include the original query
+            const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
+            if (!hasOriginal) {
+                subQueries.push({
                    id: this.generateSubQueryId(),
-                    text: item.text,
-                    reason: item.reason || "Sub-aspect of the main question",
+                    text: query,
+                    reason: "Original query",
                    isAnswered: false
-                }));
-
-                // Make sure we have at least the original query
-                const hasOriginalQuery = subQueries.some(sq => {
-                    // Check if either sq.text or query is null/undefined before using toLowerCase
-                    if (!sq.text) return false;
-                    const sqText = sq.text.toLowerCase();
-                    const originalQuery = query.toLowerCase();
-
-                    return sqText.includes(originalQuery) || originalQuery.includes(sqText);
                });
+                log.info(`Added original query to sub-queries list`);
+            }

-                if (!hasOriginalQuery) {
-                    subQueries.unshift({
+            // Ensure we have at least 3 queries for better search coverage
+            if (subQueries.length < 3) {
+                // Create some generic variants of the original query
+                const genericVariants = [
+                    { text: `${query} examples and use cases`, reason: "Practical applications" },
+                    { text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
+                    { text: `${query} best practices`, reason: "Implementation guidance" }
+                ];
+
+                // Add variants until we have at least 3 queries
+                for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
+                    subQueries.push({
                        id: this.generateSubQueryId(),
-                        text: query,
-                        reason: "Original query",
+                        text: genericVariants[i].text,
+                        reason: genericVariants[i].reason,
                        isAnswered: false
                    });
                }

-                // Log the extracted sub-queries for debugging
-                log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`);
-
-                return subQueries;
-            } catch (error: any) {
-                log.error(`Error extracting sub-queries from LLM response: ${error.message}`);
-                // Fall through to traditional decomposition
+                log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
            }

-            // Fallback to traditional decomposition
-            return this.createSubQueries(query, context);
-        } catch (error: any) {
-            log.error(`Error in createLLMSubQueries: ${error.message}`);
-            return this.createSubQueries(query, context);
-        }
-    }
+            log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
+            return subQueries;
+        } catch (error) {
+            log.error(`Error in simpleQueryDecomposition: ${error}`);

-    /**
-     * Create sub-queries from a complex query
-     *
-     * @param query The original complex query
-     * @param context Optional context to help with decomposition
-     * @returns Array of sub-queries
-     */
-    private createSubQueries(query: string, context?: string): SubQuery[] {
-        // Analyze the query to identify potential aspects to explore
-        const questionParts = this.identifyQuestionParts(query);
-        const subQueries: SubQuery[] = [];
-
-        // Add the main query as the first sub-query
-        subQueries.push({
-            id: this.generateSubQueryId(),
-            text: query,
-            reason: "Main question (for direct matching)",
-            isAnswered: false
-        });
-
-        // Add sub-queries for each identified question part
-        for (const part of questionParts) {
-            subQueries.push({
-                id: this.generateSubQueryId(),
-                text: part,
-                reason: "Sub-aspect of the main question",
-                isAnswered: false
-            });
-        }
-
-        // Add a generic exploration query to find related information
-        subQueries.push({
-            id: this.generateSubQueryId(),
-            text: `What information is related to ${query}?`,
-            reason: "General exploration to find related content",
-            isAnswered: false
-        });
-
-        // If we have context, add a specific query for that context
-        if (context) {
-            subQueries.push({
-                id: this.generateSubQueryId(),
-                text: `How does "${context}" relate to ${query}?`,
-                reason: "Contextual relationship exploration",
-                isAnswered: false
-            });
-        }
-
-        return subQueries;
-    }
-
-    /**
-     * Identify parts of a complex question that could be individual sub-questions
-     *
-     * @param query The complex query to analyze
-     * @returns Array of potential sub-questions
-     */
-    private identifyQuestionParts(query: string): string[] {
-        const parts: string[] = [];
-
-        // Check for multiple question marks
-        const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?'));
-        if (questionSentences.length > 1) {
-            // Multiple explicit questions detected
-            return questionSentences.map(s => s.trim());
-        }
-
-        // Check for conjunctions that might separate multiple questions
-        const conjunctions = ['and', 'or', 'but', 'plus', 'also'];
-        for (const conjunction of conjunctions) {
-            const pattern = new RegExp(`\\b${conjunction}\\b`, 'i');
-            if (pattern.test(query)) {
-                // Split by conjunction and check if each part could be a question
-                const splitParts = query.split(pattern);
-                for (const part of splitParts) {
-                    const trimmed = part.trim();
-                    if (trimmed.length > 10) { // Avoid tiny fragments
-                        parts.push(trimmed);
-                    }
+            // Return the original query plus some variants as fallback
+            const fallbackQueries = [
+                {
+                    id: this.generateSubQueryId(),
+                    text: query,
+                    reason: "Original query",
+                    isAnswered: false
+                },
+                {
+                    id: this.generateSubQueryId(),
+                    text: `${query} overview`,
+                    reason: "General information",
+                    isAnswered: false
+                },
+                {
+                    id: this.generateSubQueryId(),
+                    text: `${query} examples`,
+                    reason: "Practical examples",
+                    isAnswered: false
                }
-                if (parts.length > 0) {
-                    return parts;
-                }
-            }
+            ];
+
+            log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
+            return fallbackQueries;
        }
-
-        // Check for comparison indicators
-        const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs'];
-        for (const term of comparisonTerms) {
-            if (query.toLowerCase().includes(term)) {
-                // This is likely a comparison question, extract the items being compared
-                const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i'));
-                if (beforeAfter.length === 2) {
-                    // Try to extract compared items
-                    const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]);
-                    if (aspects.length > 0) {
-                        for (const aspect of aspects) {
-                            parts.push(`What are the key points about ${aspect}?`);
-                        }
-                        parts.push(`What are the differences between ${aspects.join(' and ')}?`);
-                        return parts;
-                    }
-                }
-            }
-        }
-
-        // Check for "multiple aspects" questions
-        const aspectPatterns = [
-            /what (?:are|is) the (\w+) (?:of|about|for|in) /i,
-            /how (?:to|do|does|can) .+ (\w+)/i
-        ];
-
-        for (const pattern of aspectPatterns) {
-            const match = query.match(pattern);
-            if (match && match[1]) {
-                const aspect = match[1];
-                parts.push(`What is the ${aspect}?`);
-                parts.push(`How does ${aspect} relate to the main topic?`);
-            }
-        }
-
-        return parts;
-    }
-
-    /**
-     * Extract items being compared from a comparison question
-     *
-     * @param before Text before the comparison term
-     * @param after Text after the comparison term
-     * @returns Array of items being compared
-     */
-    private extractComparisonAspects(before: string, after: string): string[] {
-        const aspects: string[] = [];
-
-        // Look for "between A and B" pattern
-        const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i);
-        if (betweenMatch) {
-            aspects.push(betweenMatch[1].trim());
-            aspects.push(betweenMatch[2].trim());
-            return aspects;
-        }
-
-        // Look for A vs B pattern
-        const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i);
-        if (directComparison) {
-            aspects.push(directComparison[1].trim());
-            aspects.push(directComparison[2].trim());
-            return aspects;
-        }
-
-        // Fall back to looking for named entities or key terms in both parts
-        const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || [];
-        const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || [];
-
-        // Look for substantial terms (longer than 3 chars)
-        const candidateTerms = [...beforeTerms, ...afterTerms]
-            .filter(term => term.length > 3)
-            .map(term => term.trim());
-
-        // Take up to 2 distinct terms
-        return [...new Set(candidateTerms)].slice(0, 2);
    }

    /**
--- a/src/services/llm/context_extractors/query_decomposition_tool.ts
+++ b/src/services/llm/context_extractors/query_decomposition_tool.ts
@ -1,17 +1,38 @@
 /**
- * Query Decomposition Tool - Compatibility Layer
+ * Query Decomposition Tool
 *
- * This file provides backward compatibility with the new consolidated
- * query_processor.js implementation.
+ * This tool helps the LLM agent break down complex user queries into
+ * sub-questions that can be answered individually and then synthesized
+ * into a comprehensive response.
+ *
+ * Features:
+ * - Analyze query complexity
+ * - Extract multiple intents from a single question
+ * - Create a multi-stage research plan
+ * - Track progress through complex information gathering
 */

 import log from '../../log.js';
-import queryProcessor from '../context/services/query_processor.js';
-import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js';
+import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';

-export type { SubQuery, DecomposedQuery };
+/** One sub-question produced when a user query is decomposed. */
+export interface SubQuery {
+    /** Identifier used to find this sub-query again when its answer is recorded. */
+    id: string;
+    /** The sub-question text. */
+    text: string;
+    /** Short explanation of why this sub-query was generated. */
+    reason: string;
+    /** False on creation; set to true once an answer has been recorded. */
+    isAnswered: boolean;
+    /** The recorded answer; absent until the sub-query has been answered. */
+    answer?: string;
+}
+
+/** A user query together with the sub-queries derived from it and their overall progress. */
+export interface DecomposedQuery {
+    /** The query exactly as it was passed to decomposeQuery. */
+    originalQuery: string;
+    /** Sub-queries derived from the original query; empty when the query was empty. */
+    subQueries: SubQuery[];
+    /** 'pending' right after decomposition; 'in_progress' or 'completed' once answers are recorded. */
+    status: 'pending' | 'in_progress' | 'completed';
+    /** Score from assessQueryComplexity (1-10); 0 is used for an empty query. */
+    complexity: number;
+}

 export class QueryDecompositionTool {
+    private static queryCounter: number = 0;
+
    /**
     * Break down a complex query into smaller, more manageable sub-queries
     *
@ -20,54 +41,83 @@ export class QueryDecompositionTool {
     * @returns A decomposed query object with sub-queries
     */
    decomposeQuery(query: string, context?: string): DecomposedQuery {
-        log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery');
+        try {
+            // Log the decomposition attempt for tracking
+            // NOTE(review): this log line runs before the empty-query guard below and always
+            // appends "...", even when the query is shorter than 100 characters.
+            log.info(`Decomposing query: "${query.substring(0, 100)}..."`);

-        // Since the main implementation is now async but we need to maintain a sync interface,
-        // we'll use a simpler approach that doesn't require LLM
+            // Empty or whitespace-only input yields no sub-queries and a complexity of 0
+            if (!query || query.trim().length === 0) {
+                log.info("Query decomposition called with empty query");
+                return {
+                    originalQuery: query,
+                    subQueries: [],
+                    status: 'pending',
+                    complexity: 0
+                };
+            }

-        // Get the complexity to determine approach
-        const complexity = queryProcessor.assessQueryComplexity(query);
+            // Assess query complexity to determine if decomposition is needed
+            const complexity = this.assessQueryComplexity(query);
+            log.info(`Query complexity assessment: ${complexity}/10`);
+
+            // For simple queries, just return the original as a single sub-query
+            // Use a lower threshold (2 instead of 3) to decompose more queries
+            // (assessQueryComplexity clamps its result to at least 1, so only a score of
+            // exactly 1 takes this branch)
+            if (complexity < 2) {
+                log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`);
+
+                const mainSubQuery = {
+                    id: this.generateSubQueryId(),
+                    text: query,
+                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT,
+                    isAnswered: false
+                };
+
+                // Still add a generic exploration query to get some related content
+                const genericQuery = {
+                    id: this.generateSubQueryId(),
+                    text: `Information related to ${query}`,
+                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC,
+                    isAnswered: false
+                };
+
+                return {
+                    originalQuery: query,
+                    subQueries: [mainSubQuery, genericQuery],
+                    status: 'pending',
+                    complexity
+                };
+            }
+
+            // For complex queries, perform decomposition
+            const subQueries = this.createSubQueries(query, context);
+            log.info(`Decomposed query into ${subQueries.length} sub-queries`);
+
+            // Log the sub-queries for better visibility
+            subQueries.forEach((sq, index) => {
+                log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`);
+            });

-        if (!query || query.trim().length === 0) {
            return {
                originalQuery: query,
-                subQueries: [],
+                subQueries,
                status: 'pending',
-                complexity: 0
+                complexity
+            };
+        } catch (error: any) {
+            // Any exception thrown above ends up here: it is logged and the caller still
+            // receives a usable result instead of an error
+            log.error(`Error decomposing query: ${error.message}`);
+
+            // Fallback to treating it as a simple query
+            return {
+                originalQuery: query,
+                subQueries: [{
+                    id: this.generateSubQueryId(),
+                    text: query,
+                    reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR,
+                    isAnswered: false
+                }],
+                status: 'pending',
+                complexity: 1
            };
        }
-
-        // Create a baseline decomposed query
-        let subQueries = [];
-
-        // For compatibility, we'll use the basic SubQuery generation
-        // This avoids the async LLM call which would break the sync interface
-        const mainSubQuery = {
-            id: `sq_${Date.now()}_sync_0`,
-            text: query,
-            reason: "Main question (for direct matching)",
-            isAnswered: false
-        };
-
-        subQueries.push(mainSubQuery);
-
-        // Add a generic exploration query for context
-        const genericQuery = {
-            id: `sq_${Date.now()}_sync_1`,
-            text: `What information is related to ${query}?`,
-            reason: "General exploration to find related content",
-            isAnswered: false
-        };
-
-        subQueries.push(genericQuery);
-
-        // Simplified implementation that doesn't require async/LLM calls
-        return {
-            originalQuery: query,
-            subQueries: subQueries,
-            status: 'pending',
-            complexity
-        };
    }

    /**
@ -83,8 +133,25 @@ export class QueryDecompositionTool {
        subQueryId: string,
        answer: string
    ): DecomposedQuery {
-        log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer');
-        return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer);
+        const updatedSubQueries = decomposedQuery.subQueries.map(sq => {
+            if (sq.id === subQueryId) {
+                return {
+                    ...sq,
+                    answer,
+                    isAnswered: true
+                };
+            }
+            return sq;
+        });
+
+        // Check if all sub-queries are answered
+        const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);
+
+        return {
+            ...decomposedQuery,
+            subQueries: updatedSubQueries,
+            status: allAnswered ? 'completed' : 'in_progress'
+        };
    }

    /**
@ -94,8 +161,40 @@ export class QueryDecompositionTool {
     * @returns A synthesized answer to the original query
     */
    synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
-        log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer');
-        return queryProcessor.synthesizeAnswer(decomposedQuery);
+        try {
+            const subQueries = decomposedQuery.subQueries;
+
+            // Refuse to synthesize while any sub-query is still unanswered
+            const hasOpenSubQuery = subQueries.some(sq => !sq.isAnswered);
+            if (hasOpenSubQuery) {
+                return "Cannot synthesize answer - not all sub-queries have been answered.";
+            }
+
+            // A lone sub-query needs no framing: hand back its answer (or an empty string)
+            if (subQueries.length === 1) {
+                return subQueries[0].answer || "";
+            }
+
+            // Heading that repeats the original query
+            const heading = `Answer to: "${decomposedQuery.originalQuery}"\n\n`;
+
+            // An extra lead-in is added only when there are more than three sub-queries;
+            // no grouping by theme is performed
+            const leadIn = subQueries.length > 3 ? "Based on the information gathered:\n\n" : "";
+
+            // Sub-answers are listed in order, each followed by a blank line
+            // (only the answers are emitted, not the sub-question texts)
+            const answers = subQueries.map(sq => `${sq.answer}\n\n`).join("");
+
+            return (heading + leadIn + answers).trim();
+        } catch (error: any) {
+            // Failures are logged and reported to the caller as a fixed message
+            log.error(`Error synthesizing answer: ${error.message}`);
+            return "Error synthesizing the final answer.";
+        }
    }

    /**
@ -105,10 +204,6 @@ export class QueryDecompositionTool {
     * @returns A status report string
     */
    getQueryStatus(decomposedQuery: DecomposedQuery): string {
-        log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus');
-        // This method doesn't exist directly in the new implementation
-        // We'll implement a simple fallback
-
        const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
        const totalCount = decomposedQuery.subQueries.length;

@ -116,10 +211,9 @@ export class QueryDecompositionTool {

        for (const sq of decomposedQuery.subQueries) {
            status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
-            if (sq.isAnswered && sq.answer) {
-                status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`;
+            if (sq.isAnswered) {
+                status += `   Answer: ${this.truncateText(sq.answer || "", 100)}\n`;
            }
-            status += '\n';
        }

        return status;
@ -127,15 +221,302 @@ export class QueryDecompositionTool {

    /**
     * Assess the complexity of a query on a scale of 1-10
+     * This helps determine how many sub-queries are needed
     *
     * @param query The query to assess
     * @returns A complexity score from 1-10
     */
    assessQueryComplexity(query: string): number {
-        log.info('Using compatibility layer for QueryDecompositionTool.assessQueryComplexity');
-        return queryProcessor.assessQueryComplexity(query);
+        // Number of matches of a global pattern within the query
+        const occurrences = (pattern: RegExp): number => (query.match(pattern) || []).length;
+
+        // Literal question marks
+        const questionMarks = occurrences(/\?/g);
+
+        // Whole-word question words, summed across all of them
+        let questionWordTotal = 0;
+        for (const word of ['what', 'how', 'why', 'where', 'when', 'who', 'which']) {
+            questionWordTotal += occurrences(new RegExp(`\\b${word}\\b`, 'gi'));
+        }
+
+        // Conjunctions that may be joining several questions
+        const conjunctions = occurrences(/\b(and|or|but|as well as)\b/gi);
+
+        // Comparative and analytical wording
+        const comparisons = occurrences(/\b(compare|versus|vs|difference|similarities?)\b/gi);
+        const analyses = occurrences(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi);
+
+        // Each signal contributes at most 2 points; length adds up to 2 more
+        const contributions = [
+            Math.min(2, questionMarks),
+            // the first question word is free, only additional ones count
+            Math.min(2, Math.max(0, questionWordTotal - 1)),
+            Math.min(2, conjunctions),
+            Math.min(2, comparisons + analyses),
+            query.length > 100 ? 1 : 0,
+            query.length > 200 ? 1 : 0
+        ];
+
+        // Base score of 1 plus all contributions, clamped to the 1-10 range
+        const score = contributions.reduce((total, points) => total + points, 1);
+        return Math.max(1, Math.min(10, score));
+    }
+
+    /**
+     * Generate a unique ID for a sub-query
+     */
+    generateSubQueryId(): string {
+        // A timestamp plus a random 0-9999 suffix can repeat when several ids are created
+        // within the same millisecond, and answers are matched to sub-queries by id.
+        // The class-level counter makes every id issued by this process distinct.
+        const sequence = QueryDecompositionTool.queryCounter++;
+        return `sq_${Date.now()}_${sequence}`;
+    }
+
+    /**
+     * Create sub-queries based on the original query
+     */
+    createSubQueries(query: string, context?: string): SubQuery[] {
+        // `context` is accepted for interface compatibility; the rules below do not use it
+        const lowered = query.toLowerCase();
+
+        // Every sub-query starts unanswered and gets its own generated id
+        const makeSubQuery = (text: string, reason: string): SubQuery => ({
+            id: this.generateSubQueryId(),
+            text,
+            reason,
+            isAnswered: false
+        });
+
+        // Topics are spliced into new question templates, so strip surrounding whitespace
+        // and any trailing "?" to avoid output such as "What are the steps to X??"
+        const cleanTopic = (topic: string): string => topic.trim().replace(/[\s?]+$/, '');
+
+        // Queries containing these phrases are returned as a single sub-query and are not
+        // expanded further, because expanding them has been causing recursive loops
+        if (lowered.includes("provide details about") || lowered.includes("information related to")) {
+            log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`);
+            return [makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS)];
+        }
+
+        // The original query is always the first sub-query
+        const subQueries: SubQuery[] = [
+            makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY)
+        ];
+
+        const isComparison =
+            lowered.includes('compare') ||
+            lowered.includes('difference between') ||
+            lowered.includes(' vs ') ||
+            lowered.includes('versus');
+
+        if (isComparison) {
+            // Extract entities to compare (simplified approach)
+            const entities = this.extractEntitiesForComparison(query);
+
+            // Only expand when at least two things to compare were found
+            if (entities.length >= 2) {
+                for (const entity of entities) {
+                    subQueries.push(makeSubQuery(
+                        `What are the key characteristics of ${entity}?`,
+                        `Getting details about "${entity}" for comparison`
+                    ));
+                }
+
+                subQueries.push(makeSubQuery(
+                    `How do ${entities.join(' and ')} compare in terms of their primary features?`,
+                    'Direct comparison of the entities'
+                ));
+            }
+        } else if (lowered.includes('how to ')) {
+            // Keep only the text after the first "how to" so a lead-in such as
+            // "Can you tell me how to ..." does not leak into the topic
+            const topic = cleanTopic(query.replace(/^[\s\S]*?how to /i, ''));
+
+            if (topic) {
+                subQueries.push(makeSubQuery(
+                    `What are the steps to ${topic}?`,
+                    'Finding procedural information'
+                ));
+                subQueries.push(makeSubQuery(
+                    `What are common challenges or pitfalls when trying to ${topic}?`,
+                    'Identifying potential difficulties'
+                ));
+            }
+        } else if (lowered.startsWith('why ')) {
+            const topic = cleanTopic(query.replace(/^why /i, ''));
+
+            if (topic) {
+                subQueries.push(makeSubQuery(
+                    `What are the causes of ${topic}?`,
+                    'Identifying causes'
+                ));
+                subQueries.push(makeSubQuery(
+                    `What evidence supports explanations for ${topic}?`,
+                    'Finding supporting evidence'
+                ));
+            }
+        } else if (lowered.startsWith('what is ') || lowered.startsWith('what are ')) {
+            const topic = cleanTopic(query.replace(/^what (is|are) /i, ''));
+
+            if (topic) {
+                subQueries.push(makeSubQuery(`Definition of ${topic}`, 'Getting basic definition'));
+                subQueries.push(makeSubQuery(`Examples of ${topic}`, 'Finding examples'));
+            }
+        }
+
+        // If nothing beyond the original query was added (no rule matched, or the matched
+        // rule found nothing usable), fall back to generic exploratory sub-queries
+        if (subQueries.length <= 1) {
+            for (const concept of this.extractMainConcepts(query)) {
+                const loweredConcept = concept.toLowerCase();
+
+                // Don't create recursive or self-referential queries
+                if (loweredConcept.includes('provide details') ||
+                    loweredConcept.includes('information related')) {
+                    continue;
+                }
+
+                subQueries.push(makeSubQuery(
+                    `Key information about ${concept}`,
+                    `Finding information about "${concept}"`
+                ));
+            }
+        }
+
+        return subQueries;
+    }
+
+    /**
+     * Truncate text to a maximum length with ellipsis
+     */
+    private truncateText(text: string, maxLength: number): string {
+        // Text that already fits is returned untouched
+        const fits = text.length <= maxLength;
+
+        // Otherwise keep maxLength - 3 characters so the result plus "..." is maxLength long
+        return fits ? text : `${text.substring(0, maxLength - 3)}...`;
+    }
+
+    /**
+     * Extract entities for comparison from a query
+     *
+     * @param query The query to extract entities from
+     * @returns Array of entity strings
+     */
+    extractEntitiesForComparison(query: string): string[] {
+        // Explicit phrasings first: "compare X and Y", "difference between X and Y", "X vs Y"
+        const explicitPatterns = [
+            /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i,
+            /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i
+        ];
+
+        for (const pattern of explicitPatterns) {
+            const found = query.match(pattern);
+            if (found) {
+                return [found[1].trim(), found[2].trim()];
+            }
+        }
+
+        // Fallback: split on whitespace and treat each run of words between
+        // common filler words as one candidate entity
+        const fillerWord = /^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i;
+        const candidates: string[] = [];
+        let pending: string[] = [];
+
+        // Close the current run of words and record it when it is non-empty
+        const flush = (): void => {
+            const phrase = pending.join(' ').trim();
+            if (phrase) {
+                candidates.push(phrase);
+            }
+            pending = [];
+        };
+
+        for (const token of query.split(/\s+/)) {
+            if (fillerWord.test(token)) {
+                flush();
+            } else {
+                pending.push(token);
+            }
+        }
+        flush();
+
+        // At most two entities are returned
+        return candidates.slice(0, 2);
+    }
+
+    /**
+     * Extract main concepts from a query
+     *
+     * @param query The query to extract concepts from
+     * @returns Array of concept strings
+     */
+    extractMainConcepts(query: string): string[] {
+        // Remove question words and common stop words. The \b anchors restrict this to
+        // whole words; without them "in", "or" and "on" are also cut out of longer words
+        // (e.g. "information" would be reduced to fragments).
+        const cleanedQuery = query.replace(
+            /\b(?:what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by)\b/gi,
+            ' '
+        );
+
+        // Count the remaining words longer than three characters, case-insensitively.
+        // A Map is used so that words such as "constructor" cannot collide with
+        // properties inherited from Object.prototype.
+        const wordCounts = new Map<string, number>();
+        for (const word of cleanedQuery.split(/\s+/)) {
+            if (word.length > 3) {
+                const key = word.toLowerCase();
+                wordCounts.set(key, (wordCounts.get(key) || 0) + 1);
+            }
+        }
+
+        // Fallback if no significant words were found
+        if (wordCounts.size === 0) {
+            return [query.trim()];
+        }
+
+        // Up to three most frequent words; ties keep their order of first appearance
+        const topWords = Array.from(wordCounts.entries())
+            .sort((a, b) => b[1] - a[1])
+            .slice(0, 3)
+            .map(entry => entry[0]);
+
+        const loweredQuery = query.toLowerCase();
+
+        // A Set drops duplicates: in short queries the windows around different
+        // words overlap completely and would otherwise yield the same phrase repeatedly
+        const concepts = new Set<string>();
+
+        for (const word of topWords) {
+            const wordIndex = loweredQuery.indexOf(word);
+            if (wordIndex < 0) {
+                concepts.add(word);
+                continue;
+            }
+
+            // Window of roughly 15 characters on each side of the word, widened to the
+            // nearest space. lastIndexOf yields -1 when there is no earlier space, so
+            // adding 1 gives the start of the query.
+            const start = query.lastIndexOf(' ', wordIndex - 15) + 1;
+
+            // indexOf yields -1 when no space follows; the window then runs to the end
+            // of the query instead of collapsing to the bare word
+            const spaceAfter = query.indexOf(' ', wordIndex + word.length + 15);
+            const end = spaceAfter === -1 ? query.length : spaceAfter;
+
+            const phrase = query.substring(start, end).trim();
+            concepts.add(phrase || word);
+        }
+
+        return Array.from(concepts);
    }
 }

-// Export default instance for compatibility
-export default new QueryDecompositionTool();
+export default QueryDecompositionTool;