This commit is contained in:
perf3ct 2025-04-17 18:19:40 +00:00
parent d83cce88cb
commit daa56b10e8
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
2 changed files with 662 additions and 440 deletions

View File

@ -54,82 +54,126 @@ export class QueryProcessor {
return null;
}
}
/**
* Generate enhanced search queries for better semantic matching
* Generate search queries to find relevant information for the user question
*
* @param userQuestion - The user's question
* @param llmService - The LLM service to use for generating queries, or null to auto-detect
* @param llmService - The LLM service to use for generating queries
* @returns Array of search queries
*/
async generateSearchQueries(
userQuestion: string,
llmService?: LLMServiceInterface
): Promise<string[]> {
if (!userQuestion || userQuestion.trim() === '') {
return []; // Return empty array for empty input
}
async generateSearchQueries(userQuestion: string, llmService: any): Promise<string[]> {
try {
// Check cache
const cacheKey = `searchQueries:${userQuestion}`;
const cached = cacheManager.getQueryResults<string[]>(cacheKey);
if (cached && Array.isArray(cached)) {
return cached;
// Check cache first
const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`);
const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
Given the user's question, generate 3-5 specific search queries that would help find relevant information.
Each query should be focused on a different aspect of the question.
Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
Format your answer as a JSON array of strings, with each string being a search query.
Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
interface Message {
role: 'user' | 'assistant' | 'system';
content: string;
}
// Get LLM service if not provided
const service = llmService || await this.getLLMService();
if (!service) {
log.info(`No LLM service available for query enhancement, using original query`);
return [userQuestion];
}
// Prepare the prompt with JSON formatting instructions
const enhancedPrompt = `${this.enhancerPrompt}
IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
const messages = [
{ role: "system" as const, content: enhancedPrompt },
{ role: "user" as const, content: userQuestion }
const messages: Message[] = [
{ role: "system", content: PROMPT },
{ role: "user", content: userQuestion }
];
const options = {
temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR,
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
bypassFormatter: true,
expectsJsonResponse: true,
_bypassContextProcessing: true, // Prevent recursive calls
enableTools: false // Explicitly disable tools for this request
temperature: 0.3,
maxTokens: 300
};
// Get the response from the LLM
const response = await service.generateChatCompletion(messages, options);
const responseText = response.text;
const response = await llmService.generateChatCompletion(messages, options);
const responseText = response.text; // Extract the text from the response object
// Use the JsonExtractor to parse the response
const queries = JsonExtractor.extract<string[]>(responseText, {
extractArrays: true,
minStringLength: 3,
applyFixes: true,
useFallbacks: true
});
try {
// Remove code blocks, quotes, and clean up the response text
let jsonStr = responseText
.replace(/```(?:json)?|```/g, '') // Remove code block markers
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
.trim();
if (queries && queries.length > 0) {
log.info(`Extracted ${queries.length} queries using JsonExtractor`);
cacheManager.storeQueryResults(cacheKey, queries);
return queries;
// Check if the text might contain a JSON array (has square brackets)
if (jsonStr.includes('[') && jsonStr.includes(']')) {
// Extract just the array part if there's explanatory text
const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
if (arrayMatch) {
jsonStr = arrayMatch[0];
}
// Try to parse the JSON
try {
const queries = JSON.parse(jsonStr);
if (Array.isArray(queries) && queries.length > 0) {
const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
return result;
}
} catch (innerError) {
// If parsing fails, log it and continue to the fallback
log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
}
}
// Fallback 1: Try to extract an array manually by splitting on commas between quotes
if (jsonStr.includes('[') && jsonStr.includes(']')) {
const arrayContent = jsonStr.substring(
jsonStr.indexOf('[') + 1,
jsonStr.lastIndexOf(']')
);
// Use regex to match quoted strings, handling escaped quotes
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
if (stringMatches && stringMatches.length > 0) {
const result = stringMatches
.map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
.filter((s: string) => s.length > 0);
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
return result;
}
}
// Fallback 2: Extract queries line by line
const lines = responseText.split('\n')
.map((line: string) => line.trim())
.filter((line: string) =>
line.length > 0 &&
!line.startsWith('```') &&
!line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
!line.match(/^\[|\]$/) // Skip lines that are just brackets
);
if (lines.length > 0) {
// Remove numbering, quotes and other list markers from each line
const result = lines.map((line: string) => {
return line
.replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
.replace(/^[-*•]\s*/, '') // Remove bullet list markers
.replace(/^["']|["']$/g, '') // Remove surrounding quotes
.trim();
}).filter((s: string) => s.length > 0);
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
return result;
}
} catch (parseError) {
log.error(`Error parsing search queries: ${parseError}`);
}
// Fallback to original question
// If all else fails, just use the original question
const fallback = [userQuestion];
log.info(`No queries extracted, using fallback: "${userQuestion}"`);
cacheManager.storeQueryResults(cacheKey, fallback);
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
return fallback;
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
log.error(`Error generating search queries: ${errorMessage}`);
// Fallback to just using the original question
return [userQuestion];
}
}
@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this
};
}
// Assess query complexity
const complexity = this.assessQueryComplexity(query);
log.info(`Query complexity assessment: ${complexity}/10`);
// Simple assessment of query complexity
const complexity = query.length > 100 ? 5 : 3;
// Try to get LLM service if not provided
// Get LLM service if not provided
const service = llmService || await this.getLLMService();
// If no LLM service is available, use basic decomposition
// If no LLM service is available, use original query
if (!service) {
log.info(`No LLM service available for query decomposition, using original query`);
return this.createBasicDecomposition(query, complexity);
return {
originalQuery: query,
subQueries: [{
id: this.generateSubQueryId(),
text: query,
reason: "Original query",
isAnswered: false
}],
status: 'pending',
complexity
};
}
// With LLM service available, always use advanced decomposition regardless of complexity
try {
log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`);
const enhancedSubQueries = await this.createLLMSubQueries(query, context, service);
// Make a simple request to decompose the query
const result = await this.simpleQueryDecomposition(query, service, context);
if (enhancedSubQueries && enhancedSubQueries.length > 0) {
log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`);
return {
originalQuery: query,
subQueries: enhancedSubQueries,
status: 'pending',
complexity
};
}
} catch (error: any) {
log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`);
// Fall through to basic decomposition
}
// Fallback to basic decomposition
return this.createBasicDecomposition(query, complexity);
// Return the result
return {
originalQuery: query,
subQueries: result,
status: 'pending',
complexity
};
} catch (error: any) {
log.error(`Error decomposing query: ${error.message}`);
@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this
}
/**
* Create a basic decomposition of a query without using LLM
* Simple LLM-based query decomposition
*
* @param query The original query
* @param complexity The assessed complexity
* @returns A basic decomposed query
*/
/**
 * Build a decomposition without consulting an LLM.
 *
 * The result always holds exactly two sub-queries: the query itself and a
 * generic "what is related" exploration query.
 *
 * @param query The original query
 * @param complexity The complexity score to carry through to the result
 * @returns A pending decomposed query with two unanswered sub-queries
 */
private createBasicDecomposition(query: string, complexity: number): DecomposedQuery {
    log.info(`Using basic decomposition approach (complexity: ${complexity})`);

    // [text, reason] pairs; ids are generated in this order (direct query first).
    const seeds: Array<[string, string]> = [
        [query, "Direct question that can be answered without decomposition"],
        [`What information is related to ${query}?`, "General exploration to find related content"]
    ];

    const subQueries = seeds.map(([text, reason]) => ({
        id: this.generateSubQueryId(),
        text,
        reason,
        isAnswered: false
    }));

    return { originalQuery: query, subQueries, status: 'pending', complexity };
}
/**
* Use LLM to create advanced sub-queries from a complex query
*
* @param query The original complex query
* @param query The original query to decompose
* @param llmService LLM service to use
* @param context Optional context to help with decomposition
* @param llmService LLM service to use for advanced decomposition
* @returns Array of sub-queries
*/
private async createLLMSubQueries(
private async simpleQueryDecomposition(
query: string,
context?: string,
llmService?: LLMServiceInterface
llmService: LLMServiceInterface,
context?: string
): Promise<SubQuery[]> {
// If no LLM service, use basic decomposition
if (!llmService) {
return this.createSubQueries(query, context);
}
try {
// Create a much better prompt for more effective query decomposition
const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information.
// Create a simple prompt for query decomposition
const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.
Your task is to identify the main concepts and break them down into specific, targeted search queries.
Your goal is to help find comprehensive information by breaking down the query into multiple search terms.
DO NOT simply rephrase the original query or create a generic "what's related to X" pattern.
DO create specific queries that explore different aspects of the topic.
IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.
For example:
If the query is "How does Docker compare to Kubernetes?", good sub-queries would be:
- "Docker container architecture and features"
- "Kubernetes container orchestration capabilities"
- "Docker vs Kubernetes performance comparison"
- "When to use Docker versus Kubernetes"
For example, if the query is "What are Docker containers?", good sub-queries would be:
1. "Docker container architecture and components"
2. "Docker vs virtual machines differences"
3. "Docker container use cases and benefits"
4. "Docker container deployment best practices"
Format your response as a JSON array of objects with 'text' and 'reason' properties.
Example: [
{"text": "Docker container architecture", "reason": "Understanding Docker's core technology"},
{"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"}
{"text": "Docker container architecture", "reason": "Understanding the technical structure"},
{"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
{"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
{"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
]
${context ? `\nContext: ${context}` : ''}
Query: ${query}`;
log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
const messages = [
{ role: "system" as const, content: prompt }
];
const options = {
temperature: 0.7, // Higher temperature for more creative decomposition
temperature: 0.7,
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
bypassFormatter: true,
expectsJsonResponse: true,
_bypassContextProcessing: true, // Prevent recursive calls
enableTools: false // Explicitly disable tools for this request
_bypassContextProcessing: true,
enableTools: false
};
// Get the response from the LLM
const response = await llmService.generateChatCompletion(messages, options);
const responseText = response.text;
// Try to extract structured sub-queries from the response
log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
// Try to parse the response as JSON
let subQueries: SubQuery[] = [];
try {
// Expected format is an array of objects with "text" and "reason" keys
interface RawSubQuery {
text: string;
reason?: string;
}
// Log the response for debugging
log.info(`Received response from LLM for query decomposition, extracting JSON...`);
log.info(`Response: ${responseText}`);
// Extract JSON from the response
const extractedData = JsonExtractor.extract<RawSubQuery[]>(responseText, {
// Extract the JSON from the response
const extractedJson = JsonExtractor.extract(responseText, {
extractArrays: true,
applyFixes: true,
useFallbacks: true
});
// Validate the extracted data
if (!Array.isArray(extractedData)) {
log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`);
return this.createSubQueries(query, context);
log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
if (Array.isArray(extractedJson) && extractedJson.length > 0) {
// Convert the extracted data to SubQuery objects
subQueries = extractedJson
.filter(item => item && typeof item === 'object' && item.text)
.map(item => ({
id: this.generateSubQueryId(),
text: item.text,
reason: item.reason || "Sub-aspect of the main question",
isAnswered: false
}));
log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
} else {
log.info(`Failed to extract array of sub-queries from LLM response`);
}
} catch (error) {
log.error(`Error parsing LLM response: ${error}`);
}
if (extractedData.length === 0) {
log.error(`Extracted array is empty, falling back to basic decomposition`);
return this.createSubQueries(query, context);
}
log.info(`Successfully extracted ${extractedData.length} items using regex pattern`);
// Validate each sub-query to ensure it has a text property
const validSubQueries = extractedData.filter(item => {
if (!item || typeof item !== 'object') {
log.error(`Invalid sub-query item: ${JSON.stringify(item)}`);
return false;
}
if (!item.text || typeof item.text !== 'string') {
log.error(`Sub-query missing text property: ${JSON.stringify(item)}`);
return false;
}
return true;
});
if (validSubQueries.length === 0) {
log.error(`No valid sub-queries found after validation, falling back to basic decomposition`);
return this.createSubQueries(query, context);
}
if (validSubQueries.length < extractedData.length) {
log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`);
}
// Convert the raw data to SubQuery objects
let subQueries = validSubQueries.map(item => ({
// Always include the original query
const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
if (!hasOriginal) {
subQueries.push({
id: this.generateSubQueryId(),
text: item.text,
reason: item.reason || "Sub-aspect of the main question",
text: query,
reason: "Original query",
isAnswered: false
}));
// Make sure we have at least the original query
const hasOriginalQuery = subQueries.some(sq => {
// Check if either sq.text or query is null/undefined before using toLowerCase
if (!sq.text) return false;
const sqText = sq.text.toLowerCase();
const originalQuery = query.toLowerCase();
return sqText.includes(originalQuery) || originalQuery.includes(sqText);
});
log.info(`Added original query to sub-queries list`);
}
if (!hasOriginalQuery) {
subQueries.unshift({
// Ensure we have at least 3 queries for better search coverage
if (subQueries.length < 3) {
// Create some generic variants of the original query
const genericVariants = [
{ text: `${query} examples and use cases`, reason: "Practical applications" },
{ text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
{ text: `${query} best practices`, reason: "Implementation guidance" }
];
// Add variants until we have at least 3 queries
for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
subQueries.push({
id: this.generateSubQueryId(),
text: query,
reason: "Original query",
text: genericVariants[i].text,
reason: genericVariants[i].reason,
isAnswered: false
});
}
// Log the extracted sub-queries for debugging
log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`);
return subQueries;
} catch (error: any) {
log.error(`Error extracting sub-queries from LLM response: ${error.message}`);
// Fall through to traditional decomposition
log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
}
// Fallback to traditional decomposition
return this.createSubQueries(query, context);
} catch (error: any) {
log.error(`Error in createLLMSubQueries: ${error.message}`);
return this.createSubQueries(query, context);
}
}
log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
return subQueries;
} catch (error) {
log.error(`Error in simpleQueryDecomposition: ${error}`);
/**
* Create sub-queries from a complex query
*
* @param query The original complex query
* @param context Optional context to help with decomposition
* @returns Array of sub-queries
*/
/**
 * Rule-based decomposition of a query into sub-queries.
 *
 * Output order: the original query, one entry per part returned by
 * identifyQuestionParts, a generic exploration query, and finally (only when
 * a context is supplied) a query relating the context to the original query.
 *
 * @param query The original complex query
 * @param context Optional context to help with decomposition
 * @returns Array of unanswered sub-queries
 */
private createSubQueries(query: string, context?: string): SubQuery[] {
    const parts = this.identifyQuestionParts(query);

    // Single place where a SubQuery is assembled so every entry has the same shape.
    const build = (text: string, reason: string): SubQuery => ({
        id: this.generateSubQueryId(),
        text,
        reason,
        isAnswered: false
    });

    const collected: SubQuery[] = [
        build(query, "Main question (for direct matching)"),
        ...parts.map(part => build(part, "Sub-aspect of the main question")),
        build(`What information is related to ${query}?`, "General exploration to find related content")
    ];

    if (context) {
        collected.push(build(`How does "${context}" relate to ${query}?`, "Contextual relationship exploration"));
    }

    return collected;
}
/**
* Identify parts of a complex question that could be individual sub-questions
*
* @param query The complex query to analyze
* @returns Array of potential sub-questions
*/
private identifyQuestionParts(query: string): string[] {
const parts: string[] = [];
// Check for multiple question marks
const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?'));
if (questionSentences.length > 1) {
// Multiple explicit questions detected
return questionSentences.map(s => s.trim());
}
// Check for conjunctions that might separate multiple questions
const conjunctions = ['and', 'or', 'but', 'plus', 'also'];
for (const conjunction of conjunctions) {
const pattern = new RegExp(`\\b${conjunction}\\b`, 'i');
if (pattern.test(query)) {
// Split by conjunction and check if each part could be a question
const splitParts = query.split(pattern);
for (const part of splitParts) {
const trimmed = part.trim();
if (trimmed.length > 10) { // Avoid tiny fragments
parts.push(trimmed);
}
// Return the original query plus some variants as fallback
const fallbackQueries = [
{
id: this.generateSubQueryId(),
text: query,
reason: "Original query",
isAnswered: false
},
{
id: this.generateSubQueryId(),
text: `${query} overview`,
reason: "General information",
isAnswered: false
},
{
id: this.generateSubQueryId(),
text: `${query} examples`,
reason: "Practical examples",
isAnswered: false
}
if (parts.length > 0) {
return parts;
}
}
];
log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
return fallbackQueries;
}
// Check for comparison indicators
const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs'];
for (const term of comparisonTerms) {
if (query.toLowerCase().includes(term)) {
// This is likely a comparison question, extract the items being compared
const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i'));
if (beforeAfter.length === 2) {
// Try to extract compared items
const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]);
if (aspects.length > 0) {
for (const aspect of aspects) {
parts.push(`What are the key points about ${aspect}?`);
}
parts.push(`What are the differences between ${aspects.join(' and ')}?`);
return parts;
}
}
}
}
// Check for "multiple aspects" questions
const aspectPatterns = [
/what (?:are|is) the (\w+) (?:of|about|for|in) /i,
/how (?:to|do|does|can) .+ (\w+)/i
];
for (const pattern of aspectPatterns) {
const match = query.match(pattern);
if (match && match[1]) {
const aspect = match[1];
parts.push(`What is the ${aspect}?`);
parts.push(`How does ${aspect} relate to the main topic?`);
}
}
return parts;
}
/**
* Extract items being compared from a comparison question
*
* @param before Text before the comparison term
* @param after Text after the comparison term
* @returns Array of items being compared
*/
/**
 * Pull the two items being compared out of a comparison question.
 *
 * Tries, in order, against the text after the comparison term:
 *   1. "between A and B"
 *   2. "A and|vs|versus B"
 * If neither matches, falls back to the first two distinct word groups
 * (1-3 words, longer than 3 characters) found in `before` then `after`.
 *
 * @param before Text before the comparison term
 * @param after Text after the comparison term
 * @returns Up to two compared items (possibly empty)
 */
private extractComparisonAspects(before: string, after: string): string[] {
    // Explicit phrasings, most specific first; the first hit wins.
    const explicitPatterns = [
        /between (.+?) and (.+?)(?:\?|$)/i,
        /(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i
    ];
    for (const pattern of explicitPatterns) {
        const hit = after.match(pattern);
        if (hit) {
            return [hit[1].trim(), hit[2].trim()];
        }
    }

    // No explicit phrasing: collect substantial word groups from both sides.
    const wordGroup = /(\w+(?:\s+\w+){0,2})/g;
    const substantial: string[] = [];
    for (const side of [before, after]) {
        for (const term of side.match(wordGroup) || []) {
            if (term.length > 3) {
                substantial.push(term.trim());
            }
        }
    }

    // Deduplicate while keeping first-seen order, then keep at most two.
    return Array.from(new Set(substantial)).slice(0, 2);
}
/**

View File

@ -1,17 +1,38 @@
/**
* Query Decomposition Tool - Compatibility Layer
* Query Decomposition Tool
*
* This file provides backward compatibility with the new consolidated
* query_processor.js implementation.
* This tool helps the LLM agent break down complex user queries into
* sub-questions that can be answered individually and then synthesized
* into a comprehensive response.
*
* Features:
* - Analyze query complexity
* - Extract multiple intents from a single question
* - Create a multi-stage research plan
* - Track progress through complex information gathering
*/
import log from '../../log.js';
import queryProcessor from '../context/services/query_processor.js';
import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js';
import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';
export type { SubQuery, DecomposedQuery };
/**
 * One sub-question produced when a user query is decomposed.
 */
export interface SubQuery {
/** Identifier used by updateSubQueryAnswer to locate this sub-query. */
id: string;
/** The sub-question text itself. */
text: string;
/** Short explanation of why this sub-query was generated. */
reason: string;
/** False on creation; set to true once an answer has been recorded. */
isAnswered: boolean;
/** The recorded answer; absent until the sub-query has been answered. */
answer?: string;
}
/**
 * A user query together with the sub-queries it was broken into and the
 * progress made in answering them.
 */
export interface DecomposedQuery {
/** The query exactly as it was passed in for decomposition. */
originalQuery: string;
/** Sub-queries derived from the original query (empty for an empty query). */
subQueries: SubQuery[];
/**
 * Progress marker: 'pending' when first created; updateSubQueryAnswer sets
 * 'completed' when every sub-query is answered and 'in_progress' otherwise.
 */
status: 'pending' | 'in_progress' | 'completed';
/**
 * Complexity score for the original query. decomposeQuery reports 0 for an
 * empty query and 1 in its error fallback.
 */
complexity: number;
}
export class QueryDecompositionTool {
private static queryCounter: number = 0;
/**
* Break down a complex query into smaller, more manageable sub-queries
*
@ -20,54 +41,83 @@ export class QueryDecompositionTool {
* @returns A decomposed query object with sub-queries
*/
decomposeQuery(query: string, context?: string): DecomposedQuery {
log.info('Using compatibility layer for QueryDecompositionTool.decomposeQuery');
try {
// Log the decomposition attempt for tracking
log.info(`Decomposing query: "${query.substring(0, 100)}..."`);
// Since the main implementation is now async but we need to maintain a sync interface,
// we'll use a simpler approach that doesn't require LLM
if (!query || query.trim().length === 0) {
log.info("Query decomposition called with empty query");
return {
originalQuery: query,
subQueries: [],
status: 'pending',
complexity: 0
};
}
// Get the complexity to determine approach
const complexity = queryProcessor.assessQueryComplexity(query);
// Assess query complexity to determine if decomposition is needed
const complexity = this.assessQueryComplexity(query);
log.info(`Query complexity assessment: ${complexity}/10`);
// For simple queries, just return the original as a single sub-query
// Use a lower threshold (2 instead of 3) to decompose more queries
if (complexity < 2) {
log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`);
const mainSubQuery = {
id: this.generateSubQueryId(),
text: query,
reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT,
isAnswered: false
};
// Still add a generic exploration query to get some related content
const genericQuery = {
id: this.generateSubQueryId(),
text: `Information related to ${query}`,
reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC,
isAnswered: false
};
return {
originalQuery: query,
subQueries: [mainSubQuery, genericQuery],
status: 'pending',
complexity
};
}
// For complex queries, perform decomposition
const subQueries = this.createSubQueries(query, context);
log.info(`Decomposed query into ${subQueries.length} sub-queries`);
// Log the sub-queries for better visibility
subQueries.forEach((sq, index) => {
log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`);
});
if (!query || query.trim().length === 0) {
return {
originalQuery: query,
subQueries: [],
subQueries,
status: 'pending',
complexity: 0
complexity
};
} catch (error: any) {
log.error(`Error decomposing query: ${error.message}`);
// Fallback to treating it as a simple query
return {
originalQuery: query,
subQueries: [{
id: this.generateSubQueryId(),
text: query,
reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR,
isAnswered: false
}],
status: 'pending',
complexity: 1
};
}
// Create a baseline decomposed query
let subQueries = [];
// For compatibility, we'll use the basic SubQuery generation
// This avoids the async LLM call which would break the sync interface
const mainSubQuery = {
id: `sq_${Date.now()}_sync_0`,
text: query,
reason: "Main question (for direct matching)",
isAnswered: false
};
subQueries.push(mainSubQuery);
// Add a generic exploration query for context
const genericQuery = {
id: `sq_${Date.now()}_sync_1`,
text: `What information is related to ${query}?`,
reason: "General exploration to find related content",
isAnswered: false
};
subQueries.push(genericQuery);
// Simplified implementation that doesn't require async/LLM calls
return {
originalQuery: query,
subQueries: subQueries,
status: 'pending',
complexity
};
}
/**
@ -83,8 +133,25 @@ export class QueryDecompositionTool {
subQueryId: string,
answer: string
): DecomposedQuery {
log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer');
return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer);
const updatedSubQueries = decomposedQuery.subQueries.map(sq => {
if (sq.id === subQueryId) {
return {
...sq,
answer,
isAnswered: true
};
}
return sq;
});
// Check if all sub-queries are answered
const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);
return {
...decomposedQuery,
subQueries: updatedSubQueries,
status: allAnswered ? 'completed' : 'in_progress'
};
}
/**
@ -94,8 +161,40 @@ export class QueryDecompositionTool {
* @returns A synthesized answer to the original query
*/
synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
log.info('Using compatibility layer for QueryDecompositionTool.synthesizeAnswer');
return queryProcessor.synthesizeAnswer(decomposedQuery);
try {
// Ensure all sub-queries are answered
if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) {
return "Cannot synthesize answer - not all sub-queries have been answered.";
}
// For simple queries with just one sub-query, return the answer directly
if (decomposedQuery.subQueries.length === 1) {
return decomposedQuery.subQueries[0].answer || "";
}
// For complex queries, build a structured response that references each sub-answer
let synthesized = `Answer to: "${decomposedQuery.originalQuery}"\n\n`;
// Group by themes if there are many sub-queries
if (decomposedQuery.subQueries.length > 3) {
// Here we would ideally group related sub-queries, but for now we'll just present them in order
synthesized += "Based on the information gathered:\n\n";
for (const sq of decomposedQuery.subQueries) {
synthesized += `${sq.answer}\n\n`;
}
} else {
// For fewer sub-queries, present each one with its question
for (const sq of decomposedQuery.subQueries) {
synthesized += `${sq.answer}\n\n`;
}
}
return synthesized.trim();
} catch (error: any) {
log.error(`Error synthesizing answer: ${error.message}`);
return "Error synthesizing the final answer.";
}
}
/**
@ -105,10 +204,6 @@ export class QueryDecompositionTool {
* @returns A status report string
*/
getQueryStatus(decomposedQuery: DecomposedQuery): string {
log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus');
// This method doesn't exist directly in the new implementation
// We'll implement a simple fallback
const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
const totalCount = decomposedQuery.subQueries.length;
@ -116,10 +211,9 @@ export class QueryDecompositionTool {
for (const sq of decomposedQuery.subQueries) {
status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
if (sq.isAnswered && sq.answer) {
status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`;
if (sq.isAnswered) {
status += ` Answer: ${this.truncateText(sq.answer || "", 100)}\n`;
}
status += '\n';
}
return status;
@ -127,15 +221,302 @@ export class QueryDecompositionTool {
/**
* Assess the complexity of a query on a scale of 1-10
* This helps determine how many sub-queries are needed
*
* @param query The query to assess
* @returns A complexity score from 1-10
*/
assessQueryComplexity(query: string): number {
    // Heuristic scoring: start at 1 and add capped bonuses for signals that
    // the query bundles several questions or asks for comparison/analysis.
    // The raw sum can reach 11, so the result is clamped to 1-10 at the end.
    //
    // The previous leading `return queryProcessor.assessQueryComplexity(query)`
    // made every line below unreachable; the heuristic is now the live path.

    // Number of matches of a global pattern in the query (0 when none).
    const countMatches = (pattern: RegExp): number => (query.match(pattern) || []).length;

    // Explicit question marks are the most direct sign of multiple questions.
    const questionMarkCount = countMatches(/\?/g);

    // Whole-word interrogatives; one combined pattern counts the same
    // occurrences as testing each word separately.
    const questionWordCount = countMatches(/\b(?:what|how|why|where|when|who|which)\b/gi);

    // Conjunctions that may join several questions together.
    const conjunctionCount = countMatches(/\b(and|or|but|as well as)\b/gi);

    // Requests for comparison or deeper analysis.
    const comparisonCount = countMatches(/\b(compare|versus|vs|difference|similarities?)\b/gi);
    const analysisCount = countMatches(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi);

    let complexity = 1;

    // Each signal contributes at most 2 points.
    complexity += Math.min(2, questionMarkCount);
    // The first question word is expected in any question, so only extras count.
    complexity += Math.min(2, Math.max(0, questionWordCount - 1));
    complexity += Math.min(2, conjunctionCount);
    complexity += Math.min(2, comparisonCount + analysisCount);

    // Longer queries get one point past 100 characters and another past 200.
    if (query.length > 100) complexity += 1;
    if (query.length > 200) complexity += 1;

    // Ensure we stay in the 1-10 range
    return Math.max(1, Math.min(10, complexity));
}
/**
* Generate a unique ID for a sub-query
*/
generateSubQueryId(): string {
    // Shape: "sq_<epoch milliseconds>_<random integer in 0..9999>".
    const timestamp = Date.now();
    const randomSuffix = Math.floor(Math.random() * 10000);
    return ['sq', timestamp, randomSuffix].join('_');
}
/**
* Create sub-queries based on the original query
*/
createSubQueries(query: string, context?: string): SubQuery[] {
    // Rule-based decomposition: the original query is always the first
    // sub-query, followed by pattern-specific follow-ups (comparison,
    // "how to", "why", "what is/are") or, when none apply, generic
    // concept-based sub-queries.
    //
    // NOTE: `context` is kept for interface compatibility; none of the
    // rules below consult it.
    const lowerQuery = query.toLowerCase();

    // Every sub-query starts unanswered and gets a freshly generated id.
    const makeSubQuery = (text: string, reason: string): SubQuery => ({
        id: this.generateSubQueryId(),
        text,
        reason,
        isAnswered: false
    });

    // Remove the first occurrence of a lead-in phrase and any trailing
    // question marks, so templates that end in "?" do not produce "??".
    const topicWithout = (leadIn: RegExp): string =>
        query.replace(leadIn, '').replace(/\?+\s*$/, '').trim();

    // Avoid creating subqueries that start with "Provide details about" or similar
    // as these have been causing recursive loops
    if (lowerQuery.includes("provide details about") ||
        lowerQuery.includes("information related to")) {
        log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`);
        return [
            makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS)
        ];
    }

    // First, add the original query as a sub-query (always)
    const subQueries: SubQuery[] = [
        makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY)
    ];

    const isComparison =
        lowerQuery.includes('compare') ||
        lowerQuery.includes('difference between') ||
        lowerQuery.includes(' vs ') ||
        lowerQuery.includes('versus');

    if (isComparison) {
        // Extract entities to compare (simplified approach)
        const entities = this.extractEntitiesForComparison(query);
        if (entities.length >= 2) {
            // One sub-query per entity...
            for (const entity of entities) {
                subQueries.push(makeSubQuery(
                    `What are the key characteristics of ${entity}?`,
                    `Getting details about "${entity}" for comparison`
                ));
            }
            // ...plus one explicit comparison across all of them.
            subQueries.push(makeSubQuery(
                `How do ${entities.join(' and ')} compare in terms of their primary features?`,
                'Direct comparison of the entities'
            ));
        }
    } else if (lowerQuery.includes('how to ')) {
        // Procedural question: ask for the steps and the likely pitfalls.
        const topic = topicWithout(/how to /i);
        subQueries.push(makeSubQuery(
            `What are the steps to ${topic}?`,
            'Finding procedural information'
        ));
        subQueries.push(makeSubQuery(
            `What are common challenges or pitfalls when trying to ${topic}?`,
            'Identifying potential difficulties'
        ));
    } else if (lowerQuery.startsWith('why ')) {
        // Causal question: ask for causes and supporting evidence.
        const topic = topicWithout(/why /i);
        subQueries.push(makeSubQuery(
            `What are the causes of ${topic}?`,
            'Identifying causes'
        ));
        subQueries.push(makeSubQuery(
            `What evidence supports explanations for ${topic}?`,
            'Finding supporting evidence'
        ));
    } else if (lowerQuery.startsWith('what is ') || lowerQuery.startsWith('what are ')) {
        // Definitional question: ask for a definition and examples.
        const topic = topicWithout(/what (is|are) /i);
        subQueries.push(makeSubQuery(`Definition of ${topic}`, 'Getting basic definition'));
        subQueries.push(makeSubQuery(`Examples of ${topic}`, 'Finding examples'));
    }

    // If no specific sub-queries were added (beyond the original),
    // generate generic exploratory sub-queries
    if (subQueries.length <= 1) {
        for (const concept of this.extractMainConcepts(query)) {
            const lowerConcept = concept.toLowerCase();
            // Don't create recursive or self-referential queries
            if (lowerConcept.includes('provide details') ||
                lowerConcept.includes('information related')) {
                continue;
            }
            subQueries.push(makeSubQuery(
                `Key information about ${concept}`,
                `Finding information about "${concept}"`
            ));
        }
    }

    return subQueries;
}
/**
* Truncate text to a maximum length with ellipsis
*/
private truncateText(text: string, maxLength: number): string {
    // Returns `text` unchanged when it already fits; otherwise cuts it so
    // that the result, including the trailing "...", is at most `maxLength`
    // characters long.
    const ellipsis = '...';
    if (text.length <= maxLength) return text;

    // With a limit shorter than the ellipsis there is no room for it:
    // hard-cut instead of returning "..." (which would exceed the limit).
    if (maxLength < ellipsis.length) {
        return text.substring(0, Math.max(0, maxLength));
    }

    return text.substring(0, maxLength - ellipsis.length) + ellipsis;
}
/**
* Extract entities for comparison from a query
*
* @param query The query to extract entities from
* @returns Array of entity strings
*/
extractEntitiesForComparison(query: string): string[] {
    // Explicit phrasings are tried first, in priority order:
    // "compare X and Y" / "difference between X and Y", then "X vs Y".
    const explicitPatterns = [
        /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i,
        /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i
    ];
    for (const pattern of explicitPatterns) {
        const hit = pattern.exec(query);
        if (hit) {
            return [hit[1].trim(), hit[2].trim()];
        }
    }

    // Fallback: treat runs of consecutive non-filler words as candidate
    // entity phrases, splitting wherever a filler word appears.
    const fillerWord = /^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i;
    const phrases: string[] = [];
    let pendingWords: string[] = [];

    // Close the phrase being collected, keeping it only if non-empty.
    const flushPending = (): void => {
        const phrase = pendingWords.join(' ').trim();
        if (phrase) {
            phrases.push(phrase);
        }
        pendingWords = [];
    };

    for (const token of query.split(/\s+/)) {
        if (fillerWord.test(token)) {
            flushPending();
        } else {
            pendingWords.push(token);
        }
    }
    flushPending();

    // Only the first two candidates are reported.
    return phrases.slice(0, 2);
}
/**
* Extract main concepts from a query
*
* @param query The query to extract concepts from
* @returns Array of concept strings
*/
extractMainConcepts(query: string): string[] {
    // Picks up to three of the most frequent significant words in the query
    // and returns, for each, a short phrase cut from the original text
    // around its first occurrence. Falls back to the trimmed query when no
    // significant word is found. Duplicate phrases are returned once.

    // Width, in characters, of the context kept on each side of a word
    // before snapping outwards to the nearest space.
    const windowChars = 15;

    // Remove question words and common stop words. The word boundaries
    // matter: without them "in"/"or"/"on" would be cut out of the middle of
    // words such as "information".
    const stopWords = /\b(?:what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by)\b/gi;
    const significantWords = query
        .replace(stopWords, ' ')
        .split(/\s+/)
        .filter(word => word.length > 3);

    // Count word frequency. A Map is used so that words like "constructor"
    // cannot collide with properties inherited by a plain object.
    const wordCounts = new Map<string, number>();
    for (const word of significantWords) {
        const key = word.toLowerCase();
        wordCounts.set(key, (wordCounts.get(key) || 0) + 1);
    }

    if (wordCounts.size === 0) {
        // Fallback if no significant words found
        return [query.trim()];
    }

    // Most frequent first; ties keep first-seen order.
    const topWords = [...wordCounts.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(([word]) => word);

    const lowerQuery = query.toLowerCase();
    const conceptPhrases: string[] = [];

    for (const word of topWords) {
        let phrase = word;
        const wordIndex = lowerQuery.indexOf(word);
        if (wordIndex >= 0) {
            // Start just after the last space at or before the left edge of
            // the window (lastIndexOf yields -1 -> 0 when there is none).
            const start = query.lastIndexOf(' ', wordIndex - windowChars) + 1;
            // End at the first space at or after the right edge of the
            // window; when there is none, run to the end of the query
            // instead of discarding the window.
            const nextSpace = query.indexOf(' ', wordIndex + word.length + windowChars);
            const end = nextSpace === -1 ? query.length : nextSpace;
            const candidate = query.substring(start, end).trim();
            if (candidate) {
                phrase = candidate;
            }
        }
        // Overlapping windows can yield the same phrase; keep it once.
        if (!conceptPhrases.includes(phrase)) {
            conceptPhrases.push(phrase);
        }
    }

    return conceptPhrases;
}
}
// Export default instance for compatibility
// NOTE(review): two `export default` statements follow (an instance, then the
// class itself). A module may declare only one default export - confirm which
// one callers expect and drop the other.
export default new QueryDecompositionTool();
export default QueryDecompositionTool;