mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-07-28 02:22:26 +08:00
maybe
This commit is contained in:
parent
d83cce88cb
commit
daa56b10e8
@ -54,82 +54,126 @@ export class QueryProcessor {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate enhanced search queries for better semantic matching
|
* Generate search queries to find relevant information for the user question
|
||||||
*
|
*
|
||||||
* @param userQuestion - The user's question
|
* @param userQuestion - The user's question
|
||||||
* @param llmService - The LLM service to use for generating queries, or null to auto-detect
|
* @param llmService - The LLM service to use for generating queries
|
||||||
* @returns Array of search queries
|
* @returns Array of search queries
|
||||||
*/
|
*/
|
||||||
async generateSearchQueries(
|
async generateSearchQueries(userQuestion: string, llmService: any): Promise<string[]> {
|
||||||
userQuestion: string,
|
|
||||||
llmService?: LLMServiceInterface
|
|
||||||
): Promise<string[]> {
|
|
||||||
if (!userQuestion || userQuestion.trim() === '') {
|
|
||||||
return []; // Return empty array for empty input
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Check cache
|
// Check cache first
|
||||||
const cacheKey = `searchQueries:${userQuestion}`;
|
const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`);
|
||||||
const cached = cacheManager.getQueryResults<string[]>(cacheKey);
|
|
||||||
if (cached && Array.isArray(cached)) {
|
const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
|
||||||
return cached;
|
Given the user's question, generate 3-5 specific search queries that would help find relevant information.
|
||||||
|
Each query should be focused on a different aspect of the question.
|
||||||
|
Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
|
||||||
|
Format your answer as a JSON array of strings, with each string being a search query.
|
||||||
|
Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
|
||||||
|
|
||||||
|
interface Message {
|
||||||
|
role: 'user' | 'assistant' | 'system';
|
||||||
|
content: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get LLM service if not provided
|
const messages: Message[] = [
|
||||||
const service = llmService || await this.getLLMService();
|
{ role: "system", content: PROMPT },
|
||||||
if (!service) {
|
{ role: "user", content: userQuestion }
|
||||||
log.info(`No LLM service available for query enhancement, using original query`);
|
|
||||||
return [userQuestion];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prepare the prompt with JSON formatting instructions
|
|
||||||
const enhancedPrompt = `${this.enhancerPrompt}
|
|
||||||
IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
|
|
||||||
Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
|
|
||||||
|
|
||||||
const messages = [
|
|
||||||
{ role: "system" as const, content: enhancedPrompt },
|
|
||||||
{ role: "user" as const, content: userQuestion }
|
|
||||||
];
|
];
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR,
|
temperature: 0.3,
|
||||||
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
maxTokens: 300
|
||||||
bypassFormatter: true,
|
|
||||||
expectsJsonResponse: true,
|
|
||||||
_bypassContextProcessing: true, // Prevent recursive calls
|
|
||||||
enableTools: false // Explicitly disable tools for this request
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get the response from the LLM
|
// Get the response from the LLM
|
||||||
const response = await service.generateChatCompletion(messages, options);
|
const response = await llmService.generateChatCompletion(messages, options);
|
||||||
const responseText = response.text;
|
const responseText = response.text; // Extract the text from the response object
|
||||||
|
|
||||||
// Use the JsonExtractor to parse the response
|
try {
|
||||||
const queries = JsonExtractor.extract<string[]>(responseText, {
|
// Remove code blocks, quotes, and clean up the response text
|
||||||
extractArrays: true,
|
let jsonStr = responseText
|
||||||
minStringLength: 3,
|
.replace(/```(?:json)?|```/g, '') // Remove code block markers
|
||||||
applyFixes: true,
|
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
|
||||||
useFallbacks: true
|
.trim();
|
||||||
});
|
|
||||||
|
|
||||||
if (queries && queries.length > 0) {
|
// Check if the text might contain a JSON array (has square brackets)
|
||||||
log.info(`Extracted ${queries.length} queries using JsonExtractor`);
|
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||||
cacheManager.storeQueryResults(cacheKey, queries);
|
// Extract just the array part if there's explanatory text
|
||||||
return queries;
|
const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
|
||||||
|
if (arrayMatch) {
|
||||||
|
jsonStr = arrayMatch[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to parse the JSON
|
||||||
|
try {
|
||||||
|
const queries = JSON.parse(jsonStr);
|
||||||
|
if (Array.isArray(queries) && queries.length > 0) {
|
||||||
|
const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
|
||||||
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} catch (innerError) {
|
||||||
|
// If parsing fails, log it and continue to the fallback
|
||||||
|
log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback 1: Try to extract an array manually by splitting on commas between quotes
|
||||||
|
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||||
|
const arrayContent = jsonStr.substring(
|
||||||
|
jsonStr.indexOf('[') + 1,
|
||||||
|
jsonStr.lastIndexOf(']')
|
||||||
|
);
|
||||||
|
|
||||||
|
// Use regex to match quoted strings, handling escaped quotes
|
||||||
|
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
|
||||||
|
if (stringMatches && stringMatches.length > 0) {
|
||||||
|
const result = stringMatches
|
||||||
|
.map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
|
||||||
|
.filter((s: string) => s.length > 0);
|
||||||
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback 2: Extract queries line by line
|
||||||
|
const lines = responseText.split('\n')
|
||||||
|
.map((line: string) => line.trim())
|
||||||
|
.filter((line: string) =>
|
||||||
|
line.length > 0 &&
|
||||||
|
!line.startsWith('```') &&
|
||||||
|
!line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
|
||||||
|
!line.match(/^\[|\]$/) // Skip lines that are just brackets
|
||||||
|
);
|
||||||
|
|
||||||
|
if (lines.length > 0) {
|
||||||
|
// Remove numbering, quotes and other list markers from each line
|
||||||
|
const result = lines.map((line: string) => {
|
||||||
|
return line
|
||||||
|
.replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
|
||||||
|
.replace(/^[-*•]\s*/, '') // Remove bullet list markers
|
||||||
|
.replace(/^["']|["']$/g, '') // Remove surrounding quotes
|
||||||
|
.trim();
|
||||||
|
}).filter((s: string) => s.length > 0);
|
||||||
|
|
||||||
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
log.error(`Error parsing search queries: ${parseError}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to original question
|
// If all else fails, just use the original question
|
||||||
const fallback = [userQuestion];
|
const fallback = [userQuestion];
|
||||||
log.info(`No queries extracted, using fallback: "${userQuestion}"`);
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
|
||||||
cacheManager.storeQueryResults(cacheKey, fallback);
|
|
||||||
return fallback;
|
return fallback;
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||||
log.error(`Error generating search queries: ${errorMessage}`);
|
log.error(`Error generating search queries: ${errorMessage}`);
|
||||||
|
// Fallback to just using the original question
|
||||||
return [userQuestion];
|
return [userQuestion];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assess query complexity
|
// Simple assessment of query complexity
|
||||||
const complexity = this.assessQueryComplexity(query);
|
const complexity = query.length > 100 ? 5 : 3;
|
||||||
log.info(`Query complexity assessment: ${complexity}/10`);
|
|
||||||
|
|
||||||
// Try to get LLM service if not provided
|
// Get LLM service if not provided
|
||||||
const service = llmService || await this.getLLMService();
|
const service = llmService || await this.getLLMService();
|
||||||
|
|
||||||
// If no LLM service is available, use basic decomposition
|
// If no LLM service is available, use original query
|
||||||
if (!service) {
|
if (!service) {
|
||||||
log.info(`No LLM service available for query decomposition, using original query`);
|
log.info(`No LLM service available for query decomposition, using original query`);
|
||||||
return this.createBasicDecomposition(query, complexity);
|
return {
|
||||||
|
originalQuery: query,
|
||||||
|
subQueries: [{
|
||||||
|
id: this.generateSubQueryId(),
|
||||||
|
text: query,
|
||||||
|
reason: "Original query",
|
||||||
|
isAnswered: false
|
||||||
|
}],
|
||||||
|
status: 'pending',
|
||||||
|
complexity
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// With LLM service available, always use advanced decomposition regardless of complexity
|
// Make a simple request to decompose the query
|
||||||
try {
|
const result = await this.simpleQueryDecomposition(query, service, context);
|
||||||
log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`);
|
|
||||||
const enhancedSubQueries = await this.createLLMSubQueries(query, context, service);
|
|
||||||
|
|
||||||
if (enhancedSubQueries && enhancedSubQueries.length > 0) {
|
// Return the result
|
||||||
log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`);
|
return {
|
||||||
return {
|
originalQuery: query,
|
||||||
originalQuery: query,
|
subQueries: result,
|
||||||
subQueries: enhancedSubQueries,
|
status: 'pending',
|
||||||
status: 'pending',
|
complexity
|
||||||
complexity
|
};
|
||||||
};
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
|
||||||
log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`);
|
|
||||||
// Fall through to basic decomposition
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to basic decomposition
|
|
||||||
return this.createBasicDecomposition(query, complexity);
|
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
log.error(`Error decomposing query: ${error.message}`);
|
log.error(`Error decomposing query: ${error.message}`);
|
||||||
|
|
||||||
@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a basic decomposition of a query without using LLM
|
* Simple LLM-based query decomposition
|
||||||
*
|
*
|
||||||
* @param query The original query
|
* @param query The original query to decompose
|
||||||
* @param complexity The assessed complexity
|
* @param llmService LLM service to use
|
||||||
* @returns A basic decomposed query
|
|
||||||
*/
|
|
||||||
private createBasicDecomposition(query: string, complexity: number): DecomposedQuery {
|
|
||||||
log.info(`Using basic decomposition approach (complexity: ${complexity})`);
|
|
||||||
|
|
||||||
const mainSubQuery = {
|
|
||||||
id: this.generateSubQueryId(),
|
|
||||||
text: query,
|
|
||||||
reason: "Direct question that can be answered without decomposition",
|
|
||||||
isAnswered: false
|
|
||||||
};
|
|
||||||
|
|
||||||
// Add a generic exploration query for context
|
|
||||||
const genericQuery = {
|
|
||||||
id: this.generateSubQueryId(),
|
|
||||||
text: `What information is related to ${query}?`,
|
|
||||||
reason: "General exploration to find related content",
|
|
||||||
isAnswered: false
|
|
||||||
};
|
|
||||||
|
|
||||||
return {
|
|
||||||
originalQuery: query,
|
|
||||||
subQueries: [mainSubQuery, genericQuery],
|
|
||||||
status: 'pending',
|
|
||||||
complexity
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Use LLM to create advanced sub-queries from a complex query
|
|
||||||
*
|
|
||||||
* @param query The original complex query
|
|
||||||
* @param context Optional context to help with decomposition
|
* @param context Optional context to help with decomposition
|
||||||
* @param llmService LLM service to use for advanced decomposition
|
|
||||||
* @returns Array of sub-queries
|
* @returns Array of sub-queries
|
||||||
*/
|
*/
|
||||||
private async createLLMSubQueries(
|
private async simpleQueryDecomposition(
|
||||||
query: string,
|
query: string,
|
||||||
context?: string,
|
llmService: LLMServiceInterface,
|
||||||
llmService?: LLMServiceInterface
|
context?: string
|
||||||
): Promise<SubQuery[]> {
|
): Promise<SubQuery[]> {
|
||||||
// If no LLM service, use basic decomposition
|
|
||||||
if (!llmService) {
|
|
||||||
return this.createSubQueries(query, context);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Create a much better prompt for more effective query decomposition
|
// Create a simple prompt for query decomposition
|
||||||
const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information.
|
const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.
|
||||||
|
|
||||||
Your task is to identify the main concepts and break them down into specific, targeted search queries.
|
Your goal is to help find comprehensive information by breaking down the query into multiple search terms.
|
||||||
|
|
||||||
DO NOT simply rephrase the original query or create a generic "what's related to X" pattern.
|
IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.
|
||||||
DO create specific queries that explore different aspects of the topic.
|
|
||||||
|
|
||||||
For example:
|
For example, if the query is "What are Docker containers?", good sub-queries would be:
|
||||||
If the query is "How does Docker compare to Kubernetes?", good sub-queries would be:
|
1. "Docker container architecture and components"
|
||||||
- "Docker container architecture and features"
|
2. "Docker vs virtual machines differences"
|
||||||
- "Kubernetes container orchestration capabilities"
|
3. "Docker container use cases and benefits"
|
||||||
- "Docker vs Kubernetes performance comparison"
|
4. "Docker container deployment best practices"
|
||||||
- "When to use Docker versus Kubernetes"
|
|
||||||
|
|
||||||
Format your response as a JSON array of objects with 'text' and 'reason' properties.
|
Format your response as a JSON array of objects with 'text' and 'reason' properties.
|
||||||
Example: [
|
Example: [
|
||||||
{"text": "Docker container architecture", "reason": "Understanding Docker's core technology"},
|
{"text": "Docker container architecture", "reason": "Understanding the technical structure"},
|
||||||
{"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"}
|
{"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
|
||||||
|
{"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
|
||||||
|
{"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
|
||||||
]
|
]
|
||||||
|
|
||||||
${context ? `\nContext: ${context}` : ''}
|
${context ? `\nContext: ${context}` : ''}
|
||||||
|
|
||||||
Query: ${query}`;
|
Query: ${query}`;
|
||||||
|
|
||||||
|
log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
|
||||||
|
|
||||||
const messages = [
|
const messages = [
|
||||||
{ role: "system" as const, content: prompt }
|
{ role: "system" as const, content: prompt }
|
||||||
];
|
];
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
temperature: 0.7, // Higher temperature for more creative decomposition
|
temperature: 0.7,
|
||||||
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
||||||
bypassFormatter: true,
|
bypassFormatter: true,
|
||||||
expectsJsonResponse: true,
|
expectsJsonResponse: true,
|
||||||
_bypassContextProcessing: true, // Prevent recursive calls
|
_bypassContextProcessing: true,
|
||||||
enableTools: false // Explicitly disable tools for this request
|
enableTools: false
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get the response from the LLM
|
// Get the response from the LLM
|
||||||
const response = await llmService.generateChatCompletion(messages, options);
|
const response = await llmService.generateChatCompletion(messages, options);
|
||||||
const responseText = response.text;
|
const responseText = response.text;
|
||||||
|
|
||||||
// Try to extract structured sub-queries from the response
|
log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
|
||||||
|
|
||||||
|
// Try to parse the response as JSON
|
||||||
|
let subQueries: SubQuery[] = [];
|
||||||
try {
|
try {
|
||||||
// Expected format is an array of objects with "text" and "reason" keys
|
// Extract the JSON from the response
|
||||||
interface RawSubQuery {
|
const extractedJson = JsonExtractor.extract(responseText, {
|
||||||
text: string;
|
|
||||||
reason?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log the response for debugging
|
|
||||||
log.info(`Received response from LLM for query decomposition, extracting JSON...`);
|
|
||||||
|
|
||||||
log.info(`Response: ${responseText}`);
|
|
||||||
|
|
||||||
// Extract JSON from the response
|
|
||||||
const extractedData = JsonExtractor.extract<RawSubQuery[]>(responseText, {
|
|
||||||
extractArrays: true,
|
extractArrays: true,
|
||||||
applyFixes: true,
|
applyFixes: true,
|
||||||
useFallbacks: true
|
useFallbacks: true
|
||||||
});
|
});
|
||||||
|
|
||||||
// Validate the extracted data
|
log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
|
||||||
if (!Array.isArray(extractedData)) {
|
|
||||||
log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`);
|
if (Array.isArray(extractedJson) && extractedJson.length > 0) {
|
||||||
return this.createSubQueries(query, context);
|
// Convert the extracted data to SubQuery objects
|
||||||
|
subQueries = extractedJson
|
||||||
|
.filter(item => item && typeof item === 'object' && item.text)
|
||||||
|
.map(item => ({
|
||||||
|
id: this.generateSubQueryId(),
|
||||||
|
text: item.text,
|
||||||
|
reason: item.reason || "Sub-aspect of the main question",
|
||||||
|
isAnswered: false
|
||||||
|
}));
|
||||||
|
|
||||||
|
log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
|
||||||
|
} else {
|
||||||
|
log.info(`Failed to extract array of sub-queries from LLM response`);
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
|
log.error(`Error parsing LLM response: ${error}`);
|
||||||
|
}
|
||||||
|
|
||||||
if (extractedData.length === 0) {
|
// Always include the original query
|
||||||
log.error(`Extracted array is empty, falling back to basic decomposition`);
|
const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
|
||||||
return this.createSubQueries(query, context);
|
if (!hasOriginal) {
|
||||||
}
|
subQueries.push({
|
||||||
|
|
||||||
log.info(`Successfully extracted ${extractedData.length} items using regex pattern`);
|
|
||||||
|
|
||||||
// Validate each sub-query to ensure it has a text property
|
|
||||||
const validSubQueries = extractedData.filter(item => {
|
|
||||||
if (!item || typeof item !== 'object') {
|
|
||||||
log.error(`Invalid sub-query item: ${JSON.stringify(item)}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!item.text || typeof item.text !== 'string') {
|
|
||||||
log.error(`Sub-query missing text property: ${JSON.stringify(item)}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (validSubQueries.length === 0) {
|
|
||||||
log.error(`No valid sub-queries found after validation, falling back to basic decomposition`);
|
|
||||||
return this.createSubQueries(query, context);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (validSubQueries.length < extractedData.length) {
|
|
||||||
log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert the raw data to SubQuery objects
|
|
||||||
let subQueries = validSubQueries.map(item => ({
|
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: item.text,
|
text: query,
|
||||||
reason: item.reason || "Sub-aspect of the main question",
|
reason: "Original query",
|
||||||
isAnswered: false
|
isAnswered: false
|
||||||
}));
|
|
||||||
|
|
||||||
// Make sure we have at least the original query
|
|
||||||
const hasOriginalQuery = subQueries.some(sq => {
|
|
||||||
// Check if either sq.text or query is null/undefined before using toLowerCase
|
|
||||||
if (!sq.text) return false;
|
|
||||||
const sqText = sq.text.toLowerCase();
|
|
||||||
const originalQuery = query.toLowerCase();
|
|
||||||
|
|
||||||
return sqText.includes(originalQuery) || originalQuery.includes(sqText);
|
|
||||||
});
|
});
|
||||||
|
log.info(`Added original query to sub-queries list`);
|
||||||
|
}
|
||||||
|
|
||||||
if (!hasOriginalQuery) {
|
// Ensure we have at least 3 queries for better search coverage
|
||||||
subQueries.unshift({
|
if (subQueries.length < 3) {
|
||||||
|
// Create some generic variants of the original query
|
||||||
|
const genericVariants = [
|
||||||
|
{ text: `${query} examples and use cases`, reason: "Practical applications" },
|
||||||
|
{ text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
|
||||||
|
{ text: `${query} best practices`, reason: "Implementation guidance" }
|
||||||
|
];
|
||||||
|
|
||||||
|
// Add variants until we have at least 3 queries
|
||||||
|
for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
|
||||||
|
subQueries.push({
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: query,
|
text: genericVariants[i].text,
|
||||||
reason: "Original query",
|
reason: genericVariants[i].reason,
|
||||||
isAnswered: false
|
isAnswered: false
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log the extracted sub-queries for debugging
|
log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
|
||||||
log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`);
|
|
||||||
|
|
||||||
return subQueries;
|
|
||||||
} catch (error: any) {
|
|
||||||
log.error(`Error extracting sub-queries from LLM response: ${error.message}`);
|
|
||||||
// Fall through to traditional decomposition
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to traditional decomposition
|
log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
||||||
return this.createSubQueries(query, context);
|
return subQueries;
|
||||||
} catch (error: any) {
|
} catch (error) {
|
||||||
log.error(`Error in createLLMSubQueries: ${error.message}`);
|
log.error(`Error in simpleQueryDecomposition: ${error}`);
|
||||||
return this.createSubQueries(query, context);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
// Return the original query plus some variants as fallback
|
||||||
* Create sub-queries from a complex query
|
const fallbackQueries = [
|
||||||
*
|
{
|
||||||
* @param query The original complex query
|
id: this.generateSubQueryId(),
|
||||||
* @param context Optional context to help with decomposition
|
text: query,
|
||||||
* @returns Array of sub-queries
|
reason: "Original query",
|
||||||
*/
|
isAnswered: false
|
||||||
private createSubQueries(query: string, context?: string): SubQuery[] {
|
},
|
||||||
// Analyze the query to identify potential aspects to explore
|
{
|
||||||
const questionParts = this.identifyQuestionParts(query);
|
id: this.generateSubQueryId(),
|
||||||
const subQueries: SubQuery[] = [];
|
text: `${query} overview`,
|
||||||
|
reason: "General information",
|
||||||
// Add the main query as the first sub-query
|
isAnswered: false
|
||||||
subQueries.push({
|
},
|
||||||
id: this.generateSubQueryId(),
|
{
|
||||||
text: query,
|
id: this.generateSubQueryId(),
|
||||||
reason: "Main question (for direct matching)",
|
text: `${query} examples`,
|
||||||
isAnswered: false
|
reason: "Practical examples",
|
||||||
});
|
isAnswered: false
|
||||||
|
|
||||||
// Add sub-queries for each identified question part
|
|
||||||
for (const part of questionParts) {
|
|
||||||
subQueries.push({
|
|
||||||
id: this.generateSubQueryId(),
|
|
||||||
text: part,
|
|
||||||
reason: "Sub-aspect of the main question",
|
|
||||||
isAnswered: false
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add a generic exploration query to find related information
|
|
||||||
subQueries.push({
|
|
||||||
id: this.generateSubQueryId(),
|
|
||||||
text: `What information is related to ${query}?`,
|
|
||||||
reason: "General exploration to find related content",
|
|
||||||
isAnswered: false
|
|
||||||
});
|
|
||||||
|
|
||||||
// If we have context, add a specific query for that context
|
|
||||||
if (context) {
|
|
||||||
subQueries.push({
|
|
||||||
id: this.generateSubQueryId(),
|
|
||||||
text: `How does "${context}" relate to ${query}?`,
|
|
||||||
reason: "Contextual relationship exploration",
|
|
||||||
isAnswered: false
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return subQueries;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Identify parts of a complex question that could be individual sub-questions
|
|
||||||
*
|
|
||||||
* @param query The complex query to analyze
|
|
||||||
* @returns Array of potential sub-questions
|
|
||||||
*/
|
|
||||||
private identifyQuestionParts(query: string): string[] {
|
|
||||||
const parts: string[] = [];
|
|
||||||
|
|
||||||
// Check for multiple question marks
|
|
||||||
const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?'));
|
|
||||||
if (questionSentences.length > 1) {
|
|
||||||
// Multiple explicit questions detected
|
|
||||||
return questionSentences.map(s => s.trim());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for conjunctions that might separate multiple questions
|
|
||||||
const conjunctions = ['and', 'or', 'but', 'plus', 'also'];
|
|
||||||
for (const conjunction of conjunctions) {
|
|
||||||
const pattern = new RegExp(`\\b${conjunction}\\b`, 'i');
|
|
||||||
if (pattern.test(query)) {
|
|
||||||
// Split by conjunction and check if each part could be a question
|
|
||||||
const splitParts = query.split(pattern);
|
|
||||||
for (const part of splitParts) {
|
|
||||||
const trimmed = part.trim();
|
|
||||||
if (trimmed.length > 10) { // Avoid tiny fragments
|
|
||||||
parts.push(trimmed);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (parts.length > 0) {
|
];
|
||||||
return parts;
|
|
||||||
}
|
log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
||||||
}
|
return fallbackQueries;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for comparison indicators
|
|
||||||
const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs'];
|
|
||||||
for (const term of comparisonTerms) {
|
|
||||||
if (query.toLowerCase().includes(term)) {
|
|
||||||
// This is likely a comparison question, extract the items being compared
|
|
||||||
const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i'));
|
|
||||||
if (beforeAfter.length === 2) {
|
|
||||||
// Try to extract compared items
|
|
||||||
const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]);
|
|
||||||
if (aspects.length > 0) {
|
|
||||||
for (const aspect of aspects) {
|
|
||||||
parts.push(`What are the key points about ${aspect}?`);
|
|
||||||
}
|
|
||||||
parts.push(`What are the differences between ${aspects.join(' and ')}?`);
|
|
||||||
return parts;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for "multiple aspects" questions
|
|
||||||
const aspectPatterns = [
|
|
||||||
/what (?:are|is) the (\w+) (?:of|about|for|in) /i,
|
|
||||||
/how (?:to|do|does|can) .+ (\w+)/i
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const pattern of aspectPatterns) {
|
|
||||||
const match = query.match(pattern);
|
|
||||||
if (match && match[1]) {
|
|
||||||
const aspect = match[1];
|
|
||||||
parts.push(`What is the ${aspect}?`);
|
|
||||||
parts.push(`How does ${aspect} relate to the main topic?`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return parts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract items being compared from a comparison question
|
|
||||||
*
|
|
||||||
* @param before Text before the comparison term
|
|
||||||
* @param after Text after the comparison term
|
|
||||||
* @returns Array of items being compared
|
|
||||||
*/
|
|
||||||
private extractComparisonAspects(before: string, after: string): string[] {
|
|
||||||
const aspects: string[] = [];
|
|
||||||
|
|
||||||
// Look for "between A and B" pattern
|
|
||||||
const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i);
|
|
||||||
if (betweenMatch) {
|
|
||||||
aspects.push(betweenMatch[1].trim());
|
|
||||||
aspects.push(betweenMatch[2].trim());
|
|
||||||
return aspects;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Look for A vs B pattern
|
|
||||||
const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i);
|
|
||||||
if (directComparison) {
|
|
||||||
aspects.push(directComparison[1].trim());
|
|
||||||
aspects.push(directComparison[2].trim());
|
|
||||||
return aspects;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to looking for named entities or key terms in both parts
|
|
||||||
const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || [];
|
|
||||||
const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || [];
|
|
||||||
|
|
||||||
// Look for substantial terms (longer than 3 chars)
|
|
||||||
const candidateTerms = [...beforeTerms, ...afterTerms]
|
|
||||||
.filter(term => term.length > 3)
|
|
||||||
.map(term => term.trim());
|
|
||||||
|
|
||||||
// Take up to 2 distinct terms
|
|
||||||
return [...new Set(candidateTerms)].slice(0, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,17 +1,38 @@
|
|||||||
/**
|
/**
|
||||||
* Query Decomposition Tool - Compatibility Layer
|
* Query Decomposition Tool
|
||||||
*
|
*
|
||||||
* This file provides backward compatibility with the new consolidated
|
* This tool helps the LLM agent break down complex user queries into
|
||||||
* query_processor.js implementation.
|
* sub-questions that can be answered individually and then synthesized
|
||||||
|
* into a comprehensive response.
|
||||||
|
*
|
||||||
|
* Features:
|
||||||
|
* - Analyze query complexity
|
||||||
|
* - Extract multiple intents from a single question
|
||||||
|
* - Create a multi-stage research plan
|
||||||
|
* - Track progress through complex information gathering
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import log from '../../log.js';
|
import log from '../../log.js';
|
||||||
import queryProcessor from '../context/services/query_processor.js';
|
import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';
|
||||||
import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js';
|
|
||||||
|
|
||||||
export type { SubQuery, DecomposedQuery };
|
/** One sub-question produced when a user query is decomposed. */
export interface SubQuery {
|
||||||
|
/** Identifier used to look the sub-query up when recording its answer. */
id: string;
|
||||||
|
/** The sub-question text itself. */
text: string;
|
||||||
|
/** Short explanation of why this sub-query was created. */
reason: string;
|
||||||
|
/** False when created; set to true once an answer has been recorded. */
isAnswered: boolean;
|
||||||
|
/** The recorded answer; absent until the sub-query is answered. */
answer?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A user query together with the sub-queries it was broken into. */
export interface DecomposedQuery {
|
||||||
|
/** The query exactly as it was passed to the decomposition step. */
originalQuery: string;
|
||||||
|
/** Sub-queries derived from the original query (may be empty). */
subQueries: SubQuery[];
|
||||||
|
/** Progress marker: 'pending' on creation, 'in_progress' / 'completed' as answers are recorded. */
status: 'pending' | 'in_progress' | 'completed';
|
||||||
|
/** Complexity score assigned to the original query. */
complexity: number;
|
||||||
|
}
|
||||||
|
|
||||||
export class QueryDecompositionTool {
|
export class QueryDecompositionTool {
|
||||||
|
private static queryCounter: number = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Break down a complex query into smaller, more manageable sub-queries
|
* Break down a complex query into smaller, more manageable sub-queries
|
||||||
*
|
*
|
||||||
@ -20,54 +41,83 @@ export class QueryDecompositionTool {
|
|||||||
* @returns A decomposed query object with sub-queries
|
* @returns A decomposed query object with sub-queries
|
||||||
*/
|
*/
|
||||||
decomposeQuery(query: string, context?: string): DecomposedQuery {
    // Splits a query into sub-queries. Simple queries (complexity < 2) yield the
    // query itself plus one generic exploration query; anything else is handed
    // to createSubQueries. Any failure degrades to a single-sub-query result
    // rather than propagating the error to the caller.
    try {
        // Validate before touching the string: the guard also covers a
        // null/undefined query arriving from untyped callers, which would
        // otherwise throw on substring() and be misreported as a
        // decomposition error instead of an empty result.
        if (!query || query.trim().length === 0) {
            log.info("Query decomposition called with empty query");
            return {
                originalQuery: query,
                subQueries: [],
                status: 'pending',
                complexity: 0
            };
        }

        // Log the decomposition attempt for tracking
        log.info(`Decomposing query: "${query.substring(0, 100)}..."`);

        // Assess query complexity to determine if decomposition is needed
        const complexity = this.assessQueryComplexity(query);
        log.info(`Query complexity assessment: ${complexity}/10`);

        // Threshold is deliberately low (2 rather than 3) so that more
        // queries go through full decomposition.
        if (complexity < 2) {
            log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`);

            const mainSubQuery: SubQuery = {
                id: this.generateSubQueryId(),
                text: query,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT,
                isAnswered: false
            };

            // Still add a generic exploration query to get some related content
            const genericQuery: SubQuery = {
                id: this.generateSubQueryId(),
                text: `Information related to ${query}`,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC,
                isAnswered: false
            };

            return {
                originalQuery: query,
                subQueries: [mainSubQuery, genericQuery],
                status: 'pending',
                complexity
            };
        }

        // For complex queries, perform decomposition
        const subQueries = this.createSubQueries(query, context);
        log.info(`Decomposed query into ${subQueries.length} sub-queries`);

        // Log the sub-queries for better visibility
        subQueries.forEach((sq, index) => {
            log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`);
        });

        return {
            originalQuery: query,
            subQueries,
            status: 'pending',
            complexity
        };
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error decomposing query: ${message}`);

        // Fallback to treating it as a simple query
        return {
            originalQuery: query,
            subQueries: [{
                id: this.generateSubQueryId(),
                text: query,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR,
                isAnswered: false
            }],
            status: 'pending',
            complexity: 1
        };
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -83,8 +133,25 @@ export class QueryDecompositionTool {
|
|||||||
subQueryId: string,
|
subQueryId: string,
|
||||||
answer: string
|
answer: string
|
||||||
): DecomposedQuery {
|
): DecomposedQuery {
|
||||||
log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer');
|
const updatedSubQueries = decomposedQuery.subQueries.map(sq => {
|
||||||
return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer);
|
if (sq.id === subQueryId) {
|
||||||
|
return {
|
||||||
|
...sq,
|
||||||
|
answer,
|
||||||
|
isAnswered: true
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return sq;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Check if all sub-queries are answered
|
||||||
|
const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);
|
||||||
|
|
||||||
|
return {
|
||||||
|
...decomposedQuery,
|
||||||
|
subQueries: updatedSubQueries,
|
||||||
|
status: allAnswered ? 'completed' : 'in_progress'
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -94,8 +161,40 @@ export class QueryDecompositionTool {
|
|||||||
* @returns A synthesized answer to the original query
|
* @returns A synthesized answer to the original query
|
||||||
*/
|
*/
|
||||||
synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
    // Combines the recorded sub-query answers into one response. Returns a
    // fixed notice when any sub-query is still unanswered, the bare answer
    // when there is exactly one sub-query, and otherwise a headed list of
    // answers in sub-query order. Errors are logged and reported as a fixed
    // message instead of being thrown.
    try {
        const { originalQuery, subQueries } = decomposedQuery;

        // Ensure all sub-queries are answered
        if (!subQueries.every(sq => sq.isAnswered)) {
            return "Cannot synthesize answer - not all sub-queries have been answered.";
        }

        // For simple queries with just one sub-query, return the answer directly
        if (subQueries.length === 1) {
            return subQueries[0].answer || "";
        }

        let synthesized = `Answer to: "${originalQuery}"\n\n`;

        // Larger sets get an introductory line; grouping by theme is not
        // implemented, so answers are always presented in order.
        if (subQueries.length > 3) {
            synthesized += "Based on the information gathered:\n\n";
        }

        for (const sq of subQueries) {
            // `answer` is optional even when isAnswered is true; fall back to
            // an empty string so the literal text "undefined" never leaks
            // into the response (same fallback as the single-answer path).
            synthesized += `${sq.answer || ""}\n\n`;
        }

        return synthesized.trim();
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error synthesizing answer: ${message}`);
        return "Error synthesizing the final answer.";
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -105,10 +204,6 @@ export class QueryDecompositionTool {
|
|||||||
* @returns A status report string
|
* @returns A status report string
|
||||||
*/
|
*/
|
||||||
getQueryStatus(decomposedQuery: DecomposedQuery): string {
|
getQueryStatus(decomposedQuery: DecomposedQuery): string {
|
||||||
log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus');
|
|
||||||
// This method doesn't exist directly in the new implementation
|
|
||||||
// We'll implement a simple fallback
|
|
||||||
|
|
||||||
const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
|
const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
|
||||||
const totalCount = decomposedQuery.subQueries.length;
|
const totalCount = decomposedQuery.subQueries.length;
|
||||||
|
|
||||||
@ -116,10 +211,9 @@ export class QueryDecompositionTool {
|
|||||||
|
|
||||||
for (const sq of decomposedQuery.subQueries) {
|
for (const sq of decomposedQuery.subQueries) {
|
||||||
status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
|
status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
|
||||||
if (sq.isAnswered && sq.answer) {
|
if (sq.isAnswered) {
|
||||||
status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`;
|
status += ` Answer: ${this.truncateText(sq.answer || "", 100)}\n`;
|
||||||
}
|
}
|
||||||
status += '\n';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
@ -127,15 +221,302 @@ export class QueryDecompositionTool {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Assess the complexity of a query on a scale of 1-10
|
* Assess the complexity of a query on a scale of 1-10
|
||||||
|
* This helps determine how many sub-queries are needed
|
||||||
*
|
*
|
||||||
* @param query The query to assess
|
* @param query The query to assess
|
||||||
* @returns A complexity score from 1-10
|
* @returns A complexity score from 1-10
|
||||||
*/
|
*/
|
||||||
assessQueryComplexity(query: string): number {
    // Heuristic score built from surface features of the text; each feature
    // contributes a capped amount on top of a base score of 1.
    const countMatches = (pattern: RegExp): number => (query.match(pattern) || []).length;

    // Explicit question marks
    const questionMarks = countMatches(/\?/g);

    // Question words, counted per word and summed
    const questionWords = ['what', 'how', 'why', 'where', 'when', 'who', 'which'];
    const questionWordTotal = questionWords.reduce(
        (total, word) => total + countMatches(new RegExp(`\\b${word}\\b`, 'gi')),
        0
    );

    // Conjunctions that may join several questions
    const conjunctions = countMatches(/\b(and|or|but|as well as)\b/gi);

    // Comparative and analytical vocabulary
    const comparisons = countMatches(/\b(compare|versus|vs|difference|similarities?)\b/gi);
    const analyses = countMatches(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi);

    const contributions = [
        Math.min(2, questionMarks),
        // only question words beyond the first one count
        Math.min(2, Math.max(0, questionWordTotal - 1)),
        Math.min(2, conjunctions),
        Math.min(2, comparisons + analyses),
        // one point each for passing the 100- and 200-character marks
        query.length > 100 ? 1 : 0,
        query.length > 200 ? 1 : 0
    ];

    const score = contributions.reduce((total, part) => total + part, 1);

    // Clamp into the documented 1-10 range
    return Math.max(1, Math.min(10, score));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a unique ID for a sub-query
|
||||||
|
*/
|
||||||
|
generateSubQueryId(): string {
    // Several IDs are normally created within the same millisecond while one
    // query is being decomposed, so a timestamp plus a small random suffix can
    // collide, and answers are matched to sub-queries by ID. The class-level
    // counter only ever increases, which keeps IDs distinct within the
    // process while preserving the `sq_<timestamp>_<n>` shape.
    QueryDecompositionTool.queryCounter += 1;
    return `sq_${Date.now()}_${QueryDecompositionTool.queryCounter}`;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create sub-queries based on the original query
|
||||||
|
*/
|
||||||
|
createSubQueries(query: string, context?: string): SubQuery[] {
    // Rule-based decomposition. The original query is always the first
    // sub-query; further ones depend on the question shape (comparison,
    // "how to", "why", "what is/are"). When no rule adds anything, generic
    // per-concept sub-queries are generated instead.
    // `context` is accepted for interface compatibility and is not used here.
    const lowered = query.toLowerCase();

    const makeSubQuery = (text: string, reason: string): SubQuery => ({
        id: this.generateSubQueryId(),
        text,
        reason,
        isAnswered: false
    });

    // Topics are spliced into templates that may end in their own "?", so
    // trailing question marks/whitespace are removed to avoid output like
    // "What are the steps to bake bread??".
    const cleanTopic = (topic: string): string => topic.trim().replace(/[?\s]+$/, '');

    // Queries that already look like generated sub-queries are returned as-is;
    // decomposing them again has caused recursive loops.
    if (lowered.includes("provide details about") ||
        lowered.includes("information related to")) {
        log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`);
        return [makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS)];
    }

    // First, add the original query as a sub-query (always)
    const subQueries: SubQuery[] = [
        makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY)
    ];

    const isComparison =
        lowered.includes('compare') ||
        lowered.includes('difference between') ||
        lowered.includes(' vs ') ||
        lowered.includes('versus');

    if (isComparison) {
        // Extract entities to compare (simplified approach)
        const entities = this.extractEntitiesForComparison(query);

        if (entities.length >= 2) {
            // One sub-query per entity...
            for (const entity of entities) {
                subQueries.push(makeSubQuery(
                    `What are the key characteristics of ${entity}?`,
                    `Getting details about "${entity}" for comparison`
                ));
            }

            // ...plus an explicit comparison sub-query
            subQueries.push(makeSubQuery(
                `How do ${entities.join(' and ')} compare in terms of their primary features?`,
                'Direct comparison of the entities'
            ));
        }
    } else if (lowered.includes('how to ')) {
        const topic = cleanTopic(query.replace(/how to /i, ''));

        subQueries.push(makeSubQuery(
            `What are the steps to ${topic}?`,
            'Finding procedural information'
        ));
        subQueries.push(makeSubQuery(
            `What are common challenges or pitfalls when trying to ${topic}?`,
            'Identifying potential difficulties'
        ));
    } else if (lowered.startsWith('why ')) {
        const topic = cleanTopic(query.replace(/why /i, ''));

        subQueries.push(makeSubQuery(
            `What are the causes of ${topic}?`,
            'Identifying causes'
        ));
        subQueries.push(makeSubQuery(
            `What evidence supports explanations for ${topic}?`,
            'Finding supporting evidence'
        ));
    } else if (lowered.startsWith('what is ') || lowered.startsWith('what are ')) {
        const topic = cleanTopic(query.replace(/what (is|are) /i, ''));

        subQueries.push(makeSubQuery(`Definition of ${topic}`, 'Getting basic definition'));
        subQueries.push(makeSubQuery(`Examples of ${topic}`, 'Finding examples'));
    }

    // If no specific sub-queries were added (beyond the original),
    // generate generic exploratory sub-queries
    if (subQueries.length <= 1) {
        for (const concept of this.extractMainConcepts(query)) {
            const loweredConcept = concept.toLowerCase();

            // Don't create recursive or self-referential queries
            if (loweredConcept.includes('provide details') ||
                loweredConcept.includes('information related')) {
                continue;
            }

            subQueries.push(makeSubQuery(
                `Key information about ${concept}`,
                `Finding information about "${concept}"`
            ));
        }
    }

    return subQueries;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncate text to a maximum length with ellipsis
|
||||||
|
*/
|
||||||
|
private truncateText(text: string, maxLength: number): string {
    const fits = text.length <= maxLength;

    // When cutting, three characters of the budget go to the ellipsis marker
    return fits ? text : `${text.substring(0, maxLength - 3)}...`;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract entities for comparison from a query
|
||||||
|
*
|
||||||
|
* @param query The query to extract entities from
|
||||||
|
* @returns Array of entity strings
|
||||||
|
*/
|
||||||
|
extractEntitiesForComparison(query: string): string[] {
    // Explicit pair shapes, tried in order: "compare X and Y" /
    // "difference between X and Y", then "X vs Y" / "X versus Y".
    const pairPatterns: RegExp[] = [
        /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i,
        /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i
    ];

    for (const pairPattern of pairPatterns) {
        const found = query.match(pairPattern);
        if (found) {
            return [found[1].trim(), found[2].trim()];
        }
    }

    // No pattern matched: treat common function words as separators and
    // collect the runs of words between them as candidate entities.
    const separatorWord = /^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i;
    const candidates: string[] = [];
    let pendingWords: string[] = [];

    const flushPending = (): void => {
        const phrase = pendingWords.join(' ').trim();
        if (phrase) {
            candidates.push(phrase);
        }
        pendingWords = [];
    };

    for (const token of query.split(/\s+/)) {
        if (separatorWord.test(token)) {
            flushPending();
        } else {
            pendingWords.push(token);
        }
    }
    flushPending();

    // Return at most 2 entities
    return candidates.slice(0, 2);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract main concepts from a query
|
||||||
|
*
|
||||||
|
* @param query The query to extract concepts from
|
||||||
|
* @returns Array of concept strings
|
||||||
|
*/
|
||||||
|
extractMainConcepts(query: string): string[] {
    // Picks up to three of the most frequent significant words in the query
    // and returns, for each, a short phrase cut from the original text around
    // its first occurrence. Falls back to the trimmed query when no
    // significant word is found.

    // Remove question words and common stop words. The word boundaries matter:
    // without them the pattern also deletes letters inside longer words
    // (e.g. the "in", "or" and "on" inside "information").
    const cleanedQuery = query.replace(
        /\b(?:what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by)\b/gi,
        ' '
    );

    // Count frequency of words longer than 3 characters. A Map is used instead
    // of a plain object so that words such as "constructor" or "tostring" do
    // not pick up inherited properties, and numeric-looking words keep their
    // first-seen order.
    const wordCounts = new Map<string, number>();
    for (const rawWord of cleanedQuery.split(/\s+/)) {
        if (rawWord.length <= 3) {
            continue;
        }
        const word = rawWord.toLowerCase();
        wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
    }

    if (wordCounts.size === 0) {
        // Fallback if no significant words found
        return [query.trim()];
    }

    // Most frequent first; ties keep first-seen order
    const topWords = Array.from(wordCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(entry => entry[0]);

    const loweredQuery = query.toLowerCase();
    const conceptPhrases: string[] = [];

    for (const word of topWords) {
        let phrase = word;

        const wordIndex = loweredQuery.indexOf(word);
        if (wordIndex >= 0) {
            // Widen to the nearest space at least 15 characters away on each
            // side of the word, so the phrase starts and ends on word
            // boundaries.
            const start = Math.max(0, query.lastIndexOf(' ', wordIndex - 15) + 1);
            const nextSpace = query.indexOf(' ', wordIndex + word.length + 15);
            // No further space means the window runs to the end of the query
            const end = nextSpace === -1 ? query.length : nextSpace;

            if (end > start) {
                phrase = query.substring(start, end).trim();
            }
        }

        // Windows of nearby words often overlap completely; report each
        // phrase only once.
        if (!conceptPhrases.includes(phrase)) {
            conceptPhrases.push(phrase);
        }
    }

    return conceptPhrases;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Export default instance for compatibility
|
export default QueryDecompositionTool;
|
||||||
export default new QueryDecompositionTool();
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user