mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-07-27 18:12:29 +08:00
maybe
This commit is contained in:
parent
d83cce88cb
commit
daa56b10e8
@ -54,82 +54,126 @@ export class QueryProcessor {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate enhanced search queries for better semantic matching
|
||||
* Generate search queries to find relevant information for the user question
|
||||
*
|
||||
* @param userQuestion - The user's question
|
||||
* @param llmService - The LLM service to use for generating queries, or null to auto-detect
|
||||
* @param llmService - The LLM service to use for generating queries
|
||||
* @returns Array of search queries
|
||||
*/
|
||||
async generateSearchQueries(
|
||||
userQuestion: string,
|
||||
llmService?: LLMServiceInterface
|
||||
): Promise<string[]> {
|
||||
if (!userQuestion || userQuestion.trim() === '') {
|
||||
return []; // Return empty array for empty input
|
||||
}
|
||||
|
||||
async generateSearchQueries(userQuestion: string, llmService: any): Promise<string[]> {
|
||||
try {
|
||||
// Check cache
|
||||
const cacheKey = `searchQueries:${userQuestion}`;
|
||||
const cached = cacheManager.getQueryResults<string[]>(cacheKey);
|
||||
if (cached && Array.isArray(cached)) {
|
||||
return cached;
|
||||
// Check cache first
|
||||
const cached = cacheManager.getQueryResults(`searchQueries:${userQuestion}`);
|
||||
|
||||
const PROMPT = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
|
||||
Given the user's question, generate 3-5 specific search queries that would help find relevant information.
|
||||
Each query should be focused on a different aspect of the question.
|
||||
Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
|
||||
Format your answer as a JSON array of strings, with each string being a search query.
|
||||
Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
|
||||
|
||||
interface Message {
|
||||
role: 'user' | 'assistant' | 'system';
|
||||
content: string;
|
||||
}
|
||||
|
||||
// Get LLM service if not provided
|
||||
const service = llmService || await this.getLLMService();
|
||||
if (!service) {
|
||||
log.info(`No LLM service available for query enhancement, using original query`);
|
||||
return [userQuestion];
|
||||
}
|
||||
|
||||
// Prepare the prompt with JSON formatting instructions
|
||||
const enhancedPrompt = `${this.enhancerPrompt}
|
||||
IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
|
||||
Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
|
||||
|
||||
const messages = [
|
||||
{ role: "system" as const, content: enhancedPrompt },
|
||||
{ role: "user" as const, content: userQuestion }
|
||||
const messages: Message[] = [
|
||||
{ role: "system", content: PROMPT },
|
||||
{ role: "user", content: userQuestion }
|
||||
];
|
||||
|
||||
const options = {
|
||||
temperature: SEARCH_CONSTANTS.TEMPERATURE.QUERY_PROCESSOR,
|
||||
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
||||
bypassFormatter: true,
|
||||
expectsJsonResponse: true,
|
||||
_bypassContextProcessing: true, // Prevent recursive calls
|
||||
enableTools: false // Explicitly disable tools for this request
|
||||
temperature: 0.3,
|
||||
maxTokens: 300
|
||||
};
|
||||
|
||||
// Get the response from the LLM
|
||||
const response = await service.generateChatCompletion(messages, options);
|
||||
const responseText = response.text;
|
||||
const response = await llmService.generateChatCompletion(messages, options);
|
||||
const responseText = response.text; // Extract the text from the response object
|
||||
|
||||
// Use the JsonExtractor to parse the response
|
||||
const queries = JsonExtractor.extract<string[]>(responseText, {
|
||||
extractArrays: true,
|
||||
minStringLength: 3,
|
||||
applyFixes: true,
|
||||
useFallbacks: true
|
||||
});
|
||||
try {
|
||||
// Remove code blocks, quotes, and clean up the response text
|
||||
let jsonStr = responseText
|
||||
.replace(/```(?:json)?|```/g, '') // Remove code block markers
|
||||
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
|
||||
.trim();
|
||||
|
||||
if (queries && queries.length > 0) {
|
||||
log.info(`Extracted ${queries.length} queries using JsonExtractor`);
|
||||
cacheManager.storeQueryResults(cacheKey, queries);
|
||||
return queries;
|
||||
// Check if the text might contain a JSON array (has square brackets)
|
||||
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||
// Extract just the array part if there's explanatory text
|
||||
const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
|
||||
if (arrayMatch) {
|
||||
jsonStr = arrayMatch[0];
|
||||
}
|
||||
|
||||
// Try to parse the JSON
|
||||
try {
|
||||
const queries = JSON.parse(jsonStr);
|
||||
if (Array.isArray(queries) && queries.length > 0) {
|
||||
const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
|
||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||
return result;
|
||||
}
|
||||
} catch (innerError) {
|
||||
// If parsing fails, log it and continue to the fallback
|
||||
log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback 1: Try to extract an array manually by splitting on commas between quotes
|
||||
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||
const arrayContent = jsonStr.substring(
|
||||
jsonStr.indexOf('[') + 1,
|
||||
jsonStr.lastIndexOf(']')
|
||||
);
|
||||
|
||||
// Use regex to match quoted strings, handling escaped quotes
|
||||
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
|
||||
if (stringMatches && stringMatches.length > 0) {
|
||||
const result = stringMatches
|
||||
.map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
|
||||
.filter((s: string) => s.length > 0);
|
||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback 2: Extract queries line by line
|
||||
const lines = responseText.split('\n')
|
||||
.map((line: string) => line.trim())
|
||||
.filter((line: string) =>
|
||||
line.length > 0 &&
|
||||
!line.startsWith('```') &&
|
||||
!line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
|
||||
!line.match(/^\[|\]$/) // Skip lines that are just brackets
|
||||
);
|
||||
|
||||
if (lines.length > 0) {
|
||||
// Remove numbering, quotes and other list markers from each line
|
||||
const result = lines.map((line: string) => {
|
||||
return line
|
||||
.replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
|
||||
.replace(/^[-*•]\s*/, '') // Remove bullet list markers
|
||||
.replace(/^["']|["']$/g, '') // Remove surrounding quotes
|
||||
.trim();
|
||||
}).filter((s: string) => s.length > 0);
|
||||
|
||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||
return result;
|
||||
}
|
||||
} catch (parseError) {
|
||||
log.error(`Error parsing search queries: ${parseError}`);
|
||||
}
|
||||
|
||||
// Fallback to original question
|
||||
// If all else fails, just use the original question
|
||||
const fallback = [userQuestion];
|
||||
log.info(`No queries extracted, using fallback: "${userQuestion}"`);
|
||||
cacheManager.storeQueryResults(cacheKey, fallback);
|
||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
|
||||
return fallback;
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
log.error(`Error generating search queries: ${errorMessage}`);
|
||||
// Fallback to just using the original question
|
||||
return [userQuestion];
|
||||
}
|
||||
}
|
||||
@ -161,40 +205,38 @@ Format your answer as a valid JSON array without markdown code blocks, like this
|
||||
};
|
||||
}
|
||||
|
||||
// Assess query complexity
|
||||
const complexity = this.assessQueryComplexity(query);
|
||||
log.info(`Query complexity assessment: ${complexity}/10`);
|
||||
// Simple assessment of query complexity
|
||||
const complexity = query.length > 100 ? 5 : 3;
|
||||
|
||||
// Try to get LLM service if not provided
|
||||
// Get LLM service if not provided
|
||||
const service = llmService || await this.getLLMService();
|
||||
|
||||
// If no LLM service is available, use basic decomposition
|
||||
// If no LLM service is available, use original query
|
||||
if (!service) {
|
||||
log.info(`No LLM service available for query decomposition, using original query`);
|
||||
return this.createBasicDecomposition(query, complexity);
|
||||
return {
|
||||
originalQuery: query,
|
||||
subQueries: [{
|
||||
id: this.generateSubQueryId(),
|
||||
text: query,
|
||||
reason: "Original query",
|
||||
isAnswered: false
|
||||
}],
|
||||
status: 'pending',
|
||||
complexity
|
||||
};
|
||||
}
|
||||
|
||||
// With LLM service available, always use advanced decomposition regardless of complexity
|
||||
try {
|
||||
log.info(`Using advanced LLM-based decomposition for query (complexity: ${complexity})`);
|
||||
const enhancedSubQueries = await this.createLLMSubQueries(query, context, service);
|
||||
// Make a simple request to decompose the query
|
||||
const result = await this.simpleQueryDecomposition(query, service, context);
|
||||
|
||||
if (enhancedSubQueries && enhancedSubQueries.length > 0) {
|
||||
log.info(`LLM decomposed query into ${enhancedSubQueries.length} sub-queries`);
|
||||
return {
|
||||
originalQuery: query,
|
||||
subQueries: enhancedSubQueries,
|
||||
status: 'pending',
|
||||
complexity
|
||||
};
|
||||
}
|
||||
} catch (error: any) {
|
||||
log.error(`Error during LLM-based decomposition: ${error.message}, falling back to basic decomposition`);
|
||||
// Fall through to basic decomposition
|
||||
}
|
||||
|
||||
// Fallback to basic decomposition
|
||||
return this.createBasicDecomposition(query, complexity);
|
||||
// Return the result
|
||||
return {
|
||||
originalQuery: query,
|
||||
subQueries: result,
|
||||
status: 'pending',
|
||||
complexity
|
||||
};
|
||||
} catch (error: any) {
|
||||
log.error(`Error decomposing query: ${error.message}`);
|
||||
|
||||
@ -214,361 +256,160 @@ Format your answer as a valid JSON array without markdown code blocks, like this
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a basic decomposition of a query without using LLM
|
||||
* Simple LLM-based query decomposition
|
||||
*
|
||||
* @param query The original query
|
||||
* @param complexity The assessed complexity
|
||||
* @returns A basic decomposed query
|
||||
*/
|
||||
private createBasicDecomposition(query: string, complexity: number): DecomposedQuery {
|
||||
log.info(`Using basic decomposition approach (complexity: ${complexity})`);
|
||||
|
||||
const mainSubQuery = {
|
||||
id: this.generateSubQueryId(),
|
||||
text: query,
|
||||
reason: "Direct question that can be answered without decomposition",
|
||||
isAnswered: false
|
||||
};
|
||||
|
||||
// Add a generic exploration query for context
|
||||
const genericQuery = {
|
||||
id: this.generateSubQueryId(),
|
||||
text: `What information is related to ${query}?`,
|
||||
reason: "General exploration to find related content",
|
||||
isAnswered: false
|
||||
};
|
||||
|
||||
return {
|
||||
originalQuery: query,
|
||||
subQueries: [mainSubQuery, genericQuery],
|
||||
status: 'pending',
|
||||
complexity
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Use LLM to create advanced sub-queries from a complex query
|
||||
*
|
||||
* @param query The original complex query
|
||||
* @param query The original query to decompose
|
||||
* @param llmService LLM service to use
|
||||
* @param context Optional context to help with decomposition
|
||||
* @param llmService LLM service to use for advanced decomposition
|
||||
* @returns Array of sub-queries
|
||||
*/
|
||||
private async createLLMSubQueries(
|
||||
private async simpleQueryDecomposition(
|
||||
query: string,
|
||||
context?: string,
|
||||
llmService?: LLMServiceInterface
|
||||
llmService: LLMServiceInterface,
|
||||
context?: string
|
||||
): Promise<SubQuery[]> {
|
||||
// If no LLM service, use basic decomposition
|
||||
if (!llmService) {
|
||||
return this.createSubQueries(query, context);
|
||||
}
|
||||
|
||||
try {
|
||||
// Create a much better prompt for more effective query decomposition
|
||||
const prompt = `Decompose the following query into 3-5 specific search queries that would help find comprehensive information.
|
||||
// Create a simple prompt for query decomposition
|
||||
const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.
|
||||
|
||||
Your task is to identify the main concepts and break them down into specific, targeted search queries.
|
||||
Your goal is to help find comprehensive information by breaking down the query into multiple search terms.
|
||||
|
||||
DO NOT simply rephrase the original query or create a generic "what's related to X" pattern.
|
||||
DO create specific queries that explore different aspects of the topic.
|
||||
IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.
|
||||
|
||||
For example:
|
||||
If the query is "How does Docker compare to Kubernetes?", good sub-queries would be:
|
||||
- "Docker container architecture and features"
|
||||
- "Kubernetes container orchestration capabilities"
|
||||
- "Docker vs Kubernetes performance comparison"
|
||||
- "When to use Docker versus Kubernetes"
|
||||
For example, if the query is "What are Docker containers?", good sub-queries would be:
|
||||
1. "Docker container architecture and components"
|
||||
2. "Docker vs virtual machines differences"
|
||||
3. "Docker container use cases and benefits"
|
||||
4. "Docker container deployment best practices"
|
||||
|
||||
Format your response as a JSON array of objects with 'text' and 'reason' properties.
|
||||
Example: [
|
||||
{"text": "Docker container architecture", "reason": "Understanding Docker's core technology"},
|
||||
{"text": "Kubernetes orchestration features", "reason": "Exploring Kubernetes' main capabilities"}
|
||||
{"text": "Docker container architecture", "reason": "Understanding the technical structure"},
|
||||
{"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
|
||||
{"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
|
||||
{"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
|
||||
]
|
||||
|
||||
${context ? `\nContext: ${context}` : ''}
|
||||
|
||||
Query: ${query}`;
|
||||
|
||||
log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
|
||||
|
||||
const messages = [
|
||||
{ role: "system" as const, content: prompt }
|
||||
];
|
||||
|
||||
const options = {
|
||||
temperature: 0.7, // Higher temperature for more creative decomposition
|
||||
temperature: 0.7,
|
||||
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
||||
bypassFormatter: true,
|
||||
expectsJsonResponse: true,
|
||||
_bypassContextProcessing: true, // Prevent recursive calls
|
||||
enableTools: false // Explicitly disable tools for this request
|
||||
_bypassContextProcessing: true,
|
||||
enableTools: false
|
||||
};
|
||||
|
||||
// Get the response from the LLM
|
||||
const response = await llmService.generateChatCompletion(messages, options);
|
||||
const responseText = response.text;
|
||||
|
||||
// Try to extract structured sub-queries from the response
|
||||
log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
|
||||
|
||||
// Try to parse the response as JSON
|
||||
let subQueries: SubQuery[] = [];
|
||||
try {
|
||||
// Expected format is an array of objects with "text" and "reason" keys
|
||||
interface RawSubQuery {
|
||||
text: string;
|
||||
reason?: string;
|
||||
}
|
||||
|
||||
// Log the response for debugging
|
||||
log.info(`Received response from LLM for query decomposition, extracting JSON...`);
|
||||
|
||||
log.info(`Response: ${responseText}`);
|
||||
|
||||
// Extract JSON from the response
|
||||
const extractedData = JsonExtractor.extract<RawSubQuery[]>(responseText, {
|
||||
// Extract the JSON from the response
|
||||
const extractedJson = JsonExtractor.extract(responseText, {
|
||||
extractArrays: true,
|
||||
applyFixes: true,
|
||||
useFallbacks: true
|
||||
});
|
||||
|
||||
// Validate the extracted data
|
||||
if (!Array.isArray(extractedData)) {
|
||||
log.error(`Failed to extract array from LLM response, got: ${typeof extractedData}`);
|
||||
return this.createSubQueries(query, context);
|
||||
log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
|
||||
|
||||
if (Array.isArray(extractedJson) && extractedJson.length > 0) {
|
||||
// Convert the extracted data to SubQuery objects
|
||||
subQueries = extractedJson
|
||||
.filter(item => item && typeof item === 'object' && item.text)
|
||||
.map(item => ({
|
||||
id: this.generateSubQueryId(),
|
||||
text: item.text,
|
||||
reason: item.reason || "Sub-aspect of the main question",
|
||||
isAnswered: false
|
||||
}));
|
||||
|
||||
log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
|
||||
} else {
|
||||
log.info(`Failed to extract array of sub-queries from LLM response`);
|
||||
}
|
||||
} catch (error) {
|
||||
log.error(`Error parsing LLM response: ${error}`);
|
||||
}
|
||||
|
||||
if (extractedData.length === 0) {
|
||||
log.error(`Extracted array is empty, falling back to basic decomposition`);
|
||||
return this.createSubQueries(query, context);
|
||||
}
|
||||
|
||||
log.info(`Successfully extracted ${extractedData.length} items using regex pattern`);
|
||||
|
||||
// Validate each sub-query to ensure it has a text property
|
||||
const validSubQueries = extractedData.filter(item => {
|
||||
if (!item || typeof item !== 'object') {
|
||||
log.error(`Invalid sub-query item: ${JSON.stringify(item)}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!item.text || typeof item.text !== 'string') {
|
||||
log.error(`Sub-query missing text property: ${JSON.stringify(item)}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
if (validSubQueries.length === 0) {
|
||||
log.error(`No valid sub-queries found after validation, falling back to basic decomposition`);
|
||||
return this.createSubQueries(query, context);
|
||||
}
|
||||
|
||||
if (validSubQueries.length < extractedData.length) {
|
||||
log.info(`Some invalid sub-queries were filtered out: ${extractedData.length} -> ${validSubQueries.length}`);
|
||||
}
|
||||
|
||||
// Convert the raw data to SubQuery objects
|
||||
let subQueries = validSubQueries.map(item => ({
|
||||
// Always include the original query
|
||||
const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
|
||||
if (!hasOriginal) {
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: item.text,
|
||||
reason: item.reason || "Sub-aspect of the main question",
|
||||
text: query,
|
||||
reason: "Original query",
|
||||
isAnswered: false
|
||||
}));
|
||||
|
||||
// Make sure we have at least the original query
|
||||
const hasOriginalQuery = subQueries.some(sq => {
|
||||
// Check if either sq.text or query is null/undefined before using toLowerCase
|
||||
if (!sq.text) return false;
|
||||
const sqText = sq.text.toLowerCase();
|
||||
const originalQuery = query.toLowerCase();
|
||||
|
||||
return sqText.includes(originalQuery) || originalQuery.includes(sqText);
|
||||
});
|
||||
log.info(`Added original query to sub-queries list`);
|
||||
}
|
||||
|
||||
if (!hasOriginalQuery) {
|
||||
subQueries.unshift({
|
||||
// Ensure we have at least 3 queries for better search coverage
|
||||
if (subQueries.length < 3) {
|
||||
// Create some generic variants of the original query
|
||||
const genericVariants = [
|
||||
{ text: `${query} examples and use cases`, reason: "Practical applications" },
|
||||
{ text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
|
||||
{ text: `${query} best practices`, reason: "Implementation guidance" }
|
||||
];
|
||||
|
||||
// Add variants until we have at least 3 queries
|
||||
for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: query,
|
||||
reason: "Original query",
|
||||
text: genericVariants[i].text,
|
||||
reason: genericVariants[i].reason,
|
||||
isAnswered: false
|
||||
});
|
||||
}
|
||||
|
||||
// Log the extracted sub-queries for debugging
|
||||
log.info(`Successfully extracted ${subQueries.length} sub-queries from LLM response`);
|
||||
|
||||
return subQueries;
|
||||
} catch (error: any) {
|
||||
log.error(`Error extracting sub-queries from LLM response: ${error.message}`);
|
||||
// Fall through to traditional decomposition
|
||||
log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
|
||||
}
|
||||
|
||||
// Fallback to traditional decomposition
|
||||
return this.createSubQueries(query, context);
|
||||
} catch (error: any) {
|
||||
log.error(`Error in createLLMSubQueries: ${error.message}`);
|
||||
return this.createSubQueries(query, context);
|
||||
}
|
||||
}
|
||||
log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
||||
return subQueries;
|
||||
} catch (error) {
|
||||
log.error(`Error in simpleQueryDecomposition: ${error}`);
|
||||
|
||||
/**
|
||||
* Create sub-queries from a complex query
|
||||
*
|
||||
* @param query The original complex query
|
||||
* @param context Optional context to help with decomposition
|
||||
* @returns Array of sub-queries
|
||||
*/
|
||||
private createSubQueries(query: string, context?: string): SubQuery[] {
|
||||
// Analyze the query to identify potential aspects to explore
|
||||
const questionParts = this.identifyQuestionParts(query);
|
||||
const subQueries: SubQuery[] = [];
|
||||
|
||||
// Add the main query as the first sub-query
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: query,
|
||||
reason: "Main question (for direct matching)",
|
||||
isAnswered: false
|
||||
});
|
||||
|
||||
// Add sub-queries for each identified question part
|
||||
for (const part of questionParts) {
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: part,
|
||||
reason: "Sub-aspect of the main question",
|
||||
isAnswered: false
|
||||
});
|
||||
}
|
||||
|
||||
// Add a generic exploration query to find related information
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: `What information is related to ${query}?`,
|
||||
reason: "General exploration to find related content",
|
||||
isAnswered: false
|
||||
});
|
||||
|
||||
// If we have context, add a specific query for that context
|
||||
if (context) {
|
||||
subQueries.push({
|
||||
id: this.generateSubQueryId(),
|
||||
text: `How does "${context}" relate to ${query}?`,
|
||||
reason: "Contextual relationship exploration",
|
||||
isAnswered: false
|
||||
});
|
||||
}
|
||||
|
||||
return subQueries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Identify parts of a complex question that could be individual sub-questions
|
||||
*
|
||||
* @param query The complex query to analyze
|
||||
* @returns Array of potential sub-questions
|
||||
*/
|
||||
private identifyQuestionParts(query: string): string[] {
|
||||
const parts: string[] = [];
|
||||
|
||||
// Check for multiple question marks
|
||||
const questionSentences = query.split(/(?<=\?)/).filter(s => s.includes('?'));
|
||||
if (questionSentences.length > 1) {
|
||||
// Multiple explicit questions detected
|
||||
return questionSentences.map(s => s.trim());
|
||||
}
|
||||
|
||||
// Check for conjunctions that might separate multiple questions
|
||||
const conjunctions = ['and', 'or', 'but', 'plus', 'also'];
|
||||
for (const conjunction of conjunctions) {
|
||||
const pattern = new RegExp(`\\b${conjunction}\\b`, 'i');
|
||||
if (pattern.test(query)) {
|
||||
// Split by conjunction and check if each part could be a question
|
||||
const splitParts = query.split(pattern);
|
||||
for (const part of splitParts) {
|
||||
const trimmed = part.trim();
|
||||
if (trimmed.length > 10) { // Avoid tiny fragments
|
||||
parts.push(trimmed);
|
||||
}
|
||||
// Return the original query plus some variants as fallback
|
||||
const fallbackQueries = [
|
||||
{
|
||||
id: this.generateSubQueryId(),
|
||||
text: query,
|
||||
reason: "Original query",
|
||||
isAnswered: false
|
||||
},
|
||||
{
|
||||
id: this.generateSubQueryId(),
|
||||
text: `${query} overview`,
|
||||
reason: "General information",
|
||||
isAnswered: false
|
||||
},
|
||||
{
|
||||
id: this.generateSubQueryId(),
|
||||
text: `${query} examples`,
|
||||
reason: "Practical examples",
|
||||
isAnswered: false
|
||||
}
|
||||
if (parts.length > 0) {
|
||||
return parts;
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
||||
return fallbackQueries;
|
||||
}
|
||||
|
||||
// Check for comparison indicators
|
||||
const comparisonTerms = ['compare', 'difference', 'differences', 'versus', 'vs'];
|
||||
for (const term of comparisonTerms) {
|
||||
if (query.toLowerCase().includes(term)) {
|
||||
// This is likely a comparison question, extract the items being compared
|
||||
const beforeAfter = query.split(new RegExp(`\\b${term}\\b`, 'i'));
|
||||
if (beforeAfter.length === 2) {
|
||||
// Try to extract compared items
|
||||
const aspects = this.extractComparisonAspects(beforeAfter[0], beforeAfter[1]);
|
||||
if (aspects.length > 0) {
|
||||
for (const aspect of aspects) {
|
||||
parts.push(`What are the key points about ${aspect}?`);
|
||||
}
|
||||
parts.push(`What are the differences between ${aspects.join(' and ')}?`);
|
||||
return parts;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for "multiple aspects" questions
|
||||
const aspectPatterns = [
|
||||
/what (?:are|is) the (\w+) (?:of|about|for|in) /i,
|
||||
/how (?:to|do|does|can) .+ (\w+)/i
|
||||
];
|
||||
|
||||
for (const pattern of aspectPatterns) {
|
||||
const match = query.match(pattern);
|
||||
if (match && match[1]) {
|
||||
const aspect = match[1];
|
||||
parts.push(`What is the ${aspect}?`);
|
||||
parts.push(`How does ${aspect} relate to the main topic?`);
|
||||
}
|
||||
}
|
||||
|
||||
return parts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract items being compared from a comparison question
|
||||
*
|
||||
* @param before Text before the comparison term
|
||||
* @param after Text after the comparison term
|
||||
* @returns Array of items being compared
|
||||
*/
|
||||
private extractComparisonAspects(before: string, after: string): string[] {
|
||||
const aspects: string[] = [];
|
||||
|
||||
// Look for "between A and B" pattern
|
||||
const betweenMatch = after.match(/between (.+?) and (.+?)(?:\?|$)/i);
|
||||
if (betweenMatch) {
|
||||
aspects.push(betweenMatch[1].trim());
|
||||
aspects.push(betweenMatch[2].trim());
|
||||
return aspects;
|
||||
}
|
||||
|
||||
// Look for A vs B pattern
|
||||
const directComparison = after.match(/(.+?) (?:and|vs|versus) (.+?)(?:\?|$)/i);
|
||||
if (directComparison) {
|
||||
aspects.push(directComparison[1].trim());
|
||||
aspects.push(directComparison[2].trim());
|
||||
return aspects;
|
||||
}
|
||||
|
||||
// Fall back to looking for named entities or key terms in both parts
|
||||
const beforeTerms = before.match(/(\w+(?:\s+\w+){0,2})/g) || [];
|
||||
const afterTerms = after.match(/(\w+(?:\s+\w+){0,2})/g) || [];
|
||||
|
||||
// Look for substantial terms (longer than 3 chars)
|
||||
const candidateTerms = [...beforeTerms, ...afterTerms]
|
||||
.filter(term => term.length > 3)
|
||||
.map(term => term.trim());
|
||||
|
||||
// Take up to 2 distinct terms
|
||||
return [...new Set(candidateTerms)].slice(0, 2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,17 +1,38 @@
|
||||
/**
|
||||
* Query Decomposition Tool - Compatibility Layer
|
||||
* Query Decomposition Tool
|
||||
*
|
||||
* This file provides backward compatibility with the new consolidated
|
||||
* query_processor.js implementation.
|
||||
* This tool helps the LLM agent break down complex user queries into
|
||||
* sub-questions that can be answered individually and then synthesized
|
||||
* into a comprehensive response.
|
||||
*
|
||||
* Features:
|
||||
* - Analyze query complexity
|
||||
* - Extract multiple intents from a single question
|
||||
* - Create a multi-stage research plan
|
||||
* - Track progress through complex information gathering
|
||||
*/
|
||||
|
||||
import log from '../../log.js';
|
||||
import queryProcessor from '../context/services/query_processor.js';
|
||||
import type { SubQuery, DecomposedQuery } from '../context/services/query_processor.js';
|
||||
import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';
|
||||
|
||||
export type { SubQuery, DecomposedQuery };
|
||||
export interface SubQuery {
|
||||
id: string;
|
||||
text: string;
|
||||
reason: string;
|
||||
isAnswered: boolean;
|
||||
answer?: string;
|
||||
}
|
||||
|
||||
export interface DecomposedQuery {
|
||||
originalQuery: string;
|
||||
subQueries: SubQuery[];
|
||||
status: 'pending' | 'in_progress' | 'completed';
|
||||
complexity: number;
|
||||
}
|
||||
|
||||
export class QueryDecompositionTool {
|
||||
private static queryCounter: number = 0;
|
||||
|
||||
/**
|
||||
* Break down a complex query into smaller, more manageable sub-queries
|
||||
*
|
||||
@ -20,54 +41,83 @@ export class QueryDecompositionTool {
|
||||
* @returns A decomposed query object with sub-queries
|
||||
*/
|
||||
decomposeQuery(query: string, context?: string): DecomposedQuery {
    // Synchronous, rule-based decomposition: scores the query with
    // assessQueryComplexity() and, depending on the score, either wraps it in
    // two simple sub-queries or delegates to createSubQueries().
    // Never throws: any failure is logged and a single-sub-query fallback is returned.
    try {
        // Guard first so nothing below dereferences an empty or missing query.
        if (!query || query.trim().length === 0) {
            log.info("Query decomposition called with empty query");
            return {
                originalQuery: query,
                subQueries: [],
                status: 'pending',
                complexity: 0
            };
        }

        // Log the decomposition attempt for tracking (preview capped at 100 chars)
        log.info(`Decomposing query: "${query.substring(0, 100)}..."`);

        // Assess query complexity to determine if decomposition is needed
        const complexity = this.assessQueryComplexity(query);
        log.info(`Query complexity assessment: ${complexity}/10`);

        // For simple queries, just return the original as a single sub-query.
        // Uses a lower threshold (2 instead of 3) to decompose more queries.
        if (complexity < 2) {
            log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`);

            const mainSubQuery = {
                id: this.generateSubQueryId(),
                text: query,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT,
                isAnswered: false
            };

            // Still add a generic exploration query to get some related content
            const genericQuery = {
                id: this.generateSubQueryId(),
                text: `Information related to ${query}`,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC,
                isAnswered: false
            };

            return {
                originalQuery: query,
                subQueries: [mainSubQuery, genericQuery],
                status: 'pending',
                complexity
            };
        }

        // For complex queries, perform decomposition
        const subQueries = this.createSubQueries(query, context);
        log.info(`Decomposed query into ${subQueries.length} sub-queries`);

        // Log the sub-queries for better visibility
        subQueries.forEach((sq, index) => {
            log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`);
        });

        return {
            originalQuery: query,
            subQueries,
            status: 'pending',
            complexity
        };
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error decomposing query: ${message}`);

        // Fallback to treating it as a simple query
        return {
            originalQuery: query,
            subQueries: [{
                id: this.generateSubQueryId(),
                text: query,
                reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR,
                isAnswered: false
            }],
            status: 'pending',
            complexity: 1
        };
    }
}
|
||||
|
||||
/**
|
||||
@ -83,8 +133,25 @@ export class QueryDecompositionTool {
|
||||
subQueryId: string,
|
||||
answer: string
|
||||
): DecomposedQuery {
|
||||
log.info('Using compatibility layer for QueryDecompositionTool.updateSubQueryAnswer');
|
||||
return queryProcessor.updateSubQueryAnswer(decomposedQuery, subQueryId, answer);
|
||||
const updatedSubQueries = decomposedQuery.subQueries.map(sq => {
|
||||
if (sq.id === subQueryId) {
|
||||
return {
|
||||
...sq,
|
||||
answer,
|
||||
isAnswered: true
|
||||
};
|
||||
}
|
||||
return sq;
|
||||
});
|
||||
|
||||
// Check if all sub-queries are answered
|
||||
const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);
|
||||
|
||||
return {
|
||||
...decomposedQuery,
|
||||
subQueries: updatedSubQueries,
|
||||
status: allAnswered ? 'completed' : 'in_progress'
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@ -94,8 +161,40 @@ export class QueryDecompositionTool {
|
||||
* @returns A synthesized answer to the original query
|
||||
*/
|
||||
synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
    // Combines the answers of all sub-queries into one text.
    // Returns an explanatory message (rather than throwing) when some
    // sub-queries are still unanswered or when synthesis fails.
    try {
        const { subQueries, originalQuery } = decomposedQuery;

        // Ensure all sub-queries are answered
        if (!subQueries.every(sq => sq.isAnswered)) {
            return "Cannot synthesize answer - not all sub-queries have been answered.";
        }

        // For simple queries with just one sub-query, return the answer directly
        if (subQueries.length === 1) {
            return subQueries[0].answer ?? "";
        }

        // For complex queries, build a response that lists each sub-answer in order
        let synthesized = `Answer to: "${originalQuery}"\n\n`;

        // Longer lists get an introductory line; grouping by theme is not implemented.
        if (subQueries.length > 3) {
            synthesized += "Based on the information gathered:\n\n";
        }

        for (const sq of subQueries) {
            // A sub-query flagged as answered may still lack text; emit an
            // empty paragraph instead of the literal string "undefined".
            synthesized += `${sq.answer ?? ""}\n\n`;
        }

        return synthesized.trim();
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        log.error(`Error synthesizing answer: ${message}`);
        return "Error synthesizing the final answer.";
    }
}
|
||||
|
||||
/**
|
||||
@ -105,10 +204,6 @@ export class QueryDecompositionTool {
|
||||
* @returns A status report string
|
||||
*/
|
||||
getQueryStatus(decomposedQuery: DecomposedQuery): string {
|
||||
log.info('Using compatibility layer for QueryDecompositionTool.getQueryStatus');
|
||||
// This method doesn't exist directly in the new implementation
|
||||
// We'll implement a simple fallback
|
||||
|
||||
const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;
|
||||
const totalCount = decomposedQuery.subQueries.length;
|
||||
|
||||
@ -116,10 +211,9 @@ export class QueryDecompositionTool {
|
||||
|
||||
for (const sq of decomposedQuery.subQueries) {
|
||||
status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`;
|
||||
if (sq.isAnswered && sq.answer) {
|
||||
status += `Answer: ${sq.answer.substring(0, 100)}${sq.answer.length > 100 ? '...' : ''}\n`;
|
||||
if (sq.isAnswered) {
|
||||
status += ` Answer: ${this.truncateText(sq.answer || "", 100)}\n`;
|
||||
}
|
||||
status += '\n';
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -127,15 +221,302 @@ export class QueryDecompositionTool {
|
||||
|
||||
/**
 * Assess the complexity of a query on a scale of 1-10.
 * This helps determine how many sub-queries are needed.
 *
 * The score starts at 1 and gains up to 2 points each for: question marks,
 * question words beyond the first, conjunctions, and comparison/analysis
 * keywords; plus 1 point each when the query is longer than 100 and 200
 * characters. The total is clamped to the 1-10 range.
 *
 * @param query The query to assess
 * @returns A complexity score from 1-10
 */
assessQueryComplexity(query: string): number {
    const countMatches = (pattern: RegExp): number => (query.match(pattern) ?? []).length;

    // Count the number of question marks as a basic indicator
    const questionMarkCount = countMatches(/\?/g);

    // Count potential sub-questions based on question words
    const questionWordCount = countMatches(/\b(?:what|how|why|where|when|who|which)\b/gi);

    // Look for conjunctions which might join multiple questions
    const conjunctionCount = countMatches(/\b(and|or|but|as well as)\b/gi);

    // Look for complex requirements
    const comparisonCount = countMatches(/\b(compare|versus|vs|difference|similarities?)\b/gi);
    const analysisCount = countMatches(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi);

    // Base complexity
    let complexity = 1;

    // Add for multiple questions
    complexity += Math.min(2, questionMarkCount);

    // Add for question words beyond the first one
    complexity += Math.min(2, Math.max(0, questionWordCount - 1));

    // Add for conjunctions that might join questions
    complexity += Math.min(2, conjunctionCount);

    // Add for comparative/analytical requirements
    complexity += Math.min(2, comparisonCount + analysisCount);

    // Add for overall length
    if (query.length > 100) complexity += 1;
    if (query.length > 200) complexity += 1;

    // Ensure we stay in the 1-10 range
    return Math.max(1, Math.min(10, complexity));
}
|
||||
|
||||
/**
 * Generate a unique ID for a sub-query.
 *
 * Combines the current timestamp with the class-wide counter so that ids
 * created within the same millisecond cannot collide (sub-queries are later
 * looked up by id when their answers are recorded).
 *
 * @returns An id of the form `sq_<timestamp>_<sequence>`
 */
generateSubQueryId(): string {
    QueryDecompositionTool.queryCounter += 1;
    return `sq_${Date.now()}_${QueryDecompositionTool.queryCounter}`;
}
|
||||
|
||||
/**
 * Create sub-queries based on the original query using simple text rules.
 *
 * The original query is always the first sub-query. Depending on its wording
 * (comparison, "how to", "why", "what is/are") template sub-queries are
 * appended; if none apply, generic "Key information about ..." sub-queries
 * are built from the query's main concepts.
 *
 * @param query The query to decompose
 * @param context Optional context; not used by the current rules
 * @returns The list of sub-queries, all initially unanswered
 */
createSubQueries(query: string, context?: string): SubQuery[] {
    const lowered = query.toLowerCase();

    const makeSubQuery = (text: string, reason: string): SubQuery => ({
        id: this.generateSubQueryId(),
        text,
        reason,
        isAnswered: false
    });

    // Templates below add their own punctuation, so drop the query's trailing
    // "?" (and surrounding whitespace) to avoid generating text ending in "??".
    const stripTrailingQuestionMarks = (text: string): string => text.replace(/[\s?]+$/, '').trim();

    // Avoid creating subqueries from text that was itself produced by a
    // template ("Provide details about", "Information related to"),
    // as these have been causing recursive loops.
    if (lowered.includes("provide details about") || lowered.includes("information related to")) {
        log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`);
        return [makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS)];
    }

    // First, add the original query as a sub-query (always)
    const subQueries: SubQuery[] = [
        makeSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY)
    ];

    const isComparison =
        lowered.includes('compare') ||
        lowered.includes('difference between') ||
        lowered.includes(' vs ') ||
        lowered.includes('versus');

    if (isComparison) {
        // Extract entities to compare (simplified approach)
        const entities = this.extractEntitiesForComparison(query);

        if (entities.length >= 2) {
            // One sub-query per entity ...
            for (const entity of entities) {
                subQueries.push(makeSubQuery(
                    `What are the key characteristics of ${entity}?`,
                    `Getting details about "${entity}" for comparison`
                ));
            }

            // ... plus an explicit comparison sub-query
            subQueries.push(makeSubQuery(
                `How do ${entities.join(' and ')} compare in terms of their primary features?`,
                'Direct comparison of the entities'
            ));
        }
    } else if (lowered.includes('how to ')) {
        // "how to" questions
        const topic = stripTrailingQuestionMarks(query.replace(/how to /i, ''));

        subQueries.push(makeSubQuery(
            `What are the steps to ${topic}?`,
            'Finding procedural information'
        ));
        subQueries.push(makeSubQuery(
            `What are common challenges or pitfalls when trying to ${topic}?`,
            'Identifying potential difficulties'
        ));
    } else if (lowered.startsWith('why ')) {
        // "why" questions
        const topic = stripTrailingQuestionMarks(query.replace(/why /i, ''));

        subQueries.push(makeSubQuery(
            `What are the causes of ${topic}?`,
            'Identifying causes'
        ));
        subQueries.push(makeSubQuery(
            `What evidence supports explanations for ${topic}?`,
            'Finding supporting evidence'
        ));
    } else if (lowered.startsWith('what is ') || lowered.startsWith('what are ')) {
        // "what is" / "what are" questions
        const topic = stripTrailingQuestionMarks(query.replace(/what (is|are) /i, ''));

        subQueries.push(makeSubQuery(`Definition of ${topic}`, 'Getting basic definition'));
        subQueries.push(makeSubQuery(`Examples of ${topic}`, 'Finding examples'));
    }

    // If no specific sub-queries were added (beyond the original),
    // generate generic exploratory sub-queries
    if (subQueries.length <= 1) {
        // Extract main entities/concepts from the query
        for (const concept of this.extractMainConcepts(query)) {
            const loweredConcept = concept.toLowerCase();

            // Don't create recursive or self-referential queries
            if (loweredConcept.includes('provide details') || loweredConcept.includes('information related')) {
                continue;
            }

            subQueries.push(makeSubQuery(
                `Key information about ${concept}`,
                `Finding information about "${concept}"`
            ));
        }
    }

    return subQueries;
}
|
||||
|
||||
/**
 * Truncate text to a maximum length, ending with an ellipsis when shortened.
 *
 * The returned string is never longer than `maxLength`; when `maxLength` is
 * too small to hold any text plus "...", only (part of) the ellipsis is returned.
 *
 * @param text The text to shorten
 * @param maxLength Maximum length of the result, ellipsis included
 * @returns The original text if it fits, otherwise the shortened text
 */
private truncateText(text: string, maxLength: number): string {
    if (text.length <= maxLength) return text;

    const ellipsis = '...';
    if (maxLength <= ellipsis.length) {
        // No room for content; keep the length contract instead of overflowing.
        return ellipsis.substring(0, Math.max(0, maxLength));
    }

    return text.substring(0, maxLength - ellipsis.length) + ellipsis;
}
|
||||
|
||||
/**
 * Extract entities for comparison from a query.
 *
 * First tries explicit patterns ("compare X and Y", "difference between X and Y",
 * "X vs Y"). If none match, falls back to splitting the query into phrases at
 * common stop words and at commas/semicolons, and returns the first two phrases.
 *
 * @param query The query to extract entities from
 * @returns Array of entity strings (at most 2; may be shorter or empty)
 */
extractEntitiesForComparison(query: string): string[] {
    // Try to match patterns like "compare X and Y" or "difference between X and Y"
    const comparePattern = /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i;
    const vsPattern = /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i;

    const match = query.match(comparePattern) ?? query.match(vsPattern);
    if (match) {
        return [match[1].trim(), match[2].trim()];
    }

    // Words that are unlikely to be part of an entity name; they end the current phrase.
    const stopWord = /^(?:the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i;
    // Punctuation stuck to a token ("Vue?", "and,", "(React") must not end up
    // in an entity or hide a stop word.
    const edgePunctuation = /^["'(\[{<]+|["')\]}>.,;:!?]+$/g;

    const entities: string[] = [];
    let phraseWords: string[] = [];
    const flushPhrase = (): void => {
        if (phraseWords.length > 0) {
            entities.push(phraseWords.join(' '));
            phraseWords = [];
        }
    };

    for (const rawWord of query.split(/\s+/)) {
        const word = rawWord.replace(edgePunctuation, '');

        if (word === '' || stopWord.test(word)) {
            flushPhrase();
            continue;
        }

        phraseWords.push(word);

        // A comma or semicolon after the token separates list items ("Python, Java").
        if (/[,;]$/.test(rawWord)) {
            flushPhrase();
        }
    }
    flushPhrase();

    return entities.slice(0, 2); // Return at most 2 entities
}
|
||||
|
||||
/**
 * Extract main concepts from a query.
 *
 * Removes question/stop words, ranks the remaining words (longer than three
 * characters) by frequency, and for each of the top three returns a short
 * phrase cut from the original query around the word's first occurrence.
 *
 * @param query The query to extract concepts from
 * @returns Array of distinct concept strings (at most 3); the trimmed query
 *          itself when no significant word is found; empty for a blank query
 */
extractMainConcepts(query: string): string[] {
    const trimmedQuery = query.trim();
    if (trimmedQuery === '') {
        return [];
    }

    // Remove question words and common stop words. Word boundaries are
    // required, otherwise letters inside other words are removed as well
    // (e.g. the "in", "or" and "on" inside "information").
    const cleanedQuery = query.replace(
        /\b(?:what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by)\b/gi,
        ' '
    );

    // Split into words, drop punctuation attached to them, and filter out short words
    const words = cleanedQuery
        .split(/\s+/)
        .map(word => word.replace(/^["'(\[{<]+|["')\]}>.,;:!?]+$/g, ''))
        .filter(word => word.length > 3);

    // Count word frequency. A Map is used so that words such as "constructor"
    // do not collide with properties inherited by plain objects.
    const wordCounts = new Map<string, number>();
    for (const word of words) {
        const key = word.toLowerCase();
        wordCounts.set(key, (wordCounts.get(key) ?? 0) + 1);
    }

    if (wordCounts.size === 0) {
        // Fallback if no significant words found
        return [trimmedQuery];
    }

    // Most frequent first; ties keep their order of first appearance
    const topWords = [...wordCounts.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(([word]) => word);

    const loweredQuery = query.toLowerCase();
    const conceptPhrases: string[] = [];

    for (const word of topWords) {
        let phrase = word;

        // Try to find the word in the original query and extract a small phrase around it
        const wordIndex = loweredQuery.indexOf(word);
        if (wordIndex >= 0) {
            // Window of roughly 15 characters on each side, widened to the nearest space
            const start = Math.max(0, query.lastIndexOf(' ', wordIndex - 15) + 1);
            const nextSpace = query.indexOf(' ', wordIndex + word.length + 15);
            // No later space means the window runs to the end of the query
            const end = nextSpace === -1 ? query.length : nextSpace;

            if (end > start) {
                phrase = query.substring(start, end).trim() || word;
            }
        }

        // Nearby top words often yield the same window; report each phrase once
        if (!conceptPhrases.includes(phrase)) {
            conceptPhrases.push(phrase);
        }
    }

    return conceptPhrases;
}
|
||||
}
|
||||
|
||||
// Export the class as the module's single default export
// (a module may declare only one default export).
export default QueryDecompositionTool;
|
||||
|
Loading…
x
Reference in New Issue
Block a user