// Notes/src/services/llm/context_extractors/query_decomposition_tool.ts
/**
* Query Decomposition Tool
*
* This tool helps the LLM agent break down complex user queries into
* sub-questions that can be answered individually and then synthesized
* into a comprehensive response.
*
* Features:
* - Analyze query complexity
* - Extract multiple intents from a single question
* - Create a multi-stage research plan
* - Track progress through complex information gathering
*
* Integration with pipeline architecture:
* - Can use pipeline stages when available
* - Falls back to direct methods when needed
*/
import log from '../../log.js';
import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js';
import { QUERY_DECOMPOSITION_STRINGS } from '../constants/query_decomposition_constants.js';
import aiServiceManager from '../ai_service_manager.js';
/**
 * A single question derived from the user's original query.
 */
export interface SubQuery {
    /** Identifier used to look the sub-query up when recording its answer. */
    id: string;
    /** The question text to be answered. */
    text: string;
    /** Short explanation of why this sub-query was generated. */
    reason: string;
    /** True once an answer has been recorded for this sub-query. */
    isAnswered: boolean;
    /** The recorded answer; absent until the sub-query is answered. */
    answer?: string;
}
/**
 * The result of decomposing a user query: the original text, the derived
 * sub-queries, and bookkeeping about progress.
 */
export interface DecomposedQuery {
    /** The query exactly as it was passed in. */
    originalQuery: string;
    /** Sub-queries derived from the original query, in generation order. */
    subQueries: SubQuery[];
    /** Progress marker for answering the sub-queries. */
    status: 'pending' | 'in_progress' | 'completed';
    /** Heuristic complexity score assigned to the original query. */
    complexity: number;
}
/**
 * Rule-based helper that breaks a user query into sub-queries, records the
 * answers to those sub-queries, and combines them into a single response.
 *
 * Patterns, templates and log messages are read from
 * QUERY_DECOMPOSITION_STRINGS and AGENT_TOOL_PROMPTS; this class holds only
 * the heuristics that decide which of them to use.
 */
export class QueryDecompositionTool {
    /**
     * Process-wide sequence number mixed into sub-query IDs so that two IDs
     * minted within the same millisecond can never collide.
     */
    private static queryCounter = 0;

    /**
     * Break down a complex query into smaller, more manageable sub-queries.
     *
     * Never throws: any error raised while decomposing is logged and the
     * query is returned as a single sub-query instead.
     *
     * @param query The original user query
     * @param context Optional context about the current note being viewed
     * @returns A decomposed query object with sub-queries
     */
    decomposeQuery(query: string, context?: string): DecomposedQuery {
        try {
            // Log the decomposition attempt for tracking
            log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.DECOMPOSING_QUERY(query));

            if (!query || query.trim().length === 0) {
                log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.EMPTY_QUERY);
                return {
                    originalQuery: query,
                    subQueries: [],
                    status: 'pending',
                    complexity: 0
                };
            }

            // Assess query complexity to determine if decomposition is needed
            const complexity = this.assessQueryComplexity(query);
            log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.COMPLEXITY_ASSESSMENT(complexity));

            // Only queries with the minimum score (1) are treated as simple;
            // the threshold is deliberately low so that more queries get decomposed.
            if (complexity < 2) {
                log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.SIMPLE_QUERY(complexity));

                const mainSubQuery = this.buildSubQuery(
                    query,
                    AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT
                );
                // Still add a generic exploration query to get some related content
                const genericQuery = this.buildSubQuery(
                    QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.INFORMATION_RELATED(query),
                    AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC
                );

                return {
                    originalQuery: query,
                    subQueries: [mainSubQuery, genericQuery],
                    status: 'pending',
                    complexity
                };
            }

            // For complex queries, perform decomposition
            const subQueries = this.createSubQueries(query, context);
            log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.DECOMPOSED_INTO(subQueries.length));

            // Log the sub-queries for better visibility
            subQueries.forEach((sq, index) => {
                log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.SUB_QUERY_LOG(index, sq.text, sq.reason));
            });

            return {
                originalQuery: query,
                subQueries,
                status: 'pending',
                complexity
            };
        } catch (error: unknown) {
            log.error(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.ERROR_DECOMPOSING(
                QueryDecompositionTool.describeError(error)
            ));

            // Fallback to treating it as a simple query
            return {
                originalQuery: query,
                subQueries: [
                    this.buildSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_ERROR)
                ],
                status: 'pending',
                complexity: 1
            };
        }
    }

    /**
     * Record the answer to one sub-query.
     *
     * The input object is not mutated; a new decomposed query is returned.
     * The status becomes 'completed' when every sub-query is answered and
     * 'in_progress' otherwise.
     *
     * @param decomposedQuery The decomposed query object
     * @param subQueryId The ID of the sub-query to update
     * @param answer The answer to the sub-query
     * @returns The updated decomposed query
     */
    updateSubQueryAnswer(
        decomposedQuery: DecomposedQuery,
        subQueryId: string,
        answer: string
    ): DecomposedQuery {
        const updatedSubQueries = decomposedQuery.subQueries.map((sq): SubQuery =>
            sq.id === subQueryId ? { ...sq, answer, isAnswered: true } : sq
        );

        // Check if all sub-queries are answered
        const allAnswered = updatedSubQueries.every(sq => sq.isAnswered);

        return {
            ...decomposedQuery,
            subQueries: updatedSubQueries,
            status: allAnswered ? 'completed' : 'in_progress'
        };
    }

    /**
     * Synthesize all sub-query answers into a single response.
     *
     * Returns the CANNOT_SYNTHESIZE template while any sub-query is still
     * unanswered, and the ERROR_SYNTHESIZING template if building the
     * response throws.
     *
     * @param decomposedQuery The decomposed query with all sub-queries answered
     * @returns A synthesized answer to the original query
     */
    synthesizeAnswer(decomposedQuery: DecomposedQuery): string {
        try {
            const { subQueries, originalQuery } = decomposedQuery;

            // Ensure all sub-queries are answered
            if (!subQueries.every(sq => sq.isAnswered)) {
                return QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.CANNOT_SYNTHESIZE;
            }

            // For simple queries with just one sub-query, return the answer directly
            if (subQueries.length === 1) {
                return subQueries[0]?.answer ?? '';
            }

            let synthesized: string =
                QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.ANSWER_TO(originalQuery);

            // Longer answer lists get an introductory line. Grouping related
            // sub-queries by theme is not implemented; answers are listed in order.
            if (subQueries.length > 3) {
                synthesized += QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.BASED_ON_INFORMATION;
            }

            for (const sq of subQueries) {
                // A sub-query may be flagged answered without answer text;
                // never leak the literal string "undefined" into the output.
                synthesized += `${sq.answer ?? ''}\n\n`;
            }

            return synthesized.trim();
        } catch (error: unknown) {
            log.error(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.ERROR_SYNTHESIZING(
                QueryDecompositionTool.describeError(error)
            ));
            return QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.ERROR_SYNTHESIZING;
        }
    }

    /**
     * Generate a status report on the progress of answering a complex query.
     *
     * The report starts with the PROGRESS template, followed by one line per
     * sub-query and, for answered ones, a preview of the answer truncated to
     * 100 characters.
     *
     * @param decomposedQuery The decomposed query
     * @returns A status report string
     */
    getQueryStatus(decomposedQuery: DecomposedQuery): string {
        const statusTemplates = QUERY_DECOMPOSITION_STRINGS.STATUS_TEMPLATES;
        const totalCount = decomposedQuery.subQueries.length;
        const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length;

        let status: string = statusTemplates.PROGRESS(answeredCount, totalCount);

        for (const sq of decomposedQuery.subQueries) {
            const marker = sq.isAnswered
                ? statusTemplates.ANSWERED_MARKER
                : statusTemplates.UNANSWERED_MARKER;
            status += `${marker} ${sq.text}\n`;

            if (sq.isAnswered) {
                status += `${statusTemplates.ANSWER_PREFIX}${this.truncateText(sq.answer ?? '', 100)}\n`;
            }
        }

        return status;
    }

    /**
     * Assess the complexity of a query on a scale of 1-10.
     *
     * The score starts at 1 and gains up to 2 points each for question marks,
     * extra question words, conjunctions, and comparison/analysis terms, plus
     * 1 point each for exceeding 100 and 200 characters.
     *
     * @param query The query to assess
     * @returns A complexity score from 1-10
     */
    assessQueryComplexity(query: string): number {
        const countMatches = (pattern: RegExp): number => (query.match(pattern) ?? []).length;

        // An empty term list would yield a pattern that matches at every word
        // boundary, so treat it as "no matches" instead.
        const countTerms = (terms: readonly string[]): number =>
            terms.length === 0 ? 0 : countMatches(new RegExp(`\\b(${terms.join('|')})\\b`, 'gi'));

        // Count the number of question marks as a basic indicator
        const questionMarkCount = countMatches(/\?/g);

        // Count potential sub-questions based on question words (each word is
        // matched on its own, as a whole word, case-insensitively)
        const questionWords: readonly string[] = QUERY_DECOMPOSITION_STRINGS.QUESTION_WORDS;
        let questionWordCount = 0;
        for (const word of questionWords) {
            questionWordCount += countMatches(new RegExp(`\\b${word}\\b`, 'gi'));
        }

        // Conjunctions might join multiple questions
        const conjunctionCount = countTerms(QUERY_DECOMPOSITION_STRINGS.CONJUNCTIONS);

        // Comparison and analysis terms indicate more demanding requirements
        const comparisonCount = countTerms(QUERY_DECOMPOSITION_STRINGS.COMPARISON_TERMS);
        const analysisCount = countTerms(QUERY_DECOMPOSITION_STRINGS.ANALYSIS_TERMS);

        let complexity = 1;
        complexity += Math.min(2, questionMarkCount);
        // Only question words beyond the first one add complexity
        complexity += Math.min(2, Math.max(0, questionWordCount - 1));
        complexity += Math.min(2, conjunctionCount);
        complexity += Math.min(2, comparisonCount + analysisCount);

        // Add for overall length
        if (query.length > 100) complexity += 1;
        if (query.length > 200) complexity += 1;

        // Ensure we stay in the 1-10 range
        return Math.max(1, Math.min(10, complexity));
    }

    /**
     * Generate a unique ID for a sub-query.
     *
     * The ID has the form `sq_<timestamp>_<sequence>`. The sequence number is
     * a per-process counter, so IDs created in the same millisecond differ.
     *
     * @returns A new sub-query ID
     */
    generateSubQueryId(): string {
        QueryDecompositionTool.queryCounter += 1;
        return `sq_${Date.now()}_${QueryDecompositionTool.queryCounter}`;
    }

    /**
     * Create sub-queries based on the original query.
     *
     * The original query is always the first sub-query. Depending on which
     * pattern the query matches (comparison, "how to", "why", "what is/are"),
     * pattern-specific sub-queries are appended; if none were added, generic
     * sub-queries are built from the query's main concepts.
     *
     * @param query The query to decompose
     * @param context Optional context; currently not used by these rules
     * @returns The generated sub-queries, original query first
     */
    createSubQueries(query: string, context?: string): SubQuery[] {
        const patterns = QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS;
        const templates = QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES;
        const reasons = QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS;
        const lowered = query.toLowerCase();

        // Queries that already look like generated sub-queries are passed
        // through untouched, as decomposing them has caused recursive loops.
        if (QueryDecompositionTool.looksSelfGenerated(lowered)) {
            log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.AVOIDING_RECURSIVE(query));
            return [
                this.buildSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_DIRECT_ANALYSIS)
            ];
        }

        // First, add the original query as a sub-query (always)
        const subQueries: SubQuery[] = [
            this.buildSubQuery(query, AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.ORIGINAL_QUERY)
        ];

        const comparisonMarkers: string[] = [
            patterns.COMPARE,
            patterns.DIFFERENCE_BETWEEN,
            patterns.VS,
            patterns.VERSUS
        ];

        if (comparisonMarkers.some(marker => lowered.includes(marker))) {
            // Extract entities to compare (simplified approach)
            const entities = this.extractEntitiesForComparison(query);
            if (entities.length >= 2) {
                // One sub-query per entity, then an explicit comparison
                for (const entity of entities) {
                    subQueries.push(this.buildSubQuery(
                        templates.KEY_CHARACTERISTICS(entity),
                        reasons.GETTING_DETAILS(entity)
                    ));
                }
                subQueries.push(this.buildSubQuery(
                    templates.COMPARISON_FEATURES(entities),
                    reasons.DIRECT_COMPARISON
                ));
            }
        } else if (lowered.includes(patterns.HOW_TO)) {
            const topic = QueryDecompositionTool.extractTopic(query, /how to /i);
            subQueries.push(this.buildSubQuery(templates.STEPS_TO(topic), reasons.FINDING_PROCEDURAL));
            subQueries.push(this.buildSubQuery(templates.CHALLENGES(topic), reasons.IDENTIFYING_DIFFICULTIES));
        } else if (lowered.startsWith(patterns.WHY)) {
            const topic = QueryDecompositionTool.extractTopic(query, /why /i);
            subQueries.push(this.buildSubQuery(templates.CAUSES(topic), reasons.IDENTIFYING_CAUSES));
            subQueries.push(this.buildSubQuery(templates.EVIDENCE(topic), reasons.FINDING_EVIDENCE));
        } else if (lowered.startsWith(patterns.WHAT_IS) || lowered.startsWith(patterns.WHAT_ARE)) {
            const topic = QueryDecompositionTool.extractTopic(query, /what (is|are) /i);
            subQueries.push(this.buildSubQuery(templates.DEFINITION(topic), reasons.GETTING_DEFINITION));
            subQueries.push(this.buildSubQuery(templates.EXAMPLES(topic), reasons.FINDING_EXAMPLES));
        }

        // If no specific sub-queries were added (beyond the original),
        // generate generic exploratory sub-queries
        if (subQueries.length <= 1) {
            for (const concept of this.extractMainConcepts(query)) {
                // Don't create recursive or self-referential queries
                if (QueryDecompositionTool.looksSelfGenerated(concept.toLowerCase())) {
                    continue;
                }
                subQueries.push(this.buildSubQuery(
                    templates.KEY_INFORMATION(concept),
                    reasons.FINDING_INFORMATION(concept)
                ));
            }
        }

        return subQueries;
    }

    /**
     * Truncate text to a maximum length, ending with an ellipsis when cut.
     *
     * @param text The text to shorten
     * @param maxLength Maximum length of the returned string
     * @returns The text unchanged if it fits, otherwise a shortened copy
     */
    private truncateText(text: string, maxLength: number): string {
        if (text.length <= maxLength) return text;
        // No room for an ellipsis: hard-cut rather than exceed maxLength
        if (maxLength <= 3) return text.substring(0, Math.max(0, maxLength));
        return text.substring(0, maxLength - 3) + '...';
    }

    /**
     * Extract entities for comparison from a query.
     *
     * First tries explicit shapes such as "compare X and Y" or "X vs Y".
     * Otherwise splits the query into runs of consecutive non-stop-words and
     * returns the first two runs.
     *
     * @param query The query to extract entities from
     * @returns Array of at most two entity strings
     */
    extractEntitiesForComparison(query: string): string[] {
        // Try to match patterns like "compare X and Y" or "difference between X and Y"
        const comparePattern = /\b(?:compare|difference between|similarities between)\s+([^,]+?)\s+(?:and|with|to)\s+([^,\?\.]+)/i;
        const vsPattern = /\b([^,]+?)\s+(?:vs\.?|versus)\s+([^,\?\.]+)/i;

        const match = query.match(comparePattern) ?? query.match(vsPattern);
        if (match) {
            const [, first = '', second = ''] = match;
            return [first.trim(), second.trim()];
        }

        // If no pattern match, collect phrases delimited by stop words.
        // The pattern is built once rather than once per word.
        const stopWords: readonly string[] = QUERY_DECOMPOSITION_STRINGS.STOP_WORDS;
        const stopWordsPattern = new RegExp(`^(${stopWords.join('|')})$`, 'i');

        const potentialEntities: string[] = [];
        let currentWords: string[] = [];

        const flushPhrase = (): void => {
            const phrase = currentWords.join(' ').trim();
            if (phrase) {
                potentialEntities.push(phrase);
            }
            currentWords = [];
        };

        for (const word of query.split(/\s+/)) {
            // Stop words are unlikely to be part of an entity name; they end the current phrase
            if (stopWordsPattern.test(word)) {
                flushPhrase();
            } else {
                currentWords.push(word);
            }
        }
        flushPhrase();

        return potentialEntities.slice(0, 2); // Return at most 2 entities
    }

    /**
     * Extract main concepts from a query.
     *
     * Removes stop words (whole words only), ranks the remaining words longer
     * than three characters by frequency, and for each of the top three
     * returns the stretch of the original query around its first occurrence.
     * Duplicate phrases are returned once.
     *
     * @param query The query to extract concepts from
     * @returns Array of distinct concept strings; the trimmed query itself if
     *          no significant words are found
     */
    extractMainConcepts(query: string): string[] {
        // Remove stop words. Word boundaries are required so that short stop
        // words are not deleted from inside longer words.
        const stopWords: readonly string[] = QUERY_DECOMPOSITION_STRINGS.STOP_WORDS;
        const cleanedQuery = stopWords.length === 0
            ? query
            : query.replace(new RegExp(`\\b(?:${stopWords.join('|')})\\b`, 'gi'), ' ');

        // Split into words and filter out short words
        const words = cleanedQuery.split(/\s+/).filter(word => word.length > 3);

        // Count word frequency. A Map is used so that words such as
        // "constructor" cannot collide with inherited object properties.
        const wordCounts = new Map<string, number>();
        for (const word of words) {
            const key = word.toLowerCase();
            wordCounts.set(key, (wordCounts.get(key) ?? 0) + 1);
        }

        if (wordCounts.size === 0) {
            // Fallback if no significant words found
            return [query.trim()];
        }

        // Most frequent first; ties keep first-seen order
        const sortedWords = [...wordCounts.entries()]
            .sort((a, b) => b[1] - a[1])
            .map(([word]) => word);

        const lowerQuery = query.toLowerCase();
        const conceptPhrases = new Set<string>();

        // Use up to the top 3 words to form concepts
        for (const word of sortedWords.slice(0, 3)) {
            const wordIndex = lowerQuery.indexOf(word);
            if (wordIndex < 0) {
                conceptPhrases.add(word);
                continue;
            }

            // Window of roughly 15 characters either side of the word,
            // widened outwards to the nearest space so words are not cut.
            const start = Math.max(0, query.lastIndexOf(' ', wordIndex - 15) + 1);
            const nextSpace = query.indexOf(' ', wordIndex + word.length + 15);
            // No later space means the window runs to the end of the query
            const end = nextSpace === -1 ? query.length : nextSpace;

            const phrase = end > start ? query.substring(start, end).trim() : '';
            conceptPhrases.add(phrase || word);
        }

        return [...conceptPhrases];
    }

    /** Build an unanswered sub-query with a freshly generated ID. */
    private buildSubQuery(text: string, reason: string): SubQuery {
        return {
            id: this.generateSubQueryId(),
            text,
            reason,
            isAnswered: false
        };
    }

    /** True when lower-cased text contains a phrase used by generated sub-queries. */
    private static looksSelfGenerated(loweredText: string): boolean {
        const patterns = QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS;
        return loweredText.includes(patterns.PROVIDE_DETAILS_ABOUT) ||
            loweredText.includes(patterns.INFORMATION_RELATED_TO);
    }

    /** Remove the first lead-in match and any trailing question marks from a query. */
    private static extractTopic(query: string, leadIn: RegExp): string {
        return query.replace(leadIn, '').trim().replace(/\?+$/, '').trim();
    }

    /** Turn a caught value of unknown type into a loggable message. */
    private static describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error);
    }
}
export default QueryDecompositionTool;