From dd9b37e9fbbeca7750cd1e31053754e5df2bb812 Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sun, 30 Mar 2025 20:08:27 +0000 Subject: [PATCH] move query decomp strings to their own file --- .../agent_tools/query_decomposition_tool.ts | 113 ++++++++++-------- .../query_decomposition_constants.ts | 95 +++++++++++++++ 2 files changed, 155 insertions(+), 53 deletions(-) create mode 100644 src/services/llm/constants/query_decomposition_constants.ts diff --git a/src/services/llm/agent_tools/query_decomposition_tool.ts b/src/services/llm/agent_tools/query_decomposition_tool.ts index 2e65ada76..4ffa9b5d7 100644 --- a/src/services/llm/agent_tools/query_decomposition_tool.ts +++ b/src/services/llm/agent_tools/query_decomposition_tool.ts @@ -14,6 +14,7 @@ import log from '../../log.js'; import { AGENT_TOOL_PROMPTS } from '../constants/llm_prompt_constants.js'; +import { QUERY_DECOMPOSITION_STRINGS } from '../constants/query_decomposition_constants.js'; export interface SubQuery { id: string; @@ -43,10 +44,10 @@ export class QueryDecompositionTool { decomposeQuery(query: string, context?: string): DecomposedQuery { try { // Log the decomposition attempt for tracking - log.info(`Decomposing query: "${query.substring(0, 100)}..."`); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.DECOMPOSING_QUERY(query)); if (!query || query.trim().length === 0) { - log.info("Query decomposition called with empty query"); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.EMPTY_QUERY); return { originalQuery: query, subQueries: [], @@ -57,12 +58,12 @@ export class QueryDecompositionTool { // Assess query complexity to determine if decomposition is needed const complexity = this.assessQueryComplexity(query); - log.info(`Query complexity assessment: ${complexity}/10`); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.COMPLEXITY_ASSESSMENT(complexity)); // For simple queries, just return the original as a single sub-query // Use a lower threshold (2 instead of 3) to decompose more queries if (complexity < 2) { - log.info(`Query is simple (complexity ${complexity}), returning as single sub-query`); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.SIMPLE_QUERY(complexity)); const mainSubQuery = { id: this.generateSubQueryId(), @@ -74,7 +75,7 @@ export class QueryDecompositionTool { // Still add a generic exploration query to get some related content const genericQuery = { id: this.generateSubQueryId(), - text: `Information related to ${query}`, + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.INFORMATION_RELATED(query), reason: AGENT_TOOL_PROMPTS.QUERY_DECOMPOSITION.SUB_QUERY_GENERIC, isAnswered: false }; @@ -89,11 +90,11 @@ export class QueryDecompositionTool { // For complex queries, perform decomposition const subQueries = this.createSubQueries(query, context); - log.info(`Decomposed query into ${subQueries.length} sub-queries`); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.DECOMPOSED_INTO(subQueries.length)); // Log the sub-queries for better visibility subQueries.forEach((sq, index) => { - log.info(`Sub-query ${index + 1}: "${sq.text}" - Reason: ${sq.reason}`); + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.SUB_QUERY_LOG(index, sq.text, sq.reason)); }); return { @@ -103,7 +104,7 @@ export class QueryDecompositionTool { complexity }; } catch (error: any) { - log.error(`Error decomposing query: ${error.message}`); + log.error(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.ERROR_DECOMPOSING(error.message)); // Fallback to treating it as a simple query return { @@ -164,7 +165,7 @@ export class QueryDecompositionTool { try { // Ensure all sub-queries are answered if (!decomposedQuery.subQueries.every(sq => sq.isAnswered)) { - return "Cannot synthesize answer - not all sub-queries have been answered."; + return QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.CANNOT_SYNTHESIZE; } // For simple queries with just one sub-query, return the answer directly @@ -173,12 +174,12 @@ export class QueryDecompositionTool { } // For complex queries, build a structured response that references each sub-answer - let synthesized = `Answer to: "${decomposedQuery.originalQuery}"\n\n`; + let synthesized = QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.ANSWER_TO(decomposedQuery.originalQuery); // Group by themes if there are many sub-queries if (decomposedQuery.subQueries.length > 3) { // Here we would ideally group related sub-queries, but for now we'll just present them in order - synthesized += "Based on the information gathered:\n\n"; + synthesized += QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.BASED_ON_INFORMATION; for (const sq of decomposedQuery.subQueries) { synthesized += `${sq.answer}\n\n`; @@ -192,8 +193,8 @@ export class QueryDecompositionTool { return synthesized.trim(); } catch (error: any) { - log.error(`Error synthesizing answer: ${error.message}`); - return "Error synthesizing the final answer."; + log.error(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.ERROR_SYNTHESIZING(error.message)); + return QUERY_DECOMPOSITION_STRINGS.SYNTHESIS_TEMPLATES.ERROR_SYNTHESIZING; } } @@ -207,12 +208,12 @@ export class QueryDecompositionTool { const answeredCount = decomposedQuery.subQueries.filter(sq => sq.isAnswered).length; const totalCount = decomposedQuery.subQueries.length; - let status = `Progress: ${answeredCount}/${totalCount} sub-queries answered\n\n`; + let status = QUERY_DECOMPOSITION_STRINGS.STATUS_TEMPLATES.PROGRESS(answeredCount, totalCount); for (const sq of decomposedQuery.subQueries) { - status += `${sq.isAnswered ? '✓' : '○'} ${sq.text}\n`; + status += `${sq.isAnswered ? QUERY_DECOMPOSITION_STRINGS.STATUS_TEMPLATES.ANSWERED_MARKER : QUERY_DECOMPOSITION_STRINGS.STATUS_TEMPLATES.UNANSWERED_MARKER} ${sq.text}\n`; if (sq.isAnswered) { - status += ` Answer: ${this.truncateText(sq.answer || "", 100)}\n`; + status += `${QUERY_DECOMPOSITION_STRINGS.STATUS_TEMPLATES.ANSWER_PREFIX}${this.truncateText(sq.answer || "", 100)}\n`; } } @@ -231,8 +232,7 @@ export class QueryDecompositionTool { const questionMarkCount = (query.match(/\?/g) || []).length; // Count potential sub-questions based on question words - const questionWords = ['what', 'how', 'why', 'where', 'when', 'who', 'which']; - const questionWordMatches = questionWords.map(word => { + const questionWordMatches = QUERY_DECOMPOSITION_STRINGS.QUESTION_WORDS.map(word => { const regex = new RegExp(`\\b${word}\\b`, 'gi'); return (query.match(regex) || []).length; }); @@ -240,11 +240,15 @@ export class QueryDecompositionTool { const questionWordCount = questionWordMatches.reduce((sum, count) => sum + count, 0); // Look for conjunctions which might join multiple questions - const conjunctionCount = (query.match(/\b(and|or|but|as well as)\b/gi) || []).length; + const conjunctionPattern = new RegExp(`\\b(${QUERY_DECOMPOSITION_STRINGS.CONJUNCTIONS.join('|')})\\b`, 'gi'); + const conjunctionCount = (query.match(conjunctionPattern) || []).length; // Look for complex requirements - const comparisonCount = (query.match(/\b(compare|versus|vs|difference|similarities?)\b/gi) || []).length; - const analysisCount = (query.match(/\b(analyze|examine|investigate|explore|explain|discuss)\b/gi) || []).length; + const comparisonPattern = new RegExp(`\\b(${QUERY_DECOMPOSITION_STRINGS.COMPARISON_TERMS.join('|')})\\b`, 'gi'); + const comparisonCount = (query.match(comparisonPattern) || []).length; + + const analysisPattern = new RegExp(`\\b(${QUERY_DECOMPOSITION_STRINGS.ANALYSIS_TERMS.join('|')})\\b`, 'gi'); + const analysisCount = (query.match(analysisPattern) || []).length; // Calculate base complexity let complexity = 1; @@ -285,9 +289,9 @@ export class QueryDecompositionTool { // Avoid creating subqueries that start with "Provide details about" or similar // as these have been causing recursive loops - if (query.toLowerCase().includes("provide details about") || - query.toLowerCase().includes("information related to")) { - log.info(`Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`); + if (query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.PROVIDE_DETAILS_ABOUT) || + query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.INFORMATION_RELATED_TO)) { + log.info(QUERY_DECOMPOSITION_STRINGS.LOG_MESSAGES.AVOIDING_RECURSIVE(query)); return [{ id: this.generateSubQueryId(), text: query, @@ -306,10 +310,10 @@ export class QueryDecompositionTool { // Check for "compare", "difference", "versus" to identify comparison questions if ( - query.toLowerCase().includes('compare') || - query.toLowerCase().includes('difference between') || - query.toLowerCase().includes(' vs ') || - query.toLowerCase().includes('versus') + query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.COMPARE) || + query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.DIFFERENCE_BETWEEN) || + query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.VS) || + query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.VERSUS) ) { // Extract entities to compare (simplified approach) const entities = this.extractEntitiesForComparison(query); @@ -319,8 +323,8 @@ export class QueryDecompositionTool { entities.forEach(entity => { subQueries.push({ id: this.generateSubQueryId(), - text: `What are the key characteristics of ${entity}?`, - reason: `Getting details about "${entity}" for comparison`, + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.KEY_CHARACTERISTICS(entity), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.GETTING_DETAILS(entity), isAnswered: false }); }); @@ -328,63 +332,64 @@ export class QueryDecompositionTool { // Add explicit comparison sub-query subQueries.push({ id: this.generateSubQueryId(), - text: `How do ${entities.join(' and ')} compare in terms of their primary features?`, - reason: 'Direct comparison of the entities', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.COMPARISON_FEATURES(entities), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.DIRECT_COMPARISON, isAnswered: false }); } } // Check for "how to" questions - else if (query.toLowerCase().includes('how to ')) { + else if (query.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.HOW_TO)) { const topic = query.replace(/how to /i, '').trim(); subQueries.push({ id: this.generateSubQueryId(), - text: `What are the steps to ${topic}?`, - reason: 'Finding procedural information', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.STEPS_TO(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.FINDING_PROCEDURAL, isAnswered: false }); subQueries.push({ id: this.generateSubQueryId(), - text: `What are common challenges or pitfalls when trying to ${topic}?`, - reason: 'Identifying potential difficulties', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.CHALLENGES(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.IDENTIFYING_DIFFICULTIES, isAnswered: false }); } // Check for "why" questions - else if (query.toLowerCase().startsWith('why ')) { + else if (query.toLowerCase().startsWith(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.WHY)) { const topic = query.replace(/why /i, '').trim(); subQueries.push({ id: this.generateSubQueryId(), - text: `What are the causes of ${topic}?`, - reason: 'Identifying causes', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.CAUSES(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.IDENTIFYING_CAUSES, isAnswered: false }); subQueries.push({ id: this.generateSubQueryId(), - text: `What evidence supports explanations for ${topic}?`, - reason: 'Finding supporting evidence', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.EVIDENCE(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.FINDING_EVIDENCE, isAnswered: false }); } // Handle "what is" questions - else if (query.toLowerCase().startsWith('what is ') || query.toLowerCase().startsWith('what are ')) { + else if (query.toLowerCase().startsWith(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.WHAT_IS) || + query.toLowerCase().startsWith(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.WHAT_ARE)) { const topic = query.replace(/what (is|are) /i, '').trim().replace(/\?$/, ''); subQueries.push({ id: this.generateSubQueryId(), - text: `Definition of ${topic}`, - reason: 'Getting basic definition', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.DEFINITION(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.GETTING_DEFINITION, isAnswered: false }); subQueries.push({ id: this.generateSubQueryId(), - text: `Examples of ${topic}`, - reason: 'Finding examples', + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.EXAMPLES(topic), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.FINDING_EXAMPLES, isAnswered: false }); } @@ -397,12 +402,12 @@ export class QueryDecompositionTool { concepts.forEach(concept => { // Don't create recursive or self-referential queries - if (!concept.toLowerCase().includes('provide details') && - !concept.toLowerCase().includes('information related')) { + if (!concept.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.PROVIDE_DETAILS_ABOUT) && + !concept.toLowerCase().includes(QUERY_DECOMPOSITION_STRINGS.QUERY_PATTERNS.INFORMATION_RELATED_TO)) { subQueries.push({ id: this.generateSubQueryId(), - text: `Key information about ${concept}`, - reason: `Finding information about "${concept}"`, + text: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_TEMPLATES.KEY_INFORMATION(concept), + reason: QUERY_DECOMPOSITION_STRINGS.SUB_QUERY_REASONS.FINDING_INFORMATION(concept), isAnswered: false }); } @@ -444,7 +449,8 @@ export class QueryDecompositionTool { for (const word of words) { // Skip common words that are unlikely to be part of entity names - if (/^(the|of|and|or|vs|versus|between|comparison|compared|to|with|what|is|are|how|why|when|which)$/i.test(word)) { + const stopWordsPattern = new RegExp(`^(${QUERY_DECOMPOSITION_STRINGS.STOP_WORDS.join('|')})$`, 'i'); + if (stopWordsPattern.test(word)) { if (currentPhrase.trim()) { potentialEntities.push(currentPhrase.trim()); currentPhrase = ''; @@ -470,7 +476,8 @@ export class QueryDecompositionTool { */ extractMainConcepts(query: string): string[] { // Remove question words and common stop words - const cleanedQuery = query.replace(/what|is|are|how|why|when|which|the|of|and|or|to|with|in|on|by/gi, ' '); + const stopWordsPattern = new RegExp(QUERY_DECOMPOSITION_STRINGS.STOP_WORDS.join('|'), 'gi'); + const cleanedQuery = query.replace(stopWordsPattern, ' '); // Split into words and filter out short words const words = cleanedQuery.split(/\s+/).filter(word => word.length > 3); diff --git a/src/services/llm/constants/query_decomposition_constants.ts b/src/services/llm/constants/query_decomposition_constants.ts new file mode 100644 index 000000000..2c6df4386 --- /dev/null +++ b/src/services/llm/constants/query_decomposition_constants.ts @@ -0,0 +1,95 @@ +/** + * Query Decomposition Constants + * + * This file centralizes all string constants used in the query decomposition tool. + * These constants can be translated for internationalization support. + */ + +export const QUERY_DECOMPOSITION_STRINGS = { + // Log messages + LOG_MESSAGES: { + DECOMPOSING_QUERY: (query: string) => `Decomposing query: "${query.substring(0, 100)}..."`, + EMPTY_QUERY: "Query decomposition called with empty query", + COMPLEXITY_ASSESSMENT: (complexity: number) => `Query complexity assessment: ${complexity}/10`, + SIMPLE_QUERY: (complexity: number) => `Query is simple (complexity ${complexity}), returning as single sub-query`, + DECOMPOSED_INTO: (count: number) => `Decomposed query into ${count} sub-queries`, + SUB_QUERY_LOG: (index: number, text: string, reason: string) => `Sub-query ${index + 1}: "${text}" - Reason: ${reason}`, + ERROR_DECOMPOSING: (error: string) => `Error decomposing query: ${error}`, + AVOIDING_RECURSIVE: (query: string) => `Avoiding recursive subqueries for query "${query.substring(0, 50)}..."`, + ERROR_SYNTHESIZING: (error: string) => `Error synthesizing answer: ${error}` + }, + + // Query identification patterns + QUERY_PATTERNS: { + PROVIDE_DETAILS_ABOUT: "provide details about", + INFORMATION_RELATED_TO: "information related to", + COMPARE: "compare", + DIFFERENCE_BETWEEN: "difference between", + VS: " vs ", + VERSUS: "versus", + HOW_TO: "how to ", + WHY: "why ", + WHAT_IS: "what is ", + WHAT_ARE: "what are " + }, + + // Question words used for complexity assessment + QUESTION_WORDS: ['what', 'how', 'why', 'where', 'when', 'who', 'which'], + + // Conjunctions used for complexity assessment + CONJUNCTIONS: ['and', 'or', 'but', 'as well as'], + + // Comparison terms used for complexity assessment + COMPARISON_TERMS: ['compare', 'versus', 'vs', 'difference', 'similarities'], + + // Analysis terms used for complexity assessment + ANALYSIS_TERMS: ['analyze', 'examine', 'investigate', 'explore', 'explain', 'discuss'], + + // Common stop words for parsing + STOP_WORDS: ['the', 'of', 'and', 'or', 'vs', 'versus', 'between', 'comparison', 'compared', 'to', 'with', 'what', 'is', 'are', 'how', 'why', 'when', 'which'], + + // Sub-query templates + SUB_QUERY_TEMPLATES: { + INFORMATION_RELATED: (query: string) => `Information related to ${query}`, + KEY_CHARACTERISTICS: (entity: string) => `What are the key characteristics of ${entity}?`, + COMPARISON_FEATURES: (entities: string[]) => `How do ${entities.join(' and ')} compare in terms of their primary features?`, + STEPS_TO: (topic: string) => `What are the steps to ${topic}?`, + CHALLENGES: (topic: string) => `What are common challenges or pitfalls when trying to ${topic}?`, + CAUSES: (topic: string) => `What are the causes of ${topic}?`, + EVIDENCE: (topic: string) => `What evidence supports explanations for ${topic}?`, + DEFINITION: (topic: string) => `Definition of ${topic}`, + EXAMPLES: (topic: string) => `Examples of ${topic}`, + KEY_INFORMATION: (concept: string) => `Key information about ${concept}` + }, + + // Sub-query reasons + SUB_QUERY_REASONS: { + GETTING_DETAILS: (entity: string) => `Getting details about "${entity}" for comparison`, + DIRECT_COMPARISON: 'Direct comparison of the entities', + FINDING_PROCEDURAL: 'Finding procedural information', + IDENTIFYING_DIFFICULTIES: 'Identifying potential difficulties', + IDENTIFYING_CAUSES: 'Identifying causes', + FINDING_EVIDENCE: 'Finding supporting evidence', + GETTING_DEFINITION: 'Getting basic definition', + FINDING_EXAMPLES: 'Finding examples', + FINDING_INFORMATION: (concept: string) => `Finding information about "${concept}"` + }, + + // Synthesis answer templates + SYNTHESIS_TEMPLATES: { + CANNOT_SYNTHESIZE: "Cannot synthesize answer - not all sub-queries have been answered.", + ANSWER_TO: (query: string) => `Answer to: "${query}"\n\n`, + BASED_ON_INFORMATION: "Based on the information gathered:\n\n", + ERROR_SYNTHESIZING: "Error synthesizing the final answer." + }, + + // Query status templates + STATUS_TEMPLATES: { + PROGRESS: (answered: number, total: number) => `Progress: ${answered}/${total} sub-queries answered\n\n`, + ANSWERED_MARKER: "✓", + UNANSWERED_MARKER: "○", + ANSWER_PREFIX: " Answer: " + } +}; + +export default QUERY_DECOMPOSITION_STRINGS;