Query decomposition is closer to working

This commit is contained in:
perf3ct 2025-04-17 17:26:41 +00:00
parent 7062e51f2d
commit 19c36b18a6
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
3 changed files with 115 additions and 5 deletions

View File

@ -376,6 +376,76 @@ export class VectorSearchService {
return '';
}
}
/**
 * Find notes that are semantically relevant to any of several queries.
 *
 * Every distinct, non-blank query is searched in turn through
 * `findRelevantNotes`. Hits are merged by `noteId` (the hit with the highest
 * similarity wins), sorted by descending similarity and truncated to the
 * overall result limit.
 *
 * @param queries - Search queries; non-string, blank and repeated entries are skipped
 * @param contextNoteId - Optional note ID to restrict search to a branch
 * @param options - Search options; `maxResults` caps the combined result list
 * @returns Deduplicated notes sorted by descending similarity. Empty when there
 *          is nothing to search for or when an unexpected error occurs.
 */
async findRelevantNotesMultiQuery(
    queries: string[],
    contextNoteId: string | null = null,
    options: VectorSearchOptions = {}
): Promise<NoteSearchResult[]> {
    if (!queries || queries.length === 0) {
        log.info('No queries provided to findRelevantNotesMultiQuery');
        return [];
    }

    // Drop malformed, blank and repeated queries up front: each of them would
    // cost a full search and shrink the per-query share of the result budget
    // without being able to contribute any new note.
    const uniqueQueries = Array.from(new Set(
        queries
            .filter((q): q is string => typeof q === 'string')
            .map(q => q.trim())
            .filter(q => q.length > 0)
    ));

    if (uniqueQueries.length === 0) {
        log.info('No queries provided to findRelevantNotesMultiQuery');
        return [];
    }

    log.info(`VectorSearchService: Finding relevant notes for ${uniqueQueries.length} queries`);
    log.info(`Multi-query parameters: contextNoteId=${contextNoteId || 'global'}, queries=${JSON.stringify(uniqueQueries.map(q => q.substring(0, 20) + '...'))}`);

    try {
        // `||` is deliberate: a missing or zero maxResults falls back to the default limit.
        const totalLimit = options.maxResults || SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_MAX_RESULTS;

        // Split the overall budget across the queries so the combined list
        // does not grow far beyond the requested size before truncation.
        const perQueryLimit = Math.ceil(totalLimit / uniqueQueries.length);

        // Keyed by noteId so a note matched by several queries is kept once.
        const bestByNoteId = new Map<string, NoteSearchResult>();

        for (const query of uniqueQueries) {
            try {
                const queryOptions = {
                    ...options,
                    maxResults: perQueryLimit,
                    useEnhancedQueries: false // The queries passed in are already enhanced
                };

                const results = await this.findRelevantNotes(query, contextNoteId, queryOptions);

                // Keep only the highest-scoring hit for each note.
                for (const note of results) {
                    const existing = bestByNoteId.get(note.noteId);
                    if (!existing || note.similarity > (existing.similarity || 0)) {
                        bestByNoteId.set(note.noteId, note);
                    }
                }

                log.info(`Found ${results.length} results for query: "${query.substring(0, 30)}..."`);
            } catch (error) {
                // One failing query must not discard what the others found.
                log.error(`Error searching for query "${query}": ${error}`);
            }
        }

        const combinedResults = Array.from(bestByNoteId.values())
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, totalLimit);

        log.info(`VectorSearchService: Found ${combinedResults.length} total deduplicated results across ${uniqueQueries.length} queries`);
        return combinedResults;
    } catch (error) {
        log.error(`Error in findRelevantNotesMultiQuery: ${error}`);
        return [];
    }
}
}
// Export a singleton instance

View File

@ -202,21 +202,23 @@ export class ChatPipeline {
const vectorSearchStartTime = Date.now();
log.info(`========== STAGE 3: VECTOR SEARCH ==========`);
log.info('Using VectorSearchStage pipeline component to find relevant notes');
log.info(`Searching with ${searchQueries.length} queries from decomposition`);
// Use the vectorSearchStage with multiple queries
const vectorSearchResult = await this.stages.vectorSearch.execute({
query: userQuery,
query: userQuery, // Original query as fallback
queries: searchQueries, // All decomposed queries
noteId: input.noteId || 'global',
options: {
maxResults: SEARCH_CONSTANTS.CONTEXT.MAX_SIMILAR_NOTES,
useEnhancedQueries: true,
useEnhancedQueries: false, // We're already using enhanced queries from decomposition
threshold: SEARCH_CONSTANTS.VECTOR_SEARCH.DEFAULT_THRESHOLD,
llmService: llmService || undefined
}
});
this.updateStageMetrics('vectorSearch', vectorSearchStartTime);
log.info(`Vector search found ${vectorSearchResult.searchResults.length} relevant notes`);
log.info(`Vector search found ${vectorSearchResult.searchResults.length} relevant notes across ${searchQueries.length} queries`);
// Extract context from search results
log.info(`========== SEMANTIC CONTEXT EXTRACTION ==========`);

View File

@ -13,6 +13,7 @@ import { SEARCH_CONSTANTS } from '../../constants/search_constants.js';
export interface VectorSearchInput {
query: string;
queries?: string[];
noteId?: string;
options?: {
maxResults?: number;
@ -42,6 +43,7 @@ export class VectorSearchStage {
async execute(input: VectorSearchInput): Promise<VectorSearchOutput> {
const {
query,
queries = [],
noteId = 'global',
options = {}
} = input;
@ -53,6 +55,42 @@ export class VectorSearchStage {
llmService = undefined
} = options;
// If queries array is provided, use multi-query search
if (queries && queries.length > 0) {
log.info(`VectorSearchStage: Searching with ${queries.length} queries`);
log.info(`Parameters: noteId=${noteId}, maxResults=${maxResults}, threshold=${threshold}`);
try {
// Use the new multi-query method
const searchResults = await vectorSearchService.findRelevantNotesMultiQuery(
queries,
noteId === 'global' ? null : noteId,
{
maxResults,
threshold,
llmService: llmService || null
}
);
log.info(`VectorSearchStage: Found ${searchResults.length} relevant notes from multi-query search`);
return {
searchResults,
originalQuery: query,
noteId
};
} catch (error) {
log.error(`Error in vector search stage multi-query: ${error}`);
// Return empty results on error
return {
searchResults: [],
originalQuery: query,
noteId
};
}
}
// Fallback to single query search
log.info(`VectorSearchStage: Searching for "${query.substring(0, 50)}..."`);
log.info(`Parameters: noteId=${noteId}, maxResults=${maxResults}, threshold=${threshold}`);
@ -64,7 +102,7 @@ export class VectorSearchStage {
{
maxResults,
threshold,
llmService
llmService: llmService || null
}
);