mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-09-03 22:30:51 +08:00
this is absolutely cursed, but query decomp works now
This commit is contained in:
parent
daa56b10e8
commit
a0dda48748
@ -99,29 +99,56 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
|
|||||||
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
|
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
// Check if the text might contain a JSON array (has square brackets)
|
log.info(`Cleaned JSON string: ${jsonStr}`);
|
||||||
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
|
||||||
// Extract just the array part if there's explanatory text
|
// Check if the text might contain a JSON structure (has curly braces or square brackets)
|
||||||
const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
|
if ((jsonStr.includes('{') && jsonStr.includes('}')) || (jsonStr.includes('[') && jsonStr.includes(']'))) {
|
||||||
if (arrayMatch) {
|
// Try to extract the JSON structure
|
||||||
jsonStr = arrayMatch[0];
|
let jsonMatch = jsonStr.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||||
|
if (jsonMatch) {
|
||||||
|
jsonStr = jsonMatch[0];
|
||||||
|
log.info(`Extracted JSON structure: ${jsonStr}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to parse the JSON
|
// Try to parse the JSON
|
||||||
try {
|
try {
|
||||||
const queries = JSON.parse(jsonStr);
|
const parsed = JSON.parse(jsonStr);
|
||||||
if (Array.isArray(queries) && queries.length > 0) {
|
|
||||||
const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
|
// Handle array format: ["query1", "query2"]
|
||||||
|
if (Array.isArray(parsed)) {
|
||||||
|
const result = parsed
|
||||||
|
.map(q => typeof q === 'string' ? q.trim() : String(q).trim())
|
||||||
|
.filter(Boolean);
|
||||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
} catch (innerError) {
|
// Handle object format: {"query1": "reason1", "query2": "reason2"} or {"query1" : "query2"}
|
||||||
// If parsing fails, log it and continue to the fallback
|
else if (typeof parsed === 'object' && parsed !== null) {
|
||||||
log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
|
// Extract both keys and values as potential queries
|
||||||
|
const keys = Object.keys(parsed);
|
||||||
|
const values = Object.values(parsed);
|
||||||
|
|
||||||
|
// Add keys as queries
|
||||||
|
const keysResult = keys
|
||||||
|
.filter(key => key && key.length > 3)
|
||||||
|
.map(key => key.trim());
|
||||||
|
|
||||||
|
// Add values as queries if they're strings and not already included
|
||||||
|
const valuesResult = values
|
||||||
|
.filter((val): val is string => typeof val === 'string' && val.length > 3)
|
||||||
|
.map(val => val.trim())
|
||||||
|
.filter(val => !keysResult.includes(val));
|
||||||
|
|
||||||
|
const result = [...keysResult, ...valuesResult];
|
||||||
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
log.info(`JSON parse error: ${parseError}. Will use fallback parsing.`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback 1: Try to extract an array manually by splitting on commas between quotes
|
// Fallback: Try to extract an array manually by splitting on commas between quotes
|
||||||
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
if (jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||||
const arrayContent = jsonStr.substring(
|
const arrayContent = jsonStr.substring(
|
||||||
jsonStr.indexOf('[') + 1,
|
jsonStr.indexOf('[') + 1,
|
||||||
@ -132,32 +159,43 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
|
|||||||
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
|
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
|
||||||
if (stringMatches && stringMatches.length > 0) {
|
if (stringMatches && stringMatches.length > 0) {
|
||||||
const result = stringMatches
|
const result = stringMatches
|
||||||
.map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
|
.map((m: string) => m.substring(1, m.length - 1).trim()) // Remove surrounding quotes
|
||||||
.filter((s: string) => s.length > 0);
|
.filter((s: string) => s.length > 0);
|
||||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback 2: Extract queries line by line
|
// Fallback: Try to extract key-value pairs from object notation manually
|
||||||
const lines = responseText.split('\n')
|
if (jsonStr.includes('{') && jsonStr.includes('}')) {
|
||||||
.map((line: string) => line.trim())
|
// Extract content between curly braces
|
||||||
.filter((line: string) =>
|
const objectContent = jsonStr.substring(
|
||||||
line.length > 0 &&
|
jsonStr.indexOf('{') + 1,
|
||||||
!line.startsWith('```') &&
|
jsonStr.lastIndexOf('}')
|
||||||
!line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
|
|
||||||
!line.match(/^\[|\]$/) // Skip lines that are just brackets
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if (lines.length > 0) {
|
// Split by commas that aren't inside quotes
|
||||||
// Remove numbering, quotes and other list markers from each line
|
const pairs: string[] = objectContent.split(/,(?=(?:[^"]*"[^"]*")*[^"]*$)/);
|
||||||
const result = lines.map((line: string) => {
|
|
||||||
return line
|
const result = pairs
|
||||||
.replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
|
.map(pair => {
|
||||||
.replace(/^[-*•]\s*/, '') // Remove bullet list markers
|
// Split by colon that isn't inside quotes
|
||||||
.replace(/^["']|["']$/g, '') // Remove surrounding quotes
|
const keyValue = pair.split(/:(?=(?:[^"]*"[^"]*")*[^"]*$)/);
|
||||||
.trim();
|
if (keyValue.length === 2) {
|
||||||
}).filter((s: string) => s.length > 0);
|
const key = keyValue[0].replace(/"/g, '').trim();
|
||||||
|
const value = keyValue[1].replace(/"/g, '').trim();
|
||||||
|
|
||||||
|
if (key && key.length > 3) {
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value && value.length > 3) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
})
|
||||||
|
.filter((s: string | null) => s !== null);
|
||||||
|
|
||||||
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
|
||||||
return result;
|
return result;
|
||||||
@ -269,40 +307,24 @@ Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`
|
|||||||
context?: string
|
context?: string
|
||||||
): Promise<SubQuery[]> {
|
): Promise<SubQuery[]> {
|
||||||
try {
|
try {
|
||||||
// Create a simple prompt for query decomposition
|
// Use the proven prompt format that was working before
|
||||||
const prompt = `Decompose the following query into 3-5 specific search queries that would be effective for vector search.
|
const prompt = `You are an AI assistant that decides what information needs to be retrieved from a user's knowledge base called TriliumNext Notes to answer the user's question.
|
||||||
|
Given the user's question, generate 3-5 specific search queries that would help find relevant information.
|
||||||
Your goal is to help find comprehensive information by breaking down the query into multiple search terms.
|
Each query should be focused on a different aspect of the question.
|
||||||
|
Avoid generating queries that are too broad, vague, or about a user's entire Note database, and make sure they are relevant to the user's question.
|
||||||
IMPORTANT: DO NOT just reword the original query. Create MULTIPLE DISTINCT queries that explore different aspects.
|
Format your answer as a JSON array of strings, with each string being a search query.
|
||||||
|
Example: ["exact topic mentioned", "related concept 1", "related concept 2"]`;
|
||||||
For example, if the query is "What are Docker containers?", good sub-queries would be:
|
|
||||||
1. "Docker container architecture and components"
|
|
||||||
2. "Docker vs virtual machines differences"
|
|
||||||
3. "Docker container use cases and benefits"
|
|
||||||
4. "Docker container deployment best practices"
|
|
||||||
|
|
||||||
Format your response as a JSON array of objects with 'text' and 'reason' properties.
|
|
||||||
Example: [
|
|
||||||
{"text": "Docker container architecture", "reason": "Understanding the technical structure"},
|
|
||||||
{"text": "Docker vs virtual machines", "reason": "Comparing with alternative technologies"},
|
|
||||||
{"text": "Docker container benefits", "reason": "Understanding advantages and use cases"},
|
|
||||||
{"text": "Docker deployment best practices", "reason": "Learning practical implementation"}
|
|
||||||
]
|
|
||||||
|
|
||||||
${context ? `\nContext: ${context}` : ''}
|
|
||||||
|
|
||||||
Query: ${query}`;
|
|
||||||
|
|
||||||
log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
|
log.info(`Sending decomposition prompt to LLM for query: "${query}"`);
|
||||||
|
|
||||||
const messages = [
|
const messages = [
|
||||||
{ role: "system" as const, content: prompt }
|
{ role: "system" as const, content: prompt },
|
||||||
|
{ role: "user" as const, content: query }
|
||||||
];
|
];
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
temperature: 0.7,
|
temperature: 0.3,
|
||||||
maxTokens: SEARCH_CONSTANTS.LIMITS.QUERY_PROCESSOR_MAX_TOKENS,
|
maxTokens: 300,
|
||||||
bypassFormatter: true,
|
bypassFormatter: true,
|
||||||
expectsJsonResponse: true,
|
expectsJsonResponse: true,
|
||||||
_bypassContextProcessing: true,
|
_bypassContextProcessing: true,
|
||||||
@ -315,78 +337,144 @@ Query: ${query}`;
|
|||||||
|
|
||||||
log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
|
log.info(`Received LLM response for decomposition: ${responseText.substring(0, 200)}...`);
|
||||||
|
|
||||||
// Try to parse the response as JSON
|
// Parse the response to extract the queries
|
||||||
let subQueries: SubQuery[] = [];
|
let searchQueries: string[] = [];
|
||||||
try {
|
try {
|
||||||
// Extract the JSON from the response
|
// Remove code blocks, quotes, and clean up the response text
|
||||||
const extractedJson = JsonExtractor.extract(responseText, {
|
let jsonStr = responseText
|
||||||
extractArrays: true,
|
.replace(/```(?:json)?|```/g, '') // Remove code block markers
|
||||||
applyFixes: true,
|
.replace(/[\u201C\u201D]/g, '"') // Replace smart quotes with straight quotes
|
||||||
useFallbacks: true
|
.trim();
|
||||||
});
|
|
||||||
|
|
||||||
log.info(`Extracted JSON: ${JSON.stringify(extractedJson).substring(0, 200)}...`);
|
log.info(`Cleaned JSON string: ${jsonStr}`);
|
||||||
|
|
||||||
if (Array.isArray(extractedJson) && extractedJson.length > 0) {
|
// Check if the text might contain a JSON structure (has curly braces or square brackets)
|
||||||
// Convert the extracted data to SubQuery objects
|
if ((jsonStr.includes('{') && jsonStr.includes('}')) || (jsonStr.includes('[') && jsonStr.includes(']'))) {
|
||||||
subQueries = extractedJson
|
// Try to extract the JSON structure
|
||||||
.filter(item => item && typeof item === 'object' && item.text)
|
let jsonMatch = jsonStr.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
||||||
.map(item => ({
|
if (jsonMatch) {
|
||||||
id: this.generateSubQueryId(),
|
jsonStr = jsonMatch[0];
|
||||||
text: item.text,
|
log.info(`Extracted JSON structure: ${jsonStr}`);
|
||||||
reason: item.reason || "Sub-aspect of the main question",
|
}
|
||||||
isAnswered: false
|
|
||||||
}));
|
|
||||||
|
|
||||||
log.info(`Successfully created ${subQueries.length} sub-queries from LLM response`);
|
// Try to parse the JSON
|
||||||
} else {
|
try {
|
||||||
log.info(`Failed to extract array of sub-queries from LLM response`);
|
const parsed = JSON.parse(jsonStr);
|
||||||
|
|
||||||
|
// Handle array format: ["query1", "query2"]
|
||||||
|
if (Array.isArray(parsed)) {
|
||||||
|
searchQueries = parsed
|
||||||
|
.map(q => typeof q === 'string' ? q.trim() : String(q).trim())
|
||||||
|
.filter(Boolean);
|
||||||
|
log.info(`Extracted ${searchQueries.length} queries from JSON array`);
|
||||||
|
}
|
||||||
|
// Handle object format: {"query1": "reason1", "query2": "reason2"} or {"query1" : "query2"}
|
||||||
|
else if (typeof parsed === 'object' && parsed !== null) {
|
||||||
|
// Extract both keys and values as potential queries
|
||||||
|
const keys = Object.keys(parsed);
|
||||||
|
const values = Object.values(parsed);
|
||||||
|
|
||||||
|
// Add keys as queries
|
||||||
|
searchQueries = keys
|
||||||
|
.filter(key => key && key.length > 3)
|
||||||
|
.map(key => key.trim());
|
||||||
|
|
||||||
|
// Add values as queries if they're strings and not already included
|
||||||
|
values
|
||||||
|
.filter((val): val is string => typeof val === 'string' && val.length > 3)
|
||||||
|
.map(val => val.trim())
|
||||||
|
.forEach((val: string) => {
|
||||||
|
if (!searchQueries.includes(val)) {
|
||||||
|
searchQueries.push(val);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
log.info(`Extracted ${searchQueries.length} queries from JSON object`);
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
log.info(`JSON parse error: ${parseError}. Will use fallback parsing.`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
log.error(`Error parsing LLM response: ${error}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Always include the original query
|
// Fallback: Try to extract an array manually by splitting on commas between quotes
|
||||||
const hasOriginal = subQueries.some(sq => sq.text.toLowerCase() === query.toLowerCase());
|
if (searchQueries.length === 0 && jsonStr.includes('[') && jsonStr.includes(']')) {
|
||||||
if (!hasOriginal) {
|
const arrayContent = jsonStr.substring(
|
||||||
subQueries.push({
|
jsonStr.indexOf('[') + 1,
|
||||||
id: this.generateSubQueryId(),
|
jsonStr.lastIndexOf(']')
|
||||||
text: query,
|
);
|
||||||
reason: "Original query",
|
|
||||||
isAnswered: false
|
|
||||||
});
|
|
||||||
log.info(`Added original query to sub-queries list`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure we have at least 3 queries for better search coverage
|
// Use regex to match quoted strings, handling escaped quotes
|
||||||
if (subQueries.length < 3) {
|
const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
|
||||||
// Create some generic variants of the original query
|
if (stringMatches && stringMatches.length > 0) {
|
||||||
const genericVariants = [
|
searchQueries = stringMatches
|
||||||
{ text: `${query} examples and use cases`, reason: "Practical applications" },
|
.map((m: string) => m.substring(1, m.length - 1).trim()) // Remove surrounding quotes
|
||||||
{ text: `${query} concepts and definitions`, reason: "Conceptual understanding" },
|
.filter((s: string) => s.length > 0);
|
||||||
{ text: `${query} best practices`, reason: "Implementation guidance" }
|
log.info(`Extracted ${searchQueries.length} queries using regex`);
|
||||||
];
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add variants until we have at least 3 queries
|
// Fallback: Try to extract key-value pairs from object notation manually
|
||||||
for (let i = 0; i < genericVariants.length && subQueries.length < 3; i++) {
|
if (searchQueries.length === 0 && jsonStr.includes('{') && jsonStr.includes('}')) {
|
||||||
subQueries.push({
|
// Extract content between curly braces
|
||||||
|
const objectContent = jsonStr.substring(
|
||||||
|
jsonStr.indexOf('{') + 1,
|
||||||
|
jsonStr.lastIndexOf('}')
|
||||||
|
);
|
||||||
|
|
||||||
|
// Split by commas that aren't inside quotes
|
||||||
|
const pairs: string[] = objectContent.split(/,(?=(?:[^"]*"[^"]*")*[^"]*$)/);
|
||||||
|
|
||||||
|
for (const pair of pairs) {
|
||||||
|
// Split by colon that isn't inside quotes
|
||||||
|
const keyValue = pair.split(/:(?=(?:[^"]*"[^"]*")*[^"]*$)/);
|
||||||
|
if (keyValue.length === 2) {
|
||||||
|
const key = keyValue[0].replace(/"/g, '').trim();
|
||||||
|
const value = keyValue[1].replace(/"/g, '').trim();
|
||||||
|
|
||||||
|
if (key && key.length > 3 && !searchQueries.includes(key)) {
|
||||||
|
searchQueries.push(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value && value.length > 3 && !searchQueries.includes(value)) {
|
||||||
|
searchQueries.push(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(`Extracted ${searchQueries.length} queries from manual object parsing`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert search queries to SubQuery objects
|
||||||
|
if (searchQueries.length > 0) {
|
||||||
|
const subQueries = searchQueries.map((text, index) => ({
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: genericVariants[i].text,
|
text,
|
||||||
reason: genericVariants[i].reason,
|
reason: `Search query ${index + 1}`,
|
||||||
isAnswered: false
|
isAnswered: false
|
||||||
});
|
}));
|
||||||
}
|
|
||||||
|
|
||||||
log.info(`Added ${3 - subQueries.length} generic variants to ensure minimum 3 queries`);
|
// Always include the original query if not already included
|
||||||
|
const hasOriginal = subQueries.some(sq => sq.text.toLowerCase().includes(query.toLowerCase()) || query.toLowerCase().includes(sq.text.toLowerCase()));
|
||||||
|
if (!hasOriginal) {
|
||||||
|
subQueries.unshift({
|
||||||
|
id: this.generateSubQueryId(),
|
||||||
|
text: query.trim(),
|
||||||
|
reason: "Original query",
|
||||||
|
isAnswered: false
|
||||||
|
});
|
||||||
|
log.info(`Added original query to sub-queries list`);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
||||||
|
return subQueries;
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
log.error(`Error parsing search queries: ${parseError}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info(`Final sub-queries for vector search: ${subQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
// Fallback if all extraction methods fail
|
||||||
return subQueries;
|
log.info(`Using fallback queries`);
|
||||||
} catch (error) {
|
return [
|
||||||
log.error(`Error in simpleQueryDecomposition: ${error}`);
|
|
||||||
|
|
||||||
// Return the original query plus some variants as fallback
|
|
||||||
const fallbackQueries = [
|
|
||||||
{
|
{
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: query,
|
text: query,
|
||||||
@ -395,20 +483,27 @@ Query: ${query}`;
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: `${query} overview`,
|
text: `${query.trim()} overview`,
|
||||||
reason: "General information",
|
reason: "General information",
|
||||||
isAnswered: false
|
isAnswered: false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: this.generateSubQueryId(),
|
id: this.generateSubQueryId(),
|
||||||
text: `${query} examples`,
|
text: `${query.trim()} examples`,
|
||||||
reason: "Practical examples",
|
reason: "Practical examples",
|
||||||
isAnswered: false
|
isAnswered: false
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
} catch (error) {
|
||||||
|
log.error(`Error in simpleQueryDecomposition: ${error}`);
|
||||||
|
|
||||||
log.info(`Using fallback queries due to error: ${fallbackQueries.map(sq => `"${sq.text}"`).join(', ')}`);
|
// Return the original query as fallback
|
||||||
return fallbackQueries;
|
return [{
|
||||||
|
id: this.generateSubQueryId(),
|
||||||
|
text: query,
|
||||||
|
reason: "Error occurred, using original query",
|
||||||
|
isAnswered: false
|
||||||
|
}];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user