do a better job at centralizing json extraction, and query "enhancer" search queries

2025-11-12 21:31:36 +08:00 · 2025-04-01 21:42:09 +00:00 · 2025-04-01 21:42:09 +00:00 · ed52d71729
commit ed52d71729
parent 5b3dca88d9
6 changed files with 507 additions and 124 deletions
--- a/src/services/llm/ai_interface.ts
+++ b/src/services/llm/ai_interface.ts
@ -23,6 +23,9 @@ export interface ChatCompletionOptions {
    presencePenalty?: number;
    showThinking?: boolean;
    systemPrompt?: string;
    preserveSystemPrompt?: boolean; // Whether to preserve existing system message
    bypassFormatter?: boolean; // Whether to bypass the message formatter entirely
    expectsJsonResponse?: boolean; // Whether this request expects a JSON response
    stream?: boolean; // Whether to stream the response
 }
--- a/src/services/llm/context/modules/query_enhancer.ts
+++ b/src/services/llm/context/modules/query_enhancer.ts
@ -4,6 +4,7 @@ import type { Message } from '../../ai_interface.js';
 import { CONTEXT_PROMPTS } from '../../constants/llm_prompt_constants.js';
 import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js';
 import type { IQueryEnhancer } from '../../interfaces/context_interfaces.js';
 import JsonExtractor from '../../utils/json_extractor.js';
 /**
 * Provides utilities for enhancing queries and generating search queries
@ -12,6 +13,15 @@ export class QueryEnhancer implements IQueryEnhancer {
    // Use the centralized query enhancer prompt
    private metaPrompt = CONTEXT_PROMPTS.QUERY_ENHANCER;
    /**
     * Build the system prompt used for query enhancement.
     *
     * The result is `this.metaPrompt` followed by extra instructions telling the
     * model to answer with a plain JSON array: commas between elements and no
     * markdown code fences.
     *
     * @returns The combined prompt text.
     */
    private getEnhancedPrompt(): string {
        return `${this.metaPrompt}
 IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
 Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
    }
    /**
     * Generate search queries to find relevant information for the user question
     *
@ -32,95 +42,38 @@ export class QueryEnhancer implements IQueryEnhancer {
            }
            const messages: Message[] = [
-                { role: "system", content: this.metaPrompt },
+                { role: "system", content: this.getEnhancedPrompt() },
                { role: "user", content: userQuestion }
            ];
            const options = {
                temperature: 0.3,
-                maxTokens: 300
+                maxTokens: 300,
                bypassFormatter: true, // Completely bypass formatter for query enhancement
                expectsJsonResponse: true // Explicitly request JSON-formatted response
            };
            // Get the response from the LLM
            const response = await llmService.generateChatCompletion(messages, options);
            const responseText = response.text; // Extract the text from the response object
-            try {
+            // Use the JsonExtractor to parse the response
-                // Remove code blocks, quotes, and clean up the response text
+            const queries = JsonExtractor.extract<string[]>(responseText, {
-                let jsonStr = responseText
+                extractArrays: true,
-                    .replace(/```(?:json)?|```/g, '') // Remove code block markers
+                minStringLength: 3,
-                    .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes
+                applyFixes: true,
-                    .trim();
+                useFallbacks: true
            });
-                // Check if the text might contain a JSON array (has square brackets)
+            if (queries && queries.length > 0) {
-                if (jsonStr.includes('[') && jsonStr.includes(']')) {
+                log.info(`Extracted ${queries.length} queries using JsonExtractor`);
-                    // Extract just the array part if there's explanatory text
+                cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, queries);
-                    const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
+                return queries;
                    if (arrayMatch) {
                        jsonStr = arrayMatch[0];
                    }
                    // Try to parse the JSON
                    try {
                        const queries = JSON.parse(jsonStr);
                        if (Array.isArray(queries) && queries.length > 0) {
                            const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
                            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                            return result;
                        }
                    } catch (innerError) {
                        // If parsing fails, log it and continue to the fallback
                        log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
                    }
                }
                // Fallback 1: Try to extract an array manually by splitting on commas between quotes
                if (jsonStr.includes('[') && jsonStr.includes(']')) {
                    const arrayContent = jsonStr.substring(
                        jsonStr.indexOf('[') + 1,
                        jsonStr.lastIndexOf(']')
                    );
                    // Use regex to match quoted strings, handling escaped quotes
                    const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
                    if (stringMatches && stringMatches.length > 0) {
                        const result = stringMatches
                            .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
                            .filter((s: string) => s.length > 0);
                        cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                        return result;
                    }
                }
                // Fallback 2: Extract queries line by line
                const lines = responseText.split('\n')
                    .map((line: string) => line.trim())
                    .filter((line: string) =>
                        line.length > 0 &&
                        !line.startsWith('```') &&
                        !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
                        !line.match(/^\[|\]$/) // Skip lines that are just brackets
                    );
                if (lines.length > 0) {
                    // Remove numbering, quotes and other list markers from each line
                    const result = lines.map((line: string) => {
                        return line
                            .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
                            .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
                            .replace(/^["']|["']$/g, '') // Remove surrounding quotes
                            .trim();
                    }).filter((s: string) => s.length > 0);
                    cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
                    return result;
                }
            } catch (parseError) {
                log.error(`Error parsing search queries: ${parseError}`);
            }
            // If all else fails, just use the original question
            const fallback = [userQuestion];
            log.info(`No queries extracted, using fallback: "${userQuestion}"`);
            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
            return fallback;
        } catch (error: unknown) {
--- a/src/services/llm/formatters/ollama_formatter.ts
+++ b/src/services/llm/formatters/ollama_formatter.ts
@ -23,22 +23,30 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
    /**
     * Format messages for the Ollama API
     * @param messages Messages to format
     * @param systemPrompt Optional system prompt to use
     * @param context Optional context to include
     * @param preserveSystemPrompt When true, preserves existing system messages rather than replacing them
     */
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];
        // First identify user and system messages
        const systemMessages = messages.filter(msg => msg.role === 'system');
        const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
-        // Create base system message with instructions or use default
+        // Determine if we should preserve the existing system message
-        const basePrompt = systemPrompt || PROVIDER_PROMPTS.COMMON.DEFAULT_ASSISTANT_INTRO;
+        if (preserveSystemPrompt && systemMessages.length > 0) {
-
+            // Preserve the existing system message
-        // Always add a system message with the base prompt
+            formattedMessages.push(systemMessages[0]);
-        formattedMessages.push({
+        } else {
-            role: 'system',
+            // Use provided systemPrompt or default
-            content: basePrompt
+            const basePrompt = systemPrompt || PROVIDER_PROMPTS.COMMON.DEFAULT_ASSISTANT_INTRO;
-        });
+            formattedMessages.push({
                role: 'system',
                content: basePrompt
            });
        }
        // If we have context, inject it into the first user message
        if (context && userMessages.length > 0) {
--- a/src/services/llm/pipeline/interfaces/message_formatter.ts
+++ b/src/services/llm/pipeline/interfaces/message_formatter.ts
@ -9,9 +9,10 @@ export interface MessageFormatter {
     * @param messages Original messages
     * @param systemPrompt Optional system prompt to override
     * @param context Optional context to include
     * @param preserveSystemPrompt Optional flag to preserve existing system prompt
     * @returns Formatted messages optimized for the specific provider
     */
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[];
 }
 /**
@ -22,7 +23,7 @@ export abstract class BaseMessageFormatter implements MessageFormatter {
     * Format messages with system prompt and context
     * Each provider should override this method with their specific formatting strategy
     */
-    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[];
    /**
     * Helper method to extract existing system message from messages
@ -44,7 +45,7 @@ export abstract class BaseMessageFormatter implements MessageFormatter {
 * Optimizes message format for OpenAI models (GPT-3.5, GPT-4, etc.)
 */
 export class OpenAIMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];
        // OpenAI performs best with system message first, then context as a separate system message
@ -52,7 +53,11 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
        // Handle system message
        const existingSystem = this.getSystemMessage(messages);
-        if (systemPrompt || existingSystem) {
+
        if (preserveSystemPrompt && existingSystem) {
            // Use the existing system message
            formattedMessages.push(existingSystem);
        } else if (systemPrompt || existingSystem) {
            const systemContent = systemPrompt || existingSystem?.content || '';
            formattedMessages.push({
                role: 'system',
@ -80,7 +85,7 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
 * Optimizes message format for Claude models
 */
 export class AnthropicMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];
        // Anthropic performs best with a specific XML-like format for context and system instructions
@ -89,7 +94,9 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
        let systemContent = '';
        const existingSystem = this.getSystemMessage(messages);
-        if (systemPrompt || existingSystem) {
+        if (preserveSystemPrompt && existingSystem) {
            systemContent = existingSystem.content;
        } else if (systemPrompt || existingSystem) {
            systemContent = systemPrompt || existingSystem?.content || '';
        }
@ -118,7 +125,7 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
 * Optimizes message format for open-source models
 */
 export class OllamaMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];
        // Ollama format is closer to raw prompting and typically works better with
@ -156,12 +163,15 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
 * Default message formatter when provider is unknown
 */
 export class DefaultMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];
        // Handle system message
        const existingSystem = this.getSystemMessage(messages);
-        if (systemPrompt || existingSystem) {
+
        if (preserveSystemPrompt && existingSystem) {
            formattedMessages.push(existingSystem);
        } else if (systemPrompt || existingSystem) {
            const systemContent = systemPrompt || existingSystem?.content || '';
            formattedMessages.push({
                role: 'system',
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@ -48,10 +48,30 @@ export class OllamaService extends BaseAIService {
        const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));
        try {
-            // Use the formatter to prepare messages
+            // Determine whether to use the formatter or send messages directly
-            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
+            let messagesToSend: Message[];
-            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
+            if (opts.bypassFormatter) {
                // Bypass the formatter entirely - use messages as is
                messagesToSend = [...messages];
                console.log(`Bypassing formatter for Ollama request with ${messages.length} messages`);
            } else {
                // Use the formatter to prepare messages
                messagesToSend = this.formatter.formatMessages(
                    messages,
                    systemPrompt,
                    undefined, // context
                    opts.preserveSystemPrompt
                );
                console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(messagesToSend, null, 2));
            }
            // Check if this is a request that expects JSON response
            const expectsJsonResponse = opts.expectsJsonResponse || false;
            if (expectsJsonResponse) {
                console.log(`Request expects JSON response, adding response_format parameter`);
            }
            const response = await fetch(`${apiBase}/api/chat`, {
                method: 'POST',
@ -60,9 +80,11 @@ export class OllamaService extends BaseAIService {
                },
                body: JSON.stringify({
                    model,
-                    messages: formattedMessages,
+                    messages: messagesToSend,
                    options: {
-                        temperature
+                        temperature,
                        // Add response_format for requests that expect JSON
                        ...(expectsJsonResponse ? { response_format: { type: "json_object" } } : {})
                    },
                    stream: false
                })
--- a/src/services/llm/utils/json_extractor.ts
+++ b/src/services/llm/utils/json_extractor.ts
@ -0,0 +1,387 @@
 import log from '../../log.js';
 /**
 * Options controlling how `JsonExtractor.extract` pulls JSON out of an LLM response.
 * Every field is optional; `extract` supplies a default for each one.
 */
 export interface JsonExtractionOptions {
    /**
     * When true, `extract` looks for an array of strings first and returns it as
     * `string[]` (intended for query enhancers and similar callers). It also enables
     * the list-style fallback extraction. `extract` defaults this to false.
     */
    extractArrays?: boolean;
    /** Minimum length an extracted string must have to be kept. `extract` defaults this to 3. */
    minStringLength?: number;
    /** Attempt heuristic repairs of malformed JSON when parsing the response. `extract` defaults this to true. */
    applyFixes?: boolean;
    /** Use pattern-based fallback extraction when JSON parsing fails. `extract` defaults this to true. */
    useFallbacks?: boolean;
 }
 /**
 * Structure of a tool call extracted from an LLM response
 * (the element type returned by `JsonExtractor.extractToolCalls`).
 */
 export interface ExtractedToolCall {
    /** The name of the tool to call (taken from the `tool_name` field of the response JSON) */
    tool_name: string;
    /** Parameters for the tool call (taken from the `parameters` field of the response JSON) */
    parameters: Record<string, any>;
    /**
     * The JSON text the call was parsed from. When the call was recovered through the
     * regex fallback this is a reconstructed string rather than a verbatim slice of the response.
     */
    originalJson?: string;
 }
 /**
 * Utility class for extracting and parsing JSON from LLM responses.
 *
 * Parsing is always attempted in order of increasing aggressiveness:
 *   1. strict `JSON.parse` on the text as-is,
 *   2. conservative repairs (raw newlines, trailing commas),
 *   3. heuristic repairs (quote / comma guessing),
 *   4. pattern-based fallbacks (lists, bullet points, line-by-line).
 * Well-formed JSON is therefore never altered by the repair heuristics.
 */
 export class JsonExtractor {
    /** Minimum string length applied when the caller does not provide one. */
    private static readonly DEFAULT_MIN_STRING_LENGTH = 3;

    /**
     * Extract JSON from an LLM response
     *
     * @param text - The raw text from an LLM response
     * @param options - Options to control extraction behavior
     * @returns The parsed JSON value, or null if nothing could be extracted.
     *          With `extractArrays: true` the result is a `string[]` whenever a list
     *          of strings could be recovered.
     */
    static extract<T = any>(text: string, options: JsonExtractionOptions = {}): T | null {
        // A missing response (e.g. provider returned no text) is "nothing to extract", not an error.
        if (typeof text !== 'string') {
            return null;
        }

        // Resolve each option with `??` so that an explicit `undefined` falls back to the
        // default while an explicit `0` / `false` is honoured.
        const opts = {
            extractArrays: options.extractArrays ?? false,
            minStringLength: options.minStringLength ?? this.DEFAULT_MIN_STRING_LENGTH,
            applyFixes: options.applyFixes ?? true,
            useFallbacks: options.useFallbacks ?? true
        };

        try {
            // Clean up the input text
            const cleanedText = this.cleanMarkdownAndFormatting(text);

            // Try to extract specific JSON structures if needed
            if (opts.extractArrays) {
                const arrayResult = this.extractArray(cleanedText, opts);
                if (arrayResult) {
                    return arrayResult as unknown as T;
                }
            }

            // Strict parsing comes first: the repair heuristics can rewrite valid JSON
            // (for example `["a", "b"]` into a single string), so they must only ever
            // see text that a strict parse has already rejected.
            try {
                return JSON.parse(cleanedText) as T;
            } catch (e) {
                // Not strictly valid JSON - fall through to repairs and fallbacks
            }

            // Try parsing again with repairs if enabled
            if (opts.applyFixes) {
                const fixedResult = this.extractWithFixes(cleanedText);
                if (fixedResult !== null) {
                    return fixedResult as T;
                }
            }

            // Use fallbacks if enabled
            if (opts.useFallbacks) {
                if (opts.extractArrays) {
                    // The raw text is used on purpose: fence lines such as "```ts" are still
                    // recognisable there and get skipped by the line-by-line fallback.
                    const items = this.extractItemsAsFallback(text, opts.minStringLength);
                    if (items.length > 0) {
                        return items as unknown as T;
                    }
                }

                // If it looks like a JSON object embedded in other text, try to isolate it
                if (cleanedText.includes('{') && cleanedText.includes('}')) {
                    const objectResult = this.extractObject(cleanedText);
                    if (objectResult) {
                        return objectResult as T;
                    }
                }
            }

            return null;
        } catch (error) {
            log.error(`JSON extraction error: ${error}`);
            return null;
        }
    }

    /**
     * Extract tool calls from an LLM response
     * Specifically designed to handle Ollama tool call format
     *
     * @param text - Raw text from the LLM response
     * @returns Array of tool calls or empty array if none found
     */
    static extractToolCalls(text: string): ExtractedToolCall[] {
        const toolCalls: ExtractedToolCall[] = [];

        if (typeof text !== 'string' || text.length === 0) {
            return toolCalls;
        }

        try {
            // Clean up the text and find all JSON objects
            const cleanedText = this.cleanMarkdownAndFormatting(text);

            for (const jsonString of this.findJsonObjects(cleanedText)) {
                try {
                    // Strict parse first, repairs only if that fails
                    const parsedJson = this.parseWithRepairs(jsonString);

                    if (this.isToolCallShape(parsedJson)) {
                        toolCalls.push({
                            tool_name: parsedJson.tool_name,
                            parameters: parsedJson.parameters,
                            originalJson: jsonString
                        });
                    }
                } catch (e) {
                    // This candidate could not be parsed even with repairs; try the next one
                    log.info(`Failed to parse potential tool call JSON: ${e}`);
                }
            }

            // If no candidate object parsed as a tool call, look for the two fields individually
            if (toolCalls.length === 0) {
                const toolNameMatch = cleanedText.match(/["']?tool_name["']?\s*:\s*["']([^"']+)["']/);
                const parametersKey = /["']?parameters["']?\s*:\s*(?=\{)/.exec(cleanedText);
                const toolName = toolNameMatch?.[1];

                if (toolName && parametersKey) {
                    // Take the balanced object that starts right after the key, so nested
                    // parameter objects are captured in full.
                    const afterKey = cleanedText.slice(parametersKey.index + parametersKey[0].length);
                    const rawParameters = this.findJsonObjects(afterKey)[0];

                    if (rawParameters !== undefined) {
                        try {
                            const parameters = this.parseWithRepairs(rawParameters);

                            if (typeof parameters === 'object' && parameters !== null) {
                                toolCalls.push({
                                    tool_name: toolName,
                                    parameters: parameters as Record<string, any>,
                                    // Reconstructed: the surrounding object itself was not parseable
                                    originalJson: JSON.stringify({ tool_name: toolName, parameters })
                                });
                            }
                        } catch (e) {
                            log.info(`Failed to parse tool call with regex approach: ${e}`);
                        }
                    }
                }
            }
        } catch (error) {
            log.error(`Error extracting tool calls: ${error}`);
        }

        return toolCalls;
    }

    /**
     * Check whether a parsed value has the `{ tool_name: string, parameters: object }` shape
     */
    private static isToolCallShape(value: unknown): value is { tool_name: string; parameters: Record<string, any> } {
        if (typeof value !== 'object' || value === null) {
            return false;
        }

        const candidate = value as Record<string, unknown>;
        return (
            typeof candidate.tool_name === 'string' &&
            candidate.tool_name.length > 0 &&
            typeof candidate.parameters === 'object' &&
            candidate.parameters !== null
        );
    }

    /**
     * Find all potential top-level JSON objects in a text
     *
     * Braces inside double-quoted strings are ignored. If that yields nothing (for
     * example because of an unterminated quote in malformed output) the scan is
     * repeated counting braces only.
     */
    private static findJsonObjects(text: string): string[] {
        const stringAware = this.scanForObjects(text, true);
        return stringAware.length > 0 ? stringAware : this.scanForObjects(text, false);
    }

    /**
     * Scan text for balanced `{ ... }` spans
     *
     * @param text - Text to scan
     * @param respectStrings - When true, characters inside double-quoted strings
     *                         (within an object) do not affect brace depth
     */
    private static scanForObjects(text: string, respectStrings: boolean): string[] {
        const found: string[] = [];
        let depth = 0;
        let start = -1;
        let inString = false;
        let escaped = false;

        for (let i = 0; i < text.length; i++) {
            const char = text[i];

            // Strings are only tracked inside an object; quotes in surrounding prose are irrelevant
            if (respectStrings && start !== -1) {
                if (inString) {
                    if (escaped) {
                        escaped = false;
                    } else if (char === '\\') {
                        escaped = true;
                    } else if (char === '"') {
                        inString = false;
                    }
                    continue;
                }
                if (char === '"') {
                    inString = true;
                    continue;
                }
            }

            if (char === '{') {
                if (depth === 0) {
                    start = i;
                }
                depth++;
            } else if (char === '}' && depth > 0) {
                // A '}' with no matching '{' is ignored so it cannot push the depth negative
                // and hide every object that follows it.
                depth--;
                if (depth === 0) {
                    found.push(text.slice(start, i + 1));
                    start = -1;
                }
            }
        }

        return found;
    }

    /**
     * Clean Markdown formatting and special characters from text
     */
    private static cleanMarkdownAndFormatting(text: string): string {
        return text
            .replace(/```(?:json)?|```/g, '') // Remove code block markers
            .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes
            .trim();
    }

    /**
     * Extract an array of strings from text using regex and pattern matching
     *
     * @returns The extracted strings, or null if no array with at least one
     *          sufficiently long item was found
     */
    private static extractArray(text: string, options: JsonExtractionOptions): string[] | null {
        // `??` rather than `||`: a caller-supplied 0 means "keep everything"
        const minLength = options.minStringLength ?? this.DEFAULT_MIN_STRING_LENGTH;

        // First attempt: the first bracketed run of quoted strings. Commas between the
        // strings are optional, which tolerates the common "missing comma" mistake.
        const arrayPattern = /\[((?:"(?:\\.|[^"\\])*"(?:\s*,\s*)?)+)\]/;
        const arrayContent = arrayPattern.exec(text)?.[1];

        if (arrayContent !== undefined) {
            const stringPattern = /"(?:\\.|[^"\\])*"/g;
            const items = [...arrayContent.matchAll(stringPattern)]
                .map(m => this.unquote(m[0]).trim())
                .filter(s => s.length >= minLength);

            if (items.length > 0) {
                log.info(`Successfully extracted ${items.length} items using regex pattern`);
                return items;
            }
        }

        // Second attempt: parse everything between the first '[' and the last ']'
        const arrayMatch = text.match(/\[[\s\S]*\]/);
        if (arrayMatch) {
            try {
                const parsed = this.parseWithRepairs(arrayMatch[0]);
                if (Array.isArray(parsed) && parsed.length > 0) {
                    const items = parsed
                        .map(item => this.stringifyItem(item).trim())
                        .filter(s => s.length >= minLength);

                    if (items.length > 0) {
                        log.info(`Successfully parsed JSON array with ${items.length} items`);
                        return items;
                    }
                }
            } catch (e) {
                // Fall through to fallbacks
            }
        }

        return null;
    }

    /**
     * Decode a double-quoted JSON string literal, falling back to the raw
     * content when it contains sequences JSON does not allow
     */
    private static unquote(quoted: string): string {
        try {
            const value: unknown = JSON.parse(quoted);
            if (typeof value === 'string') {
                return value;
            }
        } catch (e) {
            // Invalid escape or raw control character - keep the raw content
        }
        return quoted.slice(1, -1);
    }

    /**
     * Convert an array element to a string; objects are serialised as JSON
     * instead of collapsing to "[object Object]"
     */
    private static stringifyItem(item: unknown): string {
        if (typeof item === 'string') {
            return item;
        }
        if (typeof item === 'object' && item !== null) {
            return JSON.stringify(item);
        }
        return String(item);
    }

    /**
     * Extract a JSON object embedded in other text
     * (everything from the first '{' to the last '}')
     */
    private static extractObject(text: string): Record<string, any> | null {
        const objectMatch = text.match(/{[\s\S]*}/);
        if (!objectMatch) return null;

        try {
            const parsed = this.parseWithRepairs(objectMatch[0]);
            return typeof parsed === 'object' && parsed !== null
                ? (parsed as Record<string, any>)
                : null;
        } catch (e) {
            return null;
        }
    }

    /**
     * Parse JSON, escalating from a strict parse to conservative and then
     * heuristic repairs
     *
     * @returns The parsed value
     * @throws The error from the strict parse when every attempt fails
     */
    private static parseWithRepairs(text: string): unknown {
        const attempts: Array<() => string> = [
            () => text,
            () => this.applySafeJsonFixes(text),
            () => this.applyJsonFixes(text)
        ];

        let firstError: unknown;
        for (const attempt of attempts) {
            try {
                return JSON.parse(attempt());
            } catch (e) {
                if (firstError === undefined) {
                    firstError = e;
                }
            }
        }

        throw firstError instanceof Error ? firstError : new SyntaxError('Unable to parse JSON');
    }

    /**
     * Apply repairs that leave well-formed JSON structure alone:
     * raw line breaks and trailing commas
     */
    private static applySafeJsonFixes(text: string): string {
        return text
            .replace(/\r?\n/g, ' ')   // Raw line breaks are not allowed inside JSON strings
            .replace(/,\s*]/g, ']')   // Trailing comma before a closing bracket
            .replace(/,\s*}/g, '}');  // Trailing comma before a closing brace
    }

    /**
     * Apply all repairs to malformed JSON text
     *
     * The quote/comma heuristics below cannot tell `"a", "b"` apart from a string
     * with embedded quotes, so they WILL damage valid JSON. Only call this on text
     * that has already failed a strict parse (see `parseWithRepairs`).
     */
    private static applyJsonFixes(text: string): string {
        let fixed = this.applySafeJsonFixes(text);

        // Turn double quotes nested inside a string into single quotes
        fixed = fixed.replace(/"([^"]*)"([^"]*)"([^"]*)"/g, '"$1\'$2\'$3"');

        // Fix missing commas between elements
        fixed = fixed.replace(/"([^"]*)"(?:\s+)"([^"]*)"/g, '"$1", "$2"');

        // Fix missing commas in arrays (quotes with only spaces between them)
        fixed = fixed.replace(/"([^"]*)"\s+"/g, '"$1", "');

        // Fix unclosed quotes before commas
        fixed = fixed.replace(/"([^"]*),\s*(?="|])/g, '"$1", ');

        return fixed;
    }

    /**
     * Parse text with repairs, reporting failure as null instead of throwing
     */
    private static extractWithFixes(text: string): any | null {
        try {
            return this.parseWithRepairs(text);
        } catch (e) {
            return null;
        }
    }

    /**
     * Extract items as a fallback using various list-like patterns
     *
     * @param text - Text to scan (one candidate item per line)
     * @param minLength - Minimum trimmed length for an item to be kept
     * @returns Unique items in order of discovery
     */
    private static extractItemsAsFallback(text: string, minLength: number = 3): string[] {
        // Multiline anchors are used instead of consuming the surrounding "\n":
        // consuming it made every second consecutive line unmatched.
        const patterns = [
            /^[ \t]*["'](.+?)["'](?:,|\r?$)/gm,          // Quoted strings
            /^[ \t]*\[["'](.+?)["']\](?:,|\r?$)/gm,      // Single item arrays
            /^[ \t]*\d+\.[ \t]*(.+?)[ \t]*\r?$/gm,       // Numbered list items (number excluded)
            /^[ \t]*[-*•][ \t]*(.+?)[ \t]*\r?$/gm        // Bullet list items
        ];

        const extractedItems = new Set<string>();

        // Try each pattern
        for (const pattern of patterns) {
            for (const match of text.matchAll(pattern)) {
                const item = match[1]?.trim();
                if (item && item.length >= minLength) {
                    extractedItems.add(item);
                }
            }
        }

        // Try line-by-line extraction as last resort
        if (extractedItems.size === 0) {
            const lines = text.split('\n')
                .map(line => line.trim())
                .filter(line =>
                    line.length >= minLength &&
                    !line.startsWith('```') &&
                    !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
                    !line.match(/^\[|\]$/) // Skip lines that start with '[' or end with ']'
                );

            for (const line of lines) {
                // Remove common formatting
                const cleaned = line
                    .replace(/^\d+\.?\s*/, '') // Remove numbered list markers
                    .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
                    .replace(/^["']|["']$/g, '') // Remove surrounding quotes
                    .trim();

                if (cleaned.length >= minLength) {
                    extractedItems.add(cleaned);
                }
            }
        }

        return Array.from(extractedItems);
    }
 }
 export default JsonExtractor;