From ed52d7172969ce5ad2b88464ff42d1145b122850 Mon Sep 17 00:00:00 2001
From: perf3ct <jonfuller2012@gmail.com>
Date: Tue, 1 Apr 2025 21:42:09 +0000
Subject: [PATCH] do a better job at centralizing json extraction, and query
 "enhancer" search queries

---
 src/services/llm/ai_interface.ts              |   3 +
 .../llm/context/modules/query_enhancer.ts     |  99 ++---
 .../llm/formatters/ollama_formatter.ts        |  26 +-
 .../pipeline/interfaces/message_formatter.ts  |  84 ++--
 src/services/llm/providers/ollama_service.ts  |  32 +-
 src/services/llm/utils/json_extractor.ts      | 387 ++++++++++++++++++
 6 files changed, 507 insertions(+), 124 deletions(-)
 create mode 100644 src/services/llm/utils/json_extractor.ts

diff --git a/src/services/llm/ai_interface.ts b/src/services/llm/ai_interface.ts
index c539f33a8..e93824aab 100644
--- a/src/services/llm/ai_interface.ts
+++ b/src/services/llm/ai_interface.ts
@@ -23,6 +23,9 @@ export interface ChatCompletionOptions {
     presencePenalty?: number;
     showThinking?: boolean;
     systemPrompt?: string;
+    preserveSystemPrompt?: boolean; // Whether to preserve existing system message
+    bypassFormatter?: boolean; // Whether to bypass the message formatter entirely
+    expectsJsonResponse?: boolean; // Whether this request expects a JSON response
     stream?: boolean; // Whether to stream the response
 }
 
diff --git a/src/services/llm/context/modules/query_enhancer.ts b/src/services/llm/context/modules/query_enhancer.ts
index 56453675c..e2f2eeefe 100644
--- a/src/services/llm/context/modules/query_enhancer.ts
+++ b/src/services/llm/context/modules/query_enhancer.ts
@@ -4,6 +4,7 @@ import type { Message } from '../../ai_interface.js';
 import { CONTEXT_PROMPTS } from '../../constants/llm_prompt_constants.js';
 import type { LLMServiceInterface } from '../../interfaces/agent_tool_interfaces.js';
 import type { IQueryEnhancer } from '../../interfaces/context_interfaces.js';
+import JsonExtractor from '../../utils/json_extractor.js';
 
 /**
  * Provides utilities for enhancing queries and generating search queries
@@ -12,6 +13,15 @@ export class QueryEnhancer implements IQueryEnhancer {
     // Use the centralized query enhancer prompt
     private metaPrompt = CONTEXT_PROMPTS.QUERY_ENHANCER;
 
+    /**
+     * Build the system prompt: the shared query-enhancer meta prompt followed by instructions to answer with a plain JSON array of strings (no markdown code blocks)
+     */
+    private getEnhancedPrompt(): string {
+        return `${this.metaPrompt}
+IMPORTANT: You must respond with valid JSON arrays. Always include commas between array elements.
+Format your answer as a valid JSON array without markdown code blocks, like this: ["item1", "item2", "item3"]`;
+    }
+
     /**
      * Generate search queries to find relevant information for the user question
      *
@@ -32,95 +42,38 @@ export class QueryEnhancer implements IQueryEnhancer {
             }
 
             const messages: Message[] = [
-                { role: "system", content: this.metaPrompt },
+                { role: "system", content: this.getEnhancedPrompt() },
                 { role: "user", content: userQuestion }
             ];
 
             const options = {
                 temperature: 0.3,
-                maxTokens: 300
+                maxTokens: 300,
+                bypassFormatter: true, // Completely bypass formatter for query enhancement
+                expectsJsonResponse: true // Explicitly request JSON-formatted response
             };
 
             // Get the response from the LLM
             const response = await llmService.generateChatCompletion(messages, options);
             const responseText = response.text; // Extract the text from the response object
 
-            try {
-                // Remove code blocks, quotes, and clean up the response text
-                let jsonStr = responseText
-                    .replace(/```(?:json)?|```/g, '') // Remove code block markers
-                    .replace(/[\u201C\u201D]/g, '"')  // Replace smart quotes with straight quotes
-                    .trim();
+            // Use the JsonExtractor to parse the response
+            const queries = JsonExtractor.extract<string[]>(responseText, {
+                extractArrays: true,
+                minStringLength: 3,
+                applyFixes: true,
+                useFallbacks: true
+            });
 
-                // Check if the text might contain a JSON array (has square brackets)
-                if (jsonStr.includes('[') && jsonStr.includes(']')) {
-                    // Extract just the array part if there's explanatory text
-                    const arrayMatch = jsonStr.match(/\[[\s\S]*\]/);
-                    if (arrayMatch) {
-                        jsonStr = arrayMatch[0];
-                    }
-
-                    // Try to parse the JSON
-                    try {
-                        const queries = JSON.parse(jsonStr);
-                        if (Array.isArray(queries) && queries.length > 0) {
-                            const result = queries.map(q => typeof q === 'string' ? q : String(q)).filter(Boolean);
-                            cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
-                            return result;
-                        }
-                    } catch (innerError) {
-                        // If parsing fails, log it and continue to the fallback
-                        log.info(`JSON parse error: ${innerError}. Will use fallback parsing for: ${jsonStr}`);
-                    }
-                }
-
-                // Fallback 1: Try to extract an array manually by splitting on commas between quotes
-                if (jsonStr.includes('[') && jsonStr.includes(']')) {
-                    const arrayContent = jsonStr.substring(
-                        jsonStr.indexOf('[') + 1,
-                        jsonStr.lastIndexOf(']')
-                    );
-
-                    // Use regex to match quoted strings, handling escaped quotes
-                    const stringMatches = arrayContent.match(/"((?:\\.|[^"\\])*)"/g);
-                    if (stringMatches && stringMatches.length > 0) {
-                        const result = stringMatches
-                            .map((m: string) => m.substring(1, m.length - 1)) // Remove surrounding quotes
-                            .filter((s: string) => s.length > 0);
-                        cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
-                        return result;
-                    }
-                }
-
-                // Fallback 2: Extract queries line by line
-                const lines = responseText.split('\n')
-                    .map((line: string) => line.trim())
-                    .filter((line: string) =>
-                        line.length > 0 &&
-                        !line.startsWith('```') &&
-                        !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
-                        !line.match(/^\[|\]$/) // Skip lines that are just brackets
-                    );
-
-                if (lines.length > 0) {
-                    // Remove numbering, quotes and other list markers from each line
-                    const result = lines.map((line: string) => {
-                        return line
-                            .replace(/^\d+\.?\s*/, '') // Remove numbered list markers (1., 2., etc)
-                            .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
-                            .replace(/^["']|["']$/g, '') // Remove surrounding quotes
-                            .trim();
-                    }).filter((s: string) => s.length > 0);
-
-                    cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, result);
-                    return result;
-                }
-            } catch (parseError) {
-                log.error(`Error parsing search queries: ${parseError}`);
+            if (queries && queries.length > 0) {
+                log.info(`Extracted ${queries.length} queries using JsonExtractor`);
+                cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, queries);
+                return queries;
             }
 
             // If all else fails, just use the original question
             const fallback = [userQuestion];
+            log.info(`No queries extracted, using fallback: "${userQuestion}"`);
             cacheManager.storeQueryResults(`searchQueries:${userQuestion}`, fallback);
             return fallback;
         } catch (error: unknown) {
diff --git a/src/services/llm/formatters/ollama_formatter.ts b/src/services/llm/formatters/ollama_formatter.ts
index 15216112c..91090cdd0 100644
--- a/src/services/llm/formatters/ollama_formatter.ts
+++ b/src/services/llm/formatters/ollama_formatter.ts
@@ -23,22 +23,30 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
 
     /**
      * Format messages for the Ollama API
+     * @param messages Messages to format
+     * @param systemPrompt Optional system prompt to use
+     * @param context Optional context to include
+     * @param preserveSystemPrompt When true, preserves existing system messages rather than replacing them
      */
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
         const formattedMessages: Message[] = [];
 
         // First identify user and system messages
         const systemMessages = messages.filter(msg => msg.role === 'system');
         const userMessages = messages.filter(msg => msg.role === 'user' || msg.role === 'assistant');
 
-        // Create base system message with instructions or use default
-        const basePrompt = systemPrompt || PROVIDER_PROMPTS.COMMON.DEFAULT_ASSISTANT_INTRO;
-
-        // Always add a system message with the base prompt
-        formattedMessages.push({
-            role: 'system',
-            content: basePrompt
-        });
+        // Determine if we should preserve the existing system message
+        if (preserveSystemPrompt && systemMessages.length > 0) {
+            // Preserve the existing system message
+            formattedMessages.push(systemMessages[0]);
+        } else {
+            // Use provided systemPrompt or default
+            const basePrompt = systemPrompt || PROVIDER_PROMPTS.COMMON.DEFAULT_ASSISTANT_INTRO;
+            formattedMessages.push({
+                role: 'system',
+                content: basePrompt
+            });
+        }
 
         // If we have context, inject it into the first user message
         if (context && userMessages.length > 0) {
diff --git a/src/services/llm/pipeline/interfaces/message_formatter.ts b/src/services/llm/pipeline/interfaces/message_formatter.ts
index c092eceef..9fc9f19f4 100644
--- a/src/services/llm/pipeline/interfaces/message_formatter.ts
+++ b/src/services/llm/pipeline/interfaces/message_formatter.ts
@@ -9,9 +9,10 @@ export interface MessageFormatter {
      * @param messages Original messages
      * @param systemPrompt Optional system prompt to override
      * @param context Optional context to include
+     * @param preserveSystemPrompt Optional flag to preserve existing system prompt
      * @returns Formatted messages optimized for the specific provider
      */
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[];
 }
 
 /**
@@ -22,15 +23,15 @@ export abstract class BaseMessageFormatter implements MessageFormatter {
      * Format messages with system prompt and context
      * Each provider should override this method with their specific formatting strategy
      */
-    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[];
-    
+    abstract formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[];
+
     /**
      * Helper method to extract existing system message from messages
      */
     protected getSystemMessage(messages: Message[]): Message | undefined {
         return messages.find(msg => msg.role === 'system');
     }
-    
+
     /**
      * Helper method to create a copy of messages without system message
      */
@@ -44,22 +45,26 @@ export abstract class BaseMessageFormatter implements MessageFormatter {
  * Optimizes message format for OpenAI models (GPT-3.5, GPT-4, etc.)
  */
 export class OpenAIMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
         const formattedMessages: Message[] = [];
-        
+
         // OpenAI performs best with system message first, then context as a separate system message
         // or appended to the original system message
-        
+
         // Handle system message
         const existingSystem = this.getSystemMessage(messages);
-        if (systemPrompt || existingSystem) {
+
+        if (preserveSystemPrompt && existingSystem) {
+            // Use the existing system message
+            formattedMessages.push(existingSystem);
+        } else if (systemPrompt || existingSystem) {
             const systemContent = systemPrompt || existingSystem?.content || '';
             formattedMessages.push({
                 role: 'system',
                 content: systemContent
             });
         }
-        
+
         // Add context as a system message with clear instruction
         if (context) {
             formattedMessages.push({
@@ -67,10 +72,10 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
                 content: `Please use the following context to respond to the user's messages:\n\n${context}`
             });
         }
-        
+
         // Add remaining messages (excluding system)
         formattedMessages.push(...this.getMessagesWithoutSystem(messages));
-        
+
         return formattedMessages;
     }
 }
@@ -80,24 +85,26 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
  * Optimizes message format for Claude models
  */
 export class AnthropicMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
         const formattedMessages: Message[] = [];
-        
+
         // Anthropic performs best with a specific XML-like format for context and system instructions
-        
+
         // Create system message with combined prompt and context if any
         let systemContent = '';
         const existingSystem = this.getSystemMessage(messages);
-        
-        if (systemPrompt || existingSystem) {
+
+        if (preserveSystemPrompt && existingSystem) {
+            systemContent = existingSystem.content;
+        } else if (systemPrompt || existingSystem) {
             systemContent = systemPrompt || existingSystem?.content || '';
         }
-        
+
         // For Claude, wrap context in XML tags for clear separation
         if (context) {
             systemContent += `\n\n<context>\n${context}\n</context>`;
         }
-        
+
         // Add system message if we have content
         if (systemContent) {
             formattedMessages.push({
@@ -105,10 +112,10 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
                 content: systemContent
             });
         }
-        
+
         // Add remaining messages (excluding system)
         formattedMessages.push(...this.getMessagesWithoutSystem(messages));
-        
+
         return formattedMessages;
     }
 }
@@ -118,25 +125,25 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
  * Optimizes message format for open-source models
  */
 export class OllamaMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
         const formattedMessages: Message[] = [];
-        
+
         // Ollama format is closer to raw prompting and typically works better with
         // context embedded in system prompt rather than as separate messages
-        
+
         // Build comprehensive system prompt
         let systemContent = '';
         const existingSystem = this.getSystemMessage(messages);
-        
+
         if (systemPrompt || existingSystem) {
             systemContent = systemPrompt || existingSystem?.content || '';
         }
-        
+
         // Add context to system prompt
         if (context) {
             systemContent += `\n\nReference information:\n${context}`;
         }
-        
+
         // Add system message if we have content
         if (systemContent) {
             formattedMessages.push({
@@ -144,10 +151,10 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
                 content: systemContent
             });
         }
-        
+
         // Add remaining messages (excluding system)
         formattedMessages.push(...this.getMessagesWithoutSystem(messages));
-        
+
         return formattedMessages;
     }
 }
@@ -156,19 +163,22 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
  * Default message formatter when provider is unknown
  */
 export class DefaultMessageFormatter extends BaseMessageFormatter {
-    formatMessages(messages: Message[], systemPrompt?: string, context?: string): Message[] {
+    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
         const formattedMessages: Message[] = [];
-        
+
         // Handle system message
         const existingSystem = this.getSystemMessage(messages);
-        if (systemPrompt || existingSystem) {
+
+        if (preserveSystemPrompt && existingSystem) {
+            formattedMessages.push(existingSystem);
+        } else if (systemPrompt || existingSystem) {
             const systemContent = systemPrompt || existingSystem?.content || '';
             formattedMessages.push({
                 role: 'system',
                 content: systemContent
             });
         }
-        
+
         // Add context as a user message
         if (context) {
             formattedMessages.push({
@@ -176,10 +186,10 @@ export class DefaultMessageFormatter extends BaseMessageFormatter {
                 content: `Here is context to help you answer my questions: ${context}`
             });
         }
-        
+
         // Add user/assistant messages
         formattedMessages.push(...this.getMessagesWithoutSystem(messages));
-        
+
         return formattedMessages;
     }
 }
@@ -194,7 +204,7 @@ export class MessageFormatterFactory {
         ollama: new OllamaMessageFormatter(),
         default: new DefaultMessageFormatter()
     };
-    
+
     /**
      * Get the appropriate formatter for a provider
      * @param provider Provider name
@@ -203,7 +213,7 @@ export class MessageFormatterFactory {
     static getFormatter(provider: string): MessageFormatter {
         return this.formatters[provider] || this.formatters.default;
     }
-    
+
     /**
      * Register a custom formatter for a provider
      * @param provider Provider name
@@ -212,4 +222,4 @@ export class MessageFormatterFactory {
     static registerFormatter(provider: string, formatter: MessageFormatter): void {
         this.formatters[provider] = formatter;
     }
-}
\ No newline at end of file
+}
diff --git a/src/services/llm/providers/ollama_service.ts b/src/services/llm/providers/ollama_service.ts
index d8aab4598..9dddc3e1b 100644
--- a/src/services/llm/providers/ollama_service.ts
+++ b/src/services/llm/providers/ollama_service.ts
@@ -48,10 +48,30 @@ export class OllamaService extends BaseAIService {
         const systemPrompt = this.getSystemPrompt(opts.systemPrompt || options.getOption('aiSystemPrompt'));
 
         try {
-            // Use the formatter to prepare messages
-            const formattedMessages = this.formatter.formatMessages(messages, systemPrompt);
+            // Determine whether to use the formatter or send messages directly
+            let messagesToSend: Message[];
 
-            console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(formattedMessages, null, 2));
+            if (opts.bypassFormatter) {
+                // Bypass the formatter entirely - use messages as is
+                messagesToSend = [...messages];
+                console.log(`Bypassing formatter for Ollama request with ${messages.length} messages`);
+            } else {
+                // Use the formatter to prepare messages
+                messagesToSend = this.formatter.formatMessages(
+                    messages,
+                    systemPrompt,
+                    undefined, // context
+                    opts.preserveSystemPrompt
+                );
+                console.log(`Sending to Ollama with formatted messages:`, JSON.stringify(messagesToSend, null, 2));
+            }
+
+            // Check if this is a request that expects JSON response
+            const expectsJsonResponse = opts.expectsJsonResponse || false;
+
+            if (expectsJsonResponse) {
+                console.log(`Request expects JSON response, adding response_format parameter`);
+            }
 
             const response = await fetch(`${apiBase}/api/chat`, {
                 method: 'POST',
@@ -60,9 +80,11 @@ export class OllamaService extends BaseAIService {
                 },
                 body: JSON.stringify({
                     model,
-                    messages: formattedMessages,
+                    messages: messagesToSend,
                     options: {
-                        temperature
+                        temperature,
+                        // Add response_format for requests that expect JSON
+                        ...(expectsJsonResponse ? { response_format: { type: "json_object" } } : {})
                     },
                     stream: false
                 })
diff --git a/src/services/llm/utils/json_extractor.ts b/src/services/llm/utils/json_extractor.ts
new file mode 100644
index 000000000..5196544c3
--- /dev/null
+++ b/src/services/llm/utils/json_extractor.ts
@@ -0,0 +1,387 @@
+import log from '../../log.js';
+
+/**
+ * Options for JSON extraction; every field is optional and JsonExtractor.extract fills in the defaults noted below
+ */
+export interface JsonExtractionOptions {
+    /** Attempt to find and extract arrays as the primary target (for query enhancers, etc.); defaults to false */
+    extractArrays?: boolean;
+    /** Minimum length for extracted strings to be considered valid; defaults to 3 */
+    minStringLength?: number;
+    /** Apply fixes to malformed JSON before parsing; defaults to true */
+    applyFixes?: boolean;
+    /** Whether to use fallback extraction methods when JSON parsing fails; defaults to true */
+    useFallbacks?: boolean;
+}
+
+/**
+ * Structure of a tool call extracted from an LLM response (produced by JsonExtractor.extractToolCalls)
+ */
+export interface ExtractedToolCall {
+    /** The name of the tool to call */
+    tool_name: string;
+    /** Parameters for the tool call, as parsed from the response (not validated against any tool schema) */
+    parameters: Record<string, any>;
+    /** The JSON text the call was parsed from; in the regex fallback this is a reconstructed string, not a verbatim slice of the response */
+    originalJson?: string;
+}
+
+/**
+ * Utility class for extracting and parsing JSON from LLM responses
+ * Handles malformed JSON, escaping issues, and provides fallback mechanisms
+ */
+export class JsonExtractor {
+    /**
+     * Extract JSON from an LLM response, trying in order: array extraction (when extractArrays), parsing with fixes (when applyFixes), plain JSON.parse, then fallbacks (when useFallbacks)
+     *
+     * @param text - The raw text from an LLM response
+     * @param options - Options to control extraction behavior; omitted fields default to extractArrays=false, minStringLength=3, applyFixes=true, useFallbacks=true
+     * @returns The first successful result, cast to T without shape validation, or null if every strategy failed or an error was thrown (errors are logged)
+     */
+    static extract<T = any>(text: string, options: JsonExtractionOptions = {}): T | null {
+        const opts = {
+            extractArrays: false,
+            minStringLength: 3,
+            applyFixes: true,
+            useFallbacks: true,
+            ...options
+        };
+
+        try {
+            // Strip code-fence markers and normalize curly double quotes before any parsing attempt
+            let cleanedText = this.cleanMarkdownAndFormatting(text);
+
+            // Array-first mode: return as soon as a non-empty array of strings can be pulled out
+            if (opts.extractArrays) {
+                const arrayResult = this.extractArray(cleanedText, opts);
+                if (arrayResult) {
+                    return arrayResult as unknown as T;
+                }
+            }
+
+            // Try direct JSON parsing with fixes if enabled
+            if (opts.applyFixes) {
+                const fixedResult = this.extractWithFixes(cleanedText);
+                if (fixedResult !== null) {
+                    return fixedResult as T;
+                }
+            }
+
+            // Try direct JSON parsing without fixes
+            try {
+                return JSON.parse(cleanedText) as T;
+            } catch (e) {
+                // Not valid JSON as-is; fall through to fallbacks
+            }
+
+            // Use fallbacks if enabled (note: the item fallback is given the original text, not cleanedText)
+            if (opts.useFallbacks) {
+                if (opts.extractArrays) {
+                    const items = this.extractItemsAsFallback(text, opts.minStringLength);
+                    if (items.length > 0) {
+                        return items as unknown as T;
+                    }
+                }
+
+                // If it looks like a JSON object but can't be parsed, try regex extraction
+                if (cleanedText.includes('{') && cleanedText.includes('}')) {
+                    const objectResult = this.extractObject(cleanedText);
+                    if (objectResult) {
+                        return objectResult as T;
+                    }
+                }
+            }
+
+            return null;
+        } catch (error) {
+            log.error(`JSON extraction error: ${error}`);
+            return null;
+        }
+    }
+
+    /**
+     * Extract tool calls (objects with a string tool_name and an object parameters) from an LLM response
+     * Specifically designed to handle Ollama tool call format; falls back to a regex search for a single tool_name/parameters pair when no candidate object qualifies
+     *
+     * @param text - Raw text from the LLM response
+     * @returns Array of tool calls or empty array if none found; errors are caught and logged rather than propagated
+     */
+    static extractToolCalls(text: string): ExtractedToolCall[] {
+        const toolCalls: ExtractedToolCall[] = [];
+
+        try {
+            // Clean up the text and find all JSON objects
+            const cleanedText = this.cleanMarkdownAndFormatting(text);
+
+            // Try to find complete JSON objects
+            const jsonObjectMatches = this.findJsonObjects(cleanedText);
+
+            for (const jsonString of jsonObjectMatches) {
+                try {
+                    // Try to fix and parse each potential JSON object
+                    const fixedJson = this.applyJsonFixes(jsonString);
+                    const parsedJson = JSON.parse(fixedJson);
+
+                    // Check if this looks like a tool call: non-empty string tool_name plus a non-null object parameters (arrays also pass the typeof check)
+                    if (
+                        parsedJson &&
+                        typeof parsedJson === 'object' &&
+                        parsedJson.tool_name &&
+                        typeof parsedJson.tool_name === 'string' &&
+                        parsedJson.parameters &&
+                        typeof parsedJson.parameters === 'object'
+                    ) {
+                        toolCalls.push({
+                            tool_name: parsedJson.tool_name,
+                            parameters: parsedJson.parameters,
+                            originalJson: jsonString
+                        });
+                    }
+                } catch (e) {
+                    // This candidate could not be parsed even after fixes; log it and move on to the next one
+                    log.info(`Failed to parse potential tool call JSON: ${e}`);
+                }
+            }
+
+            // If we couldn't find valid tool calls with the first approach, try regex pattern matching (on the original text, not cleanedText)
+            if (toolCalls.length === 0) {
+                // Only the first tool_name and first parameters match are used; the parameters pattern stops at the first '}', so nested objects are cut short
+                const toolNameMatch = text.match(/["']?tool_name["']?\s*:\s*["']([^"']+)["']/);
+                const parametersMatch = text.match(/["']?parameters["']?\s*:\s*({[^}]+})/);
+
+                if (toolNameMatch && parametersMatch) {
+                    try {
+                        const toolName = toolNameMatch[1];
+                        const parametersStr = this.applyJsonFixes(parametersMatch[1]);
+                        const parameters = JSON.parse(parametersStr);
+
+                        toolCalls.push({
+                            tool_name: toolName,
+                            parameters,
+                            originalJson: `{"tool_name":"${toolName}","parameters":${parametersStr}}`
+                        });
+                    } catch (e) {
+                        log.info(`Failed to parse tool call with regex approach: ${e}`);
+                    }
+                }
+            }
+        } catch (error) {
+            log.error(`Error extracting tool calls: ${error}`);
+        }
+
+        return toolCalls;
+    }
+
+    /**
+     * Find all top-level brace-balanced `{...}` spans in a text.
+     *
+     * Braces inside double-quoted strings are ignored, and a stray `}` outside
+     * any object is skipped so it cannot unbalance the objects that follow it.
+     *
+     * @param text - Text to scan
+     * @returns Candidate JSON object substrings in order of appearance (unparsed)
+     */
+    private static findJsonObjects(text: string): string[] {
+        const jsonObjects: string[] = [];
+        let depth = 0;
+        let start = -1;
+        let inString = false;
+        let escaped = false;
+
+        for (let i = 0; i < text.length; i++) {
+            const char = text[i];
+
+            if (inString) {
+                // Inside a string literal only an unescaped closing quote matters
+                if (escaped) escaped = false;
+                else if (char === '\\') escaped = true;
+                else if (char === '"') inString = false;
+            } else if (char === '"' && depth > 0) {
+                inString = true;
+            } else if (char === '{') {
+                if (depth === 0) start = i;
+                depth++;
+            } else if (char === '}' && depth > 0) {
+                depth--;
+                if (depth === 0) jsonObjects.push(text.slice(start, i + 1));
+            }
+        }
+
+        return jsonObjects;
+    }
+
+    /**
+     * Remove Markdown code-fence markers (``` and ```json), convert curly double quotes (U+201C/U+201D) to straight quotes, and trim; fenced content is kept and curly single quotes are left untouched
+     */
+    private static cleanMarkdownAndFormatting(text: string): string {
+        return text
+            .replace(/```(?:json)?|```/g, '') // Remove code block markers (the fenced content itself stays)
+            .replace(/[\u201C\u201D]/g, '"')  // Replace curly double quotes with straight quotes
+            .trim();
+    }
+
+    /**
+     * Extract an array of strings from text: first via a regex for a bracketed list of double-quoted strings, then via JSON.parse of the first '[' to last ']' span after applyJsonFixes. Items shorter than options.minStringLength (default 3; an explicit 0 disables the filter) are dropped; returns null when nothing usable remains
+     */
+    private static extractArray(text: string, options: JsonExtractionOptions): string[] | null {
+        // First attempt: Find JSON arrays via regex (the comma between strings is optional, so arrays with missing commas still match)
+        const arrayPattern = /\[((?:"(?:\\.|[^"\\])*"(?:\s*,\s*)?)+)\]/g;
+        const matches = [...text.matchAll(arrayPattern)];
+
+        if (matches.length > 0) {
+            // Take the first complete array match
+            const arrayContent = matches[0][1];
+
+            // Extract all properly quoted strings from the array (escape sequences are kept as written, not decoded)
+            const stringPattern = /"((?:\\.|[^"\\])*)"/g;
+            const stringMatches = [...arrayContent.matchAll(stringPattern)];
+
+            if (stringMatches.length > 0) {
+                const items = stringMatches
+                    .map(m => m[1].trim())
+                    .filter(s => s.length >= (options.minStringLength ?? 3)); // ?? (not ||) so an explicit 0 is honored
+
+                if (items.length > 0) {
+                    log.info(`Successfully extracted ${items.length} items using regex pattern`);
+                    return items;
+                }
+            }
+        }
+
+        // Second attempt: Try to extract array via standard JSON parsing with fixes
+        if (text.includes('[') && text.includes(']')) {
+            const arrayMatch = text.match(/\[[\s\S]*\]/);
+            if (arrayMatch) {
+                const arrayText = this.applyJsonFixes(arrayMatch[0]);
+
+                try {
+                    const array = JSON.parse(arrayText);
+                    if (Array.isArray(array) && array.length > 0) {
+                        const items = array
+                            .map(item => typeof item === 'string' ? item : String(item))
+                            .filter(s => s.length >= (options.minStringLength ?? 3)); // ?? (not ||) so an explicit 0 is honored
+
+                        if (items.length > 0) {
+                            log.info(`Successfully parsed JSON array with ${items.length} items`);
+                            return items;
+                        }
+                    }
+                } catch (e) {
+                    // Not parseable even after fixes; report no array so the caller can try its own fallbacks
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Locate a JSON object inside free-form text and parse it.
+     *
+     * The candidate is the greedy span from the first `{` to the last `}`;
+     * it is passed through `applyJsonFixes` before `JSON.parse`.
+     *
+     * @param text - text that may contain a JSON object
+     * @returns the parsed value, or null when no braces are found or parsing fails
+     */
+    private static extractObject(text: string): Record<string, any> | null {
+        const candidate = /{[\s\S]*}/.exec(text);
+        if (candidate === null) {
+            return null;
+        }
+
+        const repaired = this.applyJsonFixes(candidate[0]);
+
+        let result: Record<string, any> | null;
+        try {
+            result = JSON.parse(repaired);
+        } catch (e) {
+            // Unparseable even after repair; report "nothing found"
+            result = null;
+        }
+        return result;
+    }
+
+    /**
+     * Apply best-effort repairs to malformed JSON text.
+     *
+     * The repairs are regex heuristics and are lossy: run against text that is
+     * already valid they can break it (for instance the "quotes inside strings"
+     * rule rewrites `"a": "b"` into `"a': 'b"`). To avoid that, the input is
+     * returned untouched when it already parses, and the repairs stop as soon
+     * as an intermediate result parses. When no intermediate result parses, the
+     * output is the text with every repair applied in order.
+     *
+     * @param text - candidate JSON text
+     * @returns the original text if valid, otherwise the repaired text
+     */
+    private static applyJsonFixes(text: string): string {
+        const isValidJson = (candidate: string): boolean => {
+            try {
+                JSON.parse(candidate);
+                return true;
+            } catch (e) {
+                return false;
+            }
+        };
+
+        // Never "fix" text that is not broken
+        if (isValidJson(text)) {
+            return text;
+        }
+
+        // Ordered from least to most invasive
+        const fixes: Array<(input: string) => string> = [
+            // Replace newlines with spaces (raw newlines are illegal inside JSON strings)
+            input => input.replace(/\r?\n/g, ' '),
+
+            // Remove trailing commas before a closing bracket or brace
+            input => input.replace(/,\s*]/g, ']').replace(/,\s*}/g, '}'),
+
+            // Fix quotes inside strings: in a run of four double quotes, turn the inner two into single quotes
+            input => input.replace(/"([^"]*)"([^"]*)"([^"]*)"/g, '"$1\'$2\'$3"'),
+
+            // Fix missing commas between elements
+            input => input.replace(/"([^"]*)"(?:\s+)"([^"]*)"/g, '"$1", "$2"'),
+
+            // Fix missing commas in arrays (quotes with only spaces between them)
+            input => input.replace(/"([^"]*)"\s+"/g, '"$1", "'),
+
+            // Fix unclosed quotes before commas
+            input => input.replace(/"([^"]*),\s*(?="|])/g, '"$1", ')
+        ];
+
+        let fixed = text;
+        for (const fix of fixes) {
+            fixed = fix(fixed);
+
+            // Stop at the first result that parses so later heuristics cannot damage it
+            if (isValidJson(fixed)) {
+                return fixed;
+            }
+        }
+
+        return fixed;
+    }
+
+    /**
+     * Repair the whole text with `applyJsonFixes`, then parse it as JSON.
+     *
+     * @param text - candidate JSON text
+     * @returns the parsed value, or null if repair or parsing throws
+     */
+    private static extractWithFixes(text: string): any | null {
+        let parsed: any | null = null;
+
+        try {
+            const repaired = this.applyJsonFixes(text);
+            parsed = JSON.parse(repaired);
+        } catch (e) {
+            // Any failure means "nothing extractable"
+            parsed = null;
+        }
+
+        return parsed;
+    }
+
+    /**
+     * Extract list-like items from text that could not be parsed as JSON.
+     *
+     * Four line-anchored patterns are tried (quoted strings, single-item arrays,
+     * numbered list items, bullet list items). If none of them produces an
+     * item, every remaining line is cleaned of list markers and surrounding
+     * quotes and used as an item. Results are de-duplicated and keep first-seen
+     * order.
+     *
+     * Note that the numbered-list pattern captures the item together with its
+     * "N." marker, whereas the line-by-line path strips that marker.
+     *
+     * @param text - raw text to scan
+     * @param minLength - minimum trimmed length for an item to be kept
+     * @returns the unique extracted items (possibly empty)
+     */
+    private static extractItemsAsFallback(text: string, minLength: number = 3): string[] {
+        // The line terminator is matched with a lookahead rather than consumed:
+        // the next match needs that same '\n' as its `(?:^|\n)` prefix, and
+        // consuming it made every second consecutive line get skipped.
+        const patterns = [
+            /(?:^|\n)["'](.+?)["'](?=,|\n|$)/g,       // Quoted strings
+            /(?:^|\n)\[["'](.+?)["']\](?=,|\n|$)/g,   // Single item arrays
+            /(?:^|\n)(\d+\.\s*.+?)(?=\n|$)/g,         // Numbered list items
+            /(?:^|\n)[-*•]\s*(.+?)(?=\n|$)/g          // Bullet list items
+        ];
+
+        const extractedItems = new Set<string>();
+
+        // Try each pattern
+        for (const pattern of patterns) {
+            for (const match of text.matchAll(pattern)) {
+                const candidate = match[1]?.trim();
+                if (candidate && candidate.length >= minLength) {
+                    extractedItems.add(candidate);
+                }
+            }
+        }
+
+        // Try line-by-line extraction as last resort
+        if (extractedItems.size === 0) {
+            const lines = text.split('\n')
+                .map(line => line.trim())
+                .filter(line =>
+                    line.length >= minLength &&
+                    !line.startsWith('```') &&
+                    !line.match(/^\d+\.?\s*$/) && // Skip numbered list markers alone
+                    !line.match(/^\[|\]$/) // Skip lines that start with '[' or end with ']'
+                );
+
+            for (const line of lines) {
+                // Remove common formatting
+                const cleaned = line
+                    .replace(/^\d+\.?\s*/, '') // Remove numbered list markers
+                    .replace(/^[-*•]\s*/, '')  // Remove bullet list markers
+                    .replace(/^["']|["']$/g, '') // Remove surrounding quotes
+                    .trim();
+
+                if (cleaned.length >= minLength) {
+                    extractedItems.add(cleaned);
+                }
+            }
+        }
+
+        return Array.from(extractedItems);
+    }
+}
+
+export default JsonExtractor;