centralize constants for message formatting

2025-10-18 12:41:54 +08:00 · 2025-04-01 19:33:53 +00:00 · 2025-04-01 19:33:53 +00:00 · 9719859a39
commit 9719859a39
parent 154d2905fa
6 changed files with 256 additions and 172 deletions
--- a/src/public/app/widgets/llm_chat_panel.ts
+++ b/src/public/app/widgets/llm_chat_panel.ts
@ -805,7 +805,7 @@ export default class LlmChatPanel extends BasicWidget {
            const allPrecedenceEnabled = precedenceList.every((p: string) => enabledProviders.includes(p));

            // Get embedding queue status
-            const embeddingStats = await server.get('embeddings/stats') as {
+            const embeddingStats = await server.get('llm/embeddings/stats') as {
                success: boolean,
                stats: {
                    totalNotesCount: number;
--- a/src/services/llm/constants/formatter_constants.ts
+++ b/src/services/llm/constants/formatter_constants.ts
@ -0,0 +1,159 @@
+/**
+ * Formatter Constants
+ *
+ * Constants related to message formatters for different LLM providers.
+ * This centralizes string formatting patterns, HTML cleaning options,
+ * and other formatter-specific constants that were previously hardcoded.
+ */
+
+/**
+ * Tag allowlists passed to the HTML sanitizer as `allowedTags`,
+ * grouped by how strict the target formatter needs to be.
+ */
+export const HTML_ALLOWED_TAGS = {
+    // Default allowlist: inline emphasis, links, paragraphs, lists, headings h1-h5, code
+    STANDARD: [
+        'b', 'i', 'em', 'strong', 'a', 'p', 'br',
+        'ul', 'ol', 'li',
+        'h1', 'h2', 'h3', 'h4', 'h5',
+        'code', 'pre'
+    ],
+
+    // Reduced allowlist for providers with limited HTML support
+    MINIMAL: ['b', 'i', 'p', 'br', 'a'],
+
+    // No tags at all: plain-text output (Ollama)
+    NONE: []
+};
+
+/**
+ * Per-tag attribute allowlists passed to the HTML sanitizer as
+ * `allowedAttributes`.
+ */
+export const HTML_ALLOWED_ATTRIBUTES = {
+    // Only `href` on anchors is listed
+    STANDARD: { a: ['href'] },
+
+    // No attributes listed (plain-text output)
+    NONE: {}
+};
+
+/**
+ * Tag rename tables (source tag -> output tag) passed to the HTML
+ * sanitizer as `transformTags`.
+ */
+export const HTML_TRANSFORMS = {
+    // Headings h1/h2 are pushed one level down, div becomes p, span maps to itself
+    STANDARD: {
+        h1: 'h2',
+        h2: 'h3',
+        div: 'p',
+        span: 'span'
+    }
+};
+
+/**
+ * RegEx patterns for HTML to Markdown conversion.
+ *
+ * The formatters apply these by iterating `Object.values(...)`, i.e. in
+ * insertion order, so the order of the entries below is significant.
+ *
+ * Conventions used by every tag pattern:
+ *  - `\b` after the tag name, so `<p` does not also match `<pre>`, `<b` does
+ *    not match `<br>`, `<i` does not match `<img>`, and so on;
+ *  - `[\s\S]*?` for element content, because `.` does not match line
+ *    terminators and multi-line elements would otherwise be skipped.
+ */
+export const HTML_TO_MARKDOWN_PATTERNS = {
+    // Headings
+    HEADING_1: { pattern: /<h1\b[^>]*>([\s\S]*?)<\/h1>/gi, replacement: '# $1\n' },
+    HEADING_2: { pattern: /<h2\b[^>]*>([\s\S]*?)<\/h2>/gi, replacement: '## $1\n' },
+    HEADING_3: { pattern: /<h3\b[^>]*>([\s\S]*?)<\/h3>/gi, replacement: '### $1\n' },
+    HEADING_4: { pattern: /<h4\b[^>]*>([\s\S]*?)<\/h4>/gi, replacement: '#### $1\n' },
+    HEADING_5: { pattern: /<h5\b[^>]*>([\s\S]*?)<\/h5>/gi, replacement: '##### $1\n' },
+
+    // Paragraph and line breaks
+    PARAGRAPH: { pattern: /<p\b[^>]*>([\s\S]*?)<\/p>/gi, replacement: '$1\n\n' },
+    BREAK: { pattern: /<br\b[^>]*>/gi, replacement: '\n' },
+
+    // Links and formatting
+    LINK: { pattern: /<a\b[^>]*href=["'](.*?)["'][^>]*>([\s\S]*?)<\/a>/gi, replacement: '[$2]($1)' },
+    STRONG: { pattern: /<strong\b[^>]*>([\s\S]*?)<\/strong>/gi, replacement: '**$1**' },
+    BOLD: { pattern: /<b\b[^>]*>([\s\S]*?)<\/b>/gi, replacement: '**$1**' },
+    EMPHASIS: { pattern: /<em\b[^>]*>([\s\S]*?)<\/em>/gi, replacement: '*$1*' },
+    ITALIC: { pattern: /<i\b[^>]*>([\s\S]*?)<\/i>/gi, replacement: '*$1*' },
+
+    // Code. CODE_BLOCK must run before INLINE_CODE: it unwraps an optional
+    // inner <code> so that <pre><code>...</code></pre> becomes a single fenced
+    // block instead of a fence containing stray inline backticks.
+    CODE_BLOCK: {
+        pattern: /<pre\b[^>]*>(?:\s*<code\b[^>]*>)?([\s\S]*?)(?:<\/code>\s*)?<\/pre>/gi,
+        replacement: '```\n$1\n```'
+    },
+    INLINE_CODE: { pattern: /<code\b[^>]*>([\s\S]*?)<\/code>/gi, replacement: '`$1`' },
+
+    // Clean up
+    // NOTE(review): this strips every remaining tag, including <ul>/<ol>/<li>.
+    // Callers that post-process lists after iterating this table will no
+    // longer see list markup - confirm that is intended.
+    ANY_REMAINING_TAG: { pattern: /<[^>]*>/g, replacement: '' },
+    EXCESSIVE_NEWLINES: { pattern: /\n{3,}/g, replacement: '\n\n' }
+};
+
+/**
+ * HTML entity replacements.
+ *
+ * The formatters apply these by iterating `Object.values(...)`, i.e. in
+ * insertion order. `AMP` is deliberately the LAST entry: decoding `&amp;`
+ * earlier would turn escaped text such as `&amp;quot;` into `&quot;`, which a
+ * later entry would then decode a second time into `"`.
+ */
+export const HTML_ENTITY_REPLACEMENTS = {
+    // Common HTML entities
+    NBSP: { pattern: /&nbsp;/g, replacement: ' ' },
+    LT: { pattern: /&lt;/g, replacement: '<' },
+    GT: { pattern: /&gt;/g, replacement: '>' },
+    QUOT: { pattern: /&quot;/g, replacement: '"' },
+    APOS: { pattern: /&#39;/g, replacement: "'" },
+    LDQUO: { pattern: /&ldquo;/g, replacement: '"' },
+    RDQUO: { pattern: /&rdquo;/g, replacement: '"' },
+    LSQUO: { pattern: /&lsquo;/g, replacement: "'" },
+    RSQUO: { pattern: /&rsquo;/g, replacement: "'" },
+    MDASH: { pattern: /&mdash;/g, replacement: '—' },
+    NDASH: { pattern: /&ndash;/g, replacement: '–' },
+    HELLIP: { pattern: /&hellip;/g, replacement: '…' },
+
+    // Must stay last so that nothing is decoded twice (see above)
+    AMP: { pattern: /&amp;/g, replacement: '&' }
+};
+
+/**
+ * Repairs for text that arrives with encoding damage or typographic
+ * characters that should be flattened to simple equivalents.
+ */
+export const ENCODING_FIXES = {
+    // Mis-decoded quote sequence -> plain double quote
+    BROKEN_QUOTES: { pattern: /Γ\u00c2[\u00a3\u00a5]/g, replacement: '"' },
+
+    // Lookup table: single typographic character -> replacement text
+    UNICODE_REPLACEMENTS: {
+        '\u00A0': ' ',  // NO-BREAK SPACE
+        '\u2018': "'",  // LEFT SINGLE QUOTATION MARK
+        '\u2019': "'",  // RIGHT SINGLE QUOTATION MARK
+        '\u201C': '"',  // LEFT DOUBLE QUOTATION MARK
+        '\u201D': '"',  // RIGHT DOUBLE QUOTATION MARK
+        '\u2013': '-',  // EN DASH
+        '\u2014': '--', // EM DASH
+        '\u2022': '*',  // BULLET
+        '\u2026': '...' // HORIZONTAL ELLIPSIS
+    }
+};
+
+/**
+ * Ollama-specific cleaning patterns.
+ *
+ * The Ollama formatter applies these by iterating `Object.values(...)`, i.e.
+ * in insertion order. The typographic replacements must therefore come before
+ * NON_ASCII, which deletes whatever non-ASCII characters are still left.
+ *
+ * Characters are written as `\u` escapes so the patterns cannot be silently
+ * altered by an editor or encoding round-trip (the quote classes previously
+ * contained plain ASCII quotes, which made them no-ops and let NON_ASCII
+ * delete curly quotes outright).
+ */
+export const OLLAMA_CLEANING = {
+    // Replace fancy quotes
+    QUOTES: { pattern: /[\u201C\u201D]/g, replacement: '"' },      // left/right double quotation marks
+    APOSTROPHES: { pattern: /[\u2018\u2019]/g, replacement: "'" }, // left/right single quotation marks
+
+    // Replace other Unicode characters
+    DASHES: { pattern: /[\u2013\u2014]/g, replacement: '-' },      // en dash, em dash
+    BULLETS: { pattern: /\u2022/g, replacement: '*' },             // bullet
+    ELLIPSES: { pattern: /\u2026/g, replacement: '...' },          // horizontal ellipsis
+
+    // Remove non-ASCII characters
+    NON_ASCII: { pattern: /[^\x00-\x7F]/g, replacement: '' },
+
+    // Normalize whitespace
+    WHITESPACE: { pattern: /\s+/g, replacement: ' ' },
+    // NOTE(review): `\s` in WHITESPACE also matches `\n`, so when the entries
+    // are applied in order no newline is left for this pattern to match.
+    // Kept as-is to preserve the existing single-line output - confirm intent.
+    NEWLINE_WHITESPACE: { pattern: /\n\s+/g, replacement: '\n' }
+};
+
+/**
+ * Message builders and fixed strings used by the formatters when writing to
+ * the console.
+ */
+export const FORMATTER_LOGS = {
+    // Per-provider summary of how many messages went in and came out
+    ANTHROPIC: {
+        PROCESSED: (before: number, after: number): string =>
+            `Anthropic formatter: ${before} messages → ${after} messages`
+    },
+    OPENAI: {
+        PROCESSED: (before: number, after: number): string =>
+            `OpenAI formatter: ${before} messages → ${after} messages`
+    },
+    OLLAMA: {
+        PROCESSED: (before: number, after: number): string =>
+            `Ollama formatter processed ${before} messages into ${after} messages`
+    },
+
+    // Prefixes for error output; the caught error is logged alongside them
+    ERROR: {
+        CONTEXT_CLEANING: (provider: string): string =>
+            `Error cleaning content for ${provider}:`,
+        ENCODING: 'Error fixing encoding issues:'
+    }
+};
--- a/src/services/llm/formatters/anthropic_formatter.ts
+++ b/src/services/llm/formatters/anthropic_formatter.ts
@ -3,6 +3,13 @@ import type { Message } from '../ai_interface.js';
 import { BaseMessageFormatter } from './base_formatter.js';
 import { PROVIDER_PROMPTS } from '../constants/llm_prompt_constants.js';
 import { LLM_CONSTANTS } from '../constants/provider_constants.js';
+import {
+    HTML_ALLOWED_TAGS,
+    HTML_ALLOWED_ATTRIBUTES,
+    FORMATTER_LOGS,
+    HTML_TO_MARKDOWN_PATTERNS,
+    HTML_ENTITY_REPLACEMENTS
+} from '../constants/formatter_constants.js';

 /**
 * Anthropic-specific message formatter
@ -144,7 +151,7 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
            }
        }

-        console.log(`Anthropic formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
+        console.log(FORMATTER_LOGS.ANTHROPIC.PROCESSED(messages.length, formattedMessages.length));
        return formattedMessages;
    }

@ -158,52 +165,31 @@ export class AnthropicMessageFormatter extends BaseMessageFormatter {
        try {
            // Convert HTML to a Claude-friendly format
            const cleaned = sanitizeHtml(content, {
-                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
-                allowedAttributes: {
-                    'a': ['href']
-                }
+                allowedTags: HTML_ALLOWED_TAGS.STANDARD,
+                allowedAttributes: HTML_ALLOWED_ATTRIBUTES.STANDARD
            });

            // Convert to markdown but preserve some structure
-            let markdown = cleaned
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
-                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
-                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
-                .replace(/<br[^>]*>/gi, '\n')
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
-                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
-                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
-                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
-                // Process lists
-                .replace(/<ul[^>]*>(.*?)<\/ul>/gs, (match, content) => {
-                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
-                })
-                .replace(/<ol[^>]*>(.*?)<\/ol>/gs, (match, content) => {
-                    let index = 1;
-                    return content.replace(/<li[^>]*>(.*?)<\/li>/gi, (m: string, item: string) => {
-                        return `${index++}. ${item}\n`;
-                    });
-                })
-                // Clean up any remaining HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Clean up excessive newlines
-                .replace(/\n{3,}/g, '\n\n')
-                // Fix common HTML entities
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"');
+            let markdown = cleaned;
+
+            // Apply all standard HTML to Markdown patterns
+            const patterns = HTML_TO_MARKDOWN_PATTERNS;
+            for (const pattern of Object.values(patterns)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }
+
+            // Process lists - use the parent class method
+            markdown = this.processListItems(markdown);
+
+            // Fix common HTML entities
+            const entityPatterns = HTML_ENTITY_REPLACEMENTS;
+            for (const pattern of Object.values(entityPatterns)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }

            return markdown.trim();
        } catch (error) {
-            console.error("Error cleaning content for Anthropic:", error);
+            console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("Anthropic"), error);
            return content; // Return original if cleaning fails
        }
    }
--- a/src/services/llm/formatters/base_formatter.ts
+++ b/src/services/llm/formatters/base_formatter.ts
@ -2,6 +2,15 @@ import sanitizeHtml from 'sanitize-html';
 import type { Message } from '../ai_interface.js';
 import type { MessageFormatter } from '../interfaces/message_formatter.js';
 import { DEFAULT_SYSTEM_PROMPT, PROVIDER_PROMPTS } from '../constants/llm_prompt_constants.js';
+import {
+    HTML_ALLOWED_TAGS,
+    HTML_ALLOWED_ATTRIBUTES,
+    HTML_TRANSFORMS,
+    HTML_TO_MARKDOWN_PATTERNS,
+    HTML_ENTITY_REPLACEMENTS,
+    ENCODING_FIXES,
+    FORMATTER_LOGS
+} from '../constants/formatter_constants.js';

 /**
 * Base formatter with common functionality for all providers
@ -41,61 +50,32 @@ export abstract class BaseMessageFormatter implements MessageFormatter {

            // Convert HTML to markdown for better readability
            const cleaned = sanitizeHtml(fixedContent, {
-                allowedTags: ['b', 'i', 'em', 'strong', 'a', 'p', 'br', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'code', 'pre'],
-                allowedAttributes: {
-                    'a': ['href']
-                },
-                transformTags: {
-                    'h1': 'h2',
-                    'h2': 'h3',
-                    'div': 'p',
-                    'span': 'span'
-                }
+                allowedTags: HTML_ALLOWED_TAGS.STANDARD,
+                allowedAttributes: HTML_ALLOWED_ATTRIBUTES.STANDARD,
+                transformTags: HTML_TRANSFORMS.STANDARD
            });

            // Process inline elements to markdown
-            let markdown = cleaned
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
-                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
-                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
-                .replace(/<br[^>]*>/gi, '\n')
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
-                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
-                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
-                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
-                // Clean up any remaining HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Clean up excessive newlines
-                .replace(/\n{3,}/g, '\n\n');
+            let markdown = cleaned;
+
+            // Apply all HTML to Markdown patterns
+            const patterns = HTML_TO_MARKDOWN_PATTERNS;
+            for (const pattern of Object.values(patterns)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }

            // Process list items
            markdown = this.processListItems(markdown);

            // Fix common HTML entities
-            markdown = markdown
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                .replace(/&#39;/g, "'")
-                .replace(/&ldquo;/g, '"')
-                .replace(/&rdquo;/g, '"')
-                .replace(/&lsquo;/g, "'")
-                .replace(/&rsquo;/g, "'")
-                .replace(/&mdash;/g, '—')
-                .replace(/&ndash;/g, '–')
-                .replace(/&hellip;/g, '…');
+            const entityPatterns = HTML_ENTITY_REPLACEMENTS;
+            for (const pattern of Object.values(entityPatterns)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }

            return markdown.trim();
        } catch (error) {
-            console.error("Error cleaning context content:", error);
+            console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("Base"), error);
            return content; // Return original if cleaning fails
        }
    }
@ -133,28 +113,18 @@ export abstract class BaseMessageFormatter implements MessageFormatter {

        try {
            // Fix common encoding issues
-            return content
-                // Fix broken quote characters
-                .replace(/Γ\u00c2[\u00a3\u00a5]/g, '"')
-                // Fix other common broken unicode
-                .replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
-                    // Some common replacements
-                    const replacements: Record<string, string> = {
-                        '\u00A0': ' ',  // Non-breaking space
-                        '\u2018': "'",  // Left single quote
-                        '\u2019': "'",  // Right single quote
-                        '\u201C': '"',  // Left double quote
-                        '\u201D': '"',  // Right double quote
-                        '\u2013': '-',  // En dash
-                        '\u2014': '--', // Em dash
-                        '\u2022': '*',  // Bullet
-                        '\u2026': '...' // Ellipsis
-                    };
+            let fixed = content.replace(ENCODING_FIXES.BROKEN_QUOTES.pattern, ENCODING_FIXES.BROKEN_QUOTES.replacement);

-                    return replacements[match] || match;
-                });
+            // Fix other common broken unicode
+            fixed = fixed.replace(/[\u{0080}-\u{FFFF}]/gu, (match) => {
+                // Use replacements from constants
+                const replacements = ENCODING_FIXES.UNICODE_REPLACEMENTS;
+                return replacements[match as keyof typeof replacements] || match;
+            });
+
+            return fixed;
        } catch (error) {
-            console.error('Error fixing encoding issues:', error);
+            console.error(FORMATTER_LOGS.ERROR.ENCODING, error);
            return content; // Return original if fixing fails
        }
    }
--- a/src/services/llm/formatters/ollama_formatter.ts
+++ b/src/services/llm/formatters/ollama_formatter.ts
@ -3,6 +3,12 @@ import { BaseMessageFormatter } from './base_formatter.js';
 import sanitizeHtml from 'sanitize-html';
 import { PROVIDER_PROMPTS, FORMATTING_PROMPTS } from '../constants/llm_prompt_constants.js';
 import { LLM_CONSTANTS } from '../constants/provider_constants.js';
+import {
+    HTML_ALLOWED_TAGS,
+    HTML_ALLOWED_ATTRIBUTES,
+    OLLAMA_CLEANING,
+    FORMATTER_LOGS
+} from '../constants/formatter_constants.js';

 /**
 * Ollama-specific message formatter
@ -66,7 +72,7 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
            }
        }

-        console.log(`Ollama formatter processed ${messages.length} messages into ${formattedMessages.length} messages`);
+        console.log(FORMATTER_LOGS.OLLAMA.PROCESSED(messages.length, formattedMessages.length));

        return formattedMessages;
    }
@ -85,30 +91,20 @@ export class OllamaMessageFormatter extends BaseMessageFormatter {
            // Then apply Ollama-specific aggressive cleaning
            // Remove any remaining HTML using sanitizeHtml
            let plaintext = sanitizeHtml(sanitized, {
-                allowedTags: [],
-                allowedAttributes: {},
+                allowedTags: HTML_ALLOWED_TAGS.NONE,
+                allowedAttributes: HTML_ALLOWED_ATTRIBUTES.NONE,
                textFilter: (text) => text
            });

-            // Then aggressively sanitize to plain ASCII and simple formatting
-            plaintext = plaintext
-                // Replace common problematic quotes with simple ASCII quotes
-                .replace(/[""]/g, '"')
-                .replace(/['']/g, "'")
-                // Replace other common Unicode characters
-                .replace(/[–—]/g, '-')
-                .replace(/[•]/g, '*')
-                .replace(/[…]/g, '...')
-                // Strip all non-ASCII characters
-                .replace(/[^\x00-\x7F]/g, '')
-                // Normalize whitespace
-                .replace(/\s+/g, ' ')
-                .replace(/\n\s+/g, '\n')
-                .trim();
+            // Apply all Ollama-specific cleaning patterns
+            const ollamaPatterns = OLLAMA_CLEANING;
+            for (const pattern of Object.values(ollamaPatterns)) {
+                plaintext = plaintext.replace(pattern.pattern, pattern.replacement);
+            }

-            return plaintext;
+            return plaintext.trim();
        } catch (error) {
-            console.error("Error cleaning context content for Ollama:", error);
+            console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("Ollama"), error);
            return content; // Return original if cleaning fails
        }
    }
--- a/src/services/llm/formatters/openai_formatter.ts
+++ b/src/services/llm/formatters/openai_formatter.ts
@ -3,6 +3,13 @@ import type { Message } from '../ai_interface.js';
 import { BaseMessageFormatter } from './base_formatter.js';
 import { PROVIDER_PROMPTS, FORMATTING_PROMPTS } from '../constants/llm_prompt_constants.js';
 import { LLM_CONSTANTS } from '../constants/provider_constants.js';
+import {
+    HTML_ALLOWED_TAGS,
+    HTML_ALLOWED_ATTRIBUTES,
+    HTML_TO_MARKDOWN_PATTERNS,
+    HTML_ENTITY_REPLACEMENTS,
+    FORMATTER_LOGS
+} from '../constants/formatter_constants.js';

 /**
 * OpenAI-specific message formatter
@ -72,7 +79,7 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
            });
        }

-        console.log(`OpenAI formatter: ${messages.length} messages → ${formattedMessages.length} messages`);
+        console.log(FORMATTER_LOGS.OPENAI.PROCESSED(messages.length, formattedMessages.length));
        return formattedMessages;
    }

@ -86,58 +93,24 @@ export class OpenAIMessageFormatter extends BaseMessageFormatter {
        try {
            // Convert HTML to Markdown for better readability
            const cleaned = sanitizeHtml(content, {
-                allowedTags: FORMATTING_PROMPTS.HTML_ALLOWED_TAGS,
-                allowedAttributes: {
-                    'a': ['href']
-                },
-                transformTags: {
-                    'h1': 'h2',
-                    'h2': 'h3',
-                    'div': 'p',
-                    'span': 'span'
-                }
+                allowedTags: HTML_ALLOWED_TAGS.STANDARD,
+                allowedAttributes: HTML_ALLOWED_ATTRIBUTES.STANDARD
            });

-            // Process inline elements to markdown with simpler approach
-            let markdown = cleaned
-                .replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n')
-                .replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n')
-                .replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n')
-                .replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n')
-                .replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n')
-                .replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
-                .replace(/<br[^>]*>/gi, '\n')
-                .replace(/<a[^>]*href=["'](.*?)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')
-                .replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
-                .replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
-                .replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
-                .replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')
-                .replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
-                .replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```')
-                // Clean up any remaining HTML tags
-                .replace(/<[^>]*>/g, '')
-                // Clean up excessive newlines
-                .replace(/\n{3,}/g, '\n\n');
+            // Apply all HTML to Markdown patterns
+            let markdown = cleaned;
+            for (const pattern of Object.values(HTML_TO_MARKDOWN_PATTERNS)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }

            // Fix common HTML entities
-            markdown = markdown
-                .replace(/&nbsp;/g, ' ')
-                .replace(/&lt;/g, '<')
-                .replace(/&gt;/g, '>')
-                .replace(/&amp;/g, '&')
-                .replace(/&quot;/g, '"')
-                .replace(/&#39;/g, "'")
-                .replace(/&ldquo;/g, '"')
-                .replace(/&rdquo;/g, '"')
-                .replace(/&lsquo;/g, "'")
-                .replace(/&rsquo;/g, "'")
-                .replace(/&mdash;/g, '—')
-                .replace(/&ndash;/g, '–')
-                .replace(/&hellip;/g, '…');
+            for (const pattern of Object.values(HTML_ENTITY_REPLACEMENTS)) {
+                markdown = markdown.replace(pattern.pattern, pattern.replacement);
+            }

            return markdown.trim();
        } catch (error) {
-            console.error("Error cleaning content for OpenAI:", error);
+            console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("OpenAI"), error);
            return content; // Return original if cleaning fails
        }
    }