fix(markdown): Make the math formula conversion ignore formulas inside code blocks

This commit is contained in:
SiriusXT 2025-05-21 17:15:54 +08:00
parent 6a9b44e4a1
commit 04bd541542
2 changed files with 59 additions and 28 deletions

View File

@ -206,6 +206,16 @@ $$`;
expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected);
}); });
// Regression test: `$$…$$` inside a fenced code block must reach the output verbatim
// instead of being turned into a math-tex span.
// NOTE(review): the title also mentions inline math expressions, but this test only asserts
// the fenced-code case — consider adding an inline assertion or shortening the title.
it("ignores math formulas inside code blocks and converts inline math expressions correctly", () => {
// Input: a fenced block tagged with a language name that is not a recognised one.
const result = markdownService.renderToHtml(trimIndentation`\
\`\`\`unknownlanguage
$$a+b$$
\`\`\`
`, "title");
// Expected: a plain <pre><code> element with the auto-detect class and the dollar signs untouched.
expect(result).toBe(trimIndentation`\
<pre><code class="language-text-x-trilium-auto">$$a+b$$</code></pre>`);
});
it("converts specific inline math expression into Mathtex format", () => { it("converts specific inline math expression into Mathtex format", () => {
const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`; const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`;
const expected = /*html*/`<p>This is a formula: <span class="math-tex">\\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\)</span> inside a sentence.</p>`; const expected = /*html*/`<p>This is a formula: <span class="math-tex">\\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\)</span> inside a sentence.</p>`;

View File

@ -122,7 +122,7 @@ function renderToHtml(content: string, title: string) {
content = content.replaceAll("\\$", "\\\\$"); content = content.replaceAll("\\$", "\\\\$");
// Extract formulas and replace them with placeholders to prevent interference from Markdown rendering // Extract formulas and replace them with placeholders to prevent interference from Markdown rendering
const { processedText, formulaMap } = extractFormulas(content); const { processedText, placeholderMap: formulaMap } = extractFormulas(content);
let html = parse(processedText, { let html = parse(processedText, {
async: false, async: false,
@ -130,7 +130,7 @@ function renderToHtml(content: string, title: string) {
}) as string; }) as string;
// After rendering, replace placeholders back with the formula HTML // After rendering, replace placeholders back with the formula HTML
html = restoreFormulas(html, formulaMap); html = restoreFromMap(html, formulaMap);
// h1 handling needs to come before sanitization // h1 handling needs to come before sanitization
html = importUtils.handleH1(html, title); html = importUtils.handleH1(html, title);
@ -159,36 +159,57 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) {
return MIME_TYPE_AUTO; return MIME_TYPE_AUTO;
} }
/**
 * Replaces fenced code blocks and inline code spans with unique HTML-comment placeholders so
 * that later text processing (e.g. formula detection) cannot touch their content.
 *
 * Recognised as code:
 * - fenced blocks opened by three or more backticks and closed by a backtick run of the same length,
 * - fenced blocks opened by three or more tildes at the start of a line (at most three
 *   spaces/tabs of indentation) and closed by a tilde run of the same length,
 * - single-line inline spans delimited by one backtick on each side.
 *
 * @param text the raw Markdown source.
 * @returns `processedText` with every code fragment swapped for a placeholder, and
 *          `placeholderMap` mapping each placeholder to the exact text it replaced.
 */
function extractCodeBlocks(text: string): { processedText: string, placeholderMap: Map<string, string> } {
    const codeMap = new Map<string, string>();
    // The timestamp keeps placeholders from colliding with text that is already in the document.
    const timestamp = Date.now();
    let id = 0;

    const stash = (kind: string, code: string): string => {
        const key = `<!--${kind}_${timestamp}_${id++}-->`;
        codeMap.set(key, code);
        return key;
    };

    const processedText = text
        // Multi-line code blocks. The back-reference makes the closing fence match the opening
        // one, so a longer fence can wrap shorter ones and `~~~` fences are protected as well.
        .replace(/((?<=^[ \t]{0,3})~{3,}|`{3,})[\s\S]*?\1/gm, (block) => stash("CODE_BLOCK", block))
        // Inline code.
        .replace(/`[^`\n]+`/g, (span) => stash("INLINE_CODE", span));

    return { processedText, placeholderMap: codeMap };
}
/**
 * Replaces display (`$$…$$`) and inline (`$…$`) math with unique HTML-comment placeholders so
 * the Markdown renderer cannot mangle the formulas.
 *
 * Code blocks and inline code are hidden while the formulas are searched for, so `$` signs
 * inside code are never treated as math delimiters; the code is put back before returning.
 *
 * @param text the raw Markdown source.
 * @returns `processedText` (Markdown with formulas swapped for placeholders, code intact) and
 *          `placeholderMap` mapping each placeholder to the already rendered
 *          `<span class="math-tex">` HTML that should replace it after Markdown rendering.
 */
function extractFormulas(text: string): { processedText: string, placeholderMap: Map<string, string> } {
    // Protect the $ signs inside code blocks from being recognized as formulas.
    const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text);

    const formulaMap = new Map<string, string>();
    const timestamp = Date.now();
    let id = 0;

    const stash = (kind: string, open: string, close: string, formula: string): string => {
        const key = `<!--${kind}_${timestamp}_${id++}-->`;
        // A match can span a code placeholder (e.g. "$5 for `a` and $6"). That placeholder ends
        // up inside the stored formula, which the final restore below never sees, so it has to
        // be resolved here; otherwise the code would be emitted as an invisible HTML comment.
        const inner = restoreFromMap(formula, codeMap);
        formulaMap.set(key, `<span class="math-tex">${open}${inner}${close}</span>`);
        return key;
    };

    const withoutFormulas = noCodeText
        // Display math: may span lines but never crosses a blank line.
        .replace(/(?<!\\)\$\$((?:(?!\n{2,})[\s\S])+?)\$\$/g,
            (_, formula: string) => stash("FORMULA_BLOCK", "\\[", "\\]", formula))
        // Inline math: single line only.
        .replace(/(?<!\\)\$(.+?)\$/g,
            (_, formula: string) => stash("FORMULA_INLINE", "\\(", "\\)", formula));

    // Put the untouched code back so the Markdown renderer handles it as usual.
    const processedText = restoreFromMap(withoutFormulas, codeMap);

    return { processedText, placeholderMap: formulaMap };
}
/**
 * Replaces every occurrence of each key of `map` found in `text` with the value mapped to it.
 *
 * All keys are substituted in a single pass, so an inserted value is never scanned again for
 * further keys, and values are inserted literally (sequences such as `$&` or `$1` are not
 * interpreted as replacement patterns).
 *
 * @param text the text containing the placeholders.
 * @param map lookup from placeholder to its replacement.
 * @returns the text with all known placeholders replaced; the input itself if there is
 *          nothing to replace.
 */
function restoreFromMap(text: string, map: Map<string, string>): string {
    // An empty key would become an empty alternative that matches at every position.
    const keys = [...map.keys()].filter((key) => key.length > 0);
    if (keys.length === 0) return text;

    // Alternation takes the first alternative that matches, so longer keys must come first;
    // otherwise a key that is a prefix of another one would shadow it.
    const pattern = keys
        .sort((a, b) => b.length - a.length)
        .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
        .join("|");

    return text.replace(new RegExp(pattern, "g"), (match) => map.get(match) ?? match);
}
const renderer = new CustomMarkdownRenderer({ async: false }); const renderer = new CustomMarkdownRenderer({ async: false });