fix(import): handle multi-line mathematical formulas when importing markdown

This commit is contained in:
SiriusXT 2025-05-20 22:03:40 +08:00
parent 8f3d98b14b
commit 6e8e343301
2 changed files with 65 additions and 15 deletions

View File

@ -194,9 +194,33 @@ second line 2</code></pre><ul><li>Hello</li><li>world</li></ul><ol><li>Hello</li
expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected);
});
it("converts multi-line display math expressions into Mathtex format", () => {
    // A `$$` block spanning several lines is expected to keep its line breaks and to
    // come back as a single math-tex span using \[ ... \] delimiters, with no <p> wrapper.
    const markdown = `$$
\\sqrt{x^{2}+1} \\
+ \\frac{1}{2}
$$`;
    const html = markdownService.renderToHtml(markdown, "Title");
    expect(html).toStrictEqual(/*html*/`<span class="math-tex">\\[
\\sqrt{x^{2}+1} \\
+ \\frac{1}{2}
\\]</span>`);
});
it("converts specific inline math expression into Mathtex format", () => {
    // Inline `$...$` math containing underscores and braces is expected to survive
    // untouched inside a math-tex span using \( ... \) delimiters.
    const markdown = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`;
    const html = markdownService.renderToHtml(markdown, "Title");
    expect(html).toStrictEqual(/*html*/`<p>This is a formula: <span class="math-tex">\\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\)</span> inside a sentence.</p>`);
});
it("converts math expressions inside list items into Mathtex format", () => {
    // Inline math inside a bullet item is expected to be converted just like in a paragraph.
    const markdown = `- First item with formula: $E = mc^2$`;
    const html = markdownService.renderToHtml(markdown, "Title");
    expect(html).toStrictEqual(/*html*/`<ul><li>First item with formula: <span class="math-tex">\\(E = mc^2\\)</span></li></ul>`);
});
it("converts display math expressions into Mathtex format", () => {
const input = `$$\sqrt{x^{2}+1}$$`;
const expected = /*html*/`<p><span class="math-tex">\\[\sqrt{x^{2}+1}\\]</span></p>`;
const expected = /*html*/`<span class="math-tex">\\[\sqrt{x^{2}+1}\\]</span>`;
expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected);
});

View File

@ -23,19 +23,7 @@ class CustomMarkdownRenderer extends Renderer {
}
paragraph(data: Tokens.Paragraph): string {
let text = super.paragraph(data).trimEnd();
if (text.includes("$")) {
// Display math
text = text.replaceAll(/(?<!\\)\$\$(.+)\$\$/g,
`<span class="math-tex">\\\[$1\\\]</span>`);
// Inline math
text = text.replaceAll(/(?<!\\)\$(.+?)\$/g,
`<span class="math-tex">\\\($1\\\)</span>`);
}
return text;
return super.paragraph(data).trimEnd();
}
code({ text, lang }: Tokens.Code): string {
@ -133,11 +121,17 @@ function renderToHtml(content: string, title: string) {
// Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere.
content = content.replaceAll("\\$", "\\\\$");
let html = parse(content, {
// Extract formulas and replace them with placeholders to prevent interference from Markdown rendering
const { processedText, formulaMap } = extractFormulas(content);
let html = parse(processedText, {
async: false,
renderer: renderer
}) as string;
// After rendering, replace placeholders back with the formula HTML
html = restoreFormulas(html, formulaMap);
// h1 handling needs to come before sanitization
html = importUtils.handleH1(html, title);
html = htmlSanitizer.sanitize(html);
@ -165,6 +159,38 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) {
return MIME_TYPE_AUTO;
}
/**
 * Pulls math expressions out of Markdown source and replaces each one with an
 * HTML-comment placeholder, so that the Markdown parser cannot interpret
 * characters such as `_`, `*` or `\` inside a formula.
 *
 * Display math (`$$...$$`, may span several lines) is extracted first, then
 * inline math (`$...$`, single line). An opening `$` preceded by a backslash is
 * not treated as a delimiter.
 *
 * Fenced code blocks (``` or ~~~) and single-line inline code spans are left
 * untouched: a placeholder inside code would be entity-escaped by the Markdown
 * renderer and could never be restored, corrupting code such as
 * `echo $HOME and $PATH`. Indented (4-space) code blocks are not detected.
 *
 * @param text - raw Markdown source.
 * @returns `processedText`, the Markdown with formulas replaced by placeholders,
 *          and `formulaMap`, mapping each placeholder to the original formula
 *          including its `$` / `$$` delimiters.
 */
function extractFormulas(text: string): { processedText: string, formulaMap: Map<string, string> } {
    const formulaMap = new Map<string, string>();
    let formulaId = 0;

    // Group 1: fence of a fenced code block (closed by a fence at least as long, or by end of input).
    // Group 2: backtick run delimiting an inline code span (closed by a run of the same length).
    const codePattern = /^ {0,3}(`{3,}|~{3,})[^`\n]*\n[\s\S]*?(?:^ {0,3}\1[`~]*[ \t]*$|(?![\s\S]))|(?<!`)(`+)(?!`)[^\n]*?(?<!`)\2(?!`)/;

    // Code alternatives come first so that, at any position, code wins over math.
    // The formula body therefore always ends up in capture group 3.
    const protectingCode = (formulaPattern: RegExp): RegExp =>
        new RegExp(`${codePattern.source}|${formulaPattern.source}`, "gm");

    const stash = (kind: "BLOCK" | "INLINE", source: string): string => {
        const key = `<!--FORMULA_${kind}_${formulaId++}-->`;
        formulaMap.set(key, source);
        return key;
    };

    // Display math
    text = text.replace(
        protectingCode(/(?<!\\)\$\$([\s\S]+?)\$\$/),
        (match: string, _fence: string | undefined, _ticks: string | undefined, formula: string | undefined) =>
            formula === undefined ? match : stash("BLOCK", `$$${formula}$$`)
    );

    // Inline math
    text = text.replace(
        protectingCode(/(?<!\\)\$(.+?)\$/),
        (match: string, _fence: string | undefined, _ticks: string | undefined, formula: string | undefined) =>
            formula === undefined ? match : stash("INLINE", `$${formula}$`)
    );

    return { processedText: text, formulaMap };
}
/**
 * Replaces formula placeholders in rendered HTML with `math-tex` spans.
 *
 * Each map value is the original formula including its delimiters: a value
 * starting with `$$` is rendered as display math (`\[...\]`), anything else as
 * inline math (`\(...\)`).
 *
 * @param html - HTML produced by the Markdown renderer, still containing placeholders.
 * @param formulaMap - placeholder-to-formula map as produced by `extractFormulas`.
 * @returns the HTML with every occurrence of every placeholder replaced.
 */
function restoreFormulas(html: string, formulaMap: Map<string, string>): string {
    for (const [key, formula] of formulaMap.entries()) {
        const isBlock = formula.startsWith("$$");
        const inner = formula.replace(/^\${1,2}|\${1,2}$/g, "");

        // The formula bypassed the Markdown renderer, so nothing has escaped it yet.
        // Without this, text such as `a<b` would be parsed as a tag by later HTML processing.
        const escaped = inner
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");

        const rendered = isBlock
            ? `<span class="math-tex">\\[${escaped}\\]</span>`
            : `<span class="math-tex">\\(${escaped}\\)</span>`;

        // split/join substitutes literally. A string replacement passed to replace/replaceAll
        // would interpret `$&`, `$'`, `$$` etc. occurring inside the formula.
        html = html.split(key).join(rendered);
    }
    return html;
}
const renderer = new CustomMarkdownRenderer({ async: false });
export default {