diff --git a/apps/server/src/services/import/markdown.spec.ts b/apps/server/src/services/import/markdown.spec.ts index 103c3fc6f..8768d3252 100644 --- a/apps/server/src/services/import/markdown.spec.ts +++ b/apps/server/src/services/import/markdown.spec.ts @@ -194,9 +194,43 @@ second line 2
  1. Hello
  2. { + const input = `$$ +\\sqrt{x^{2}+1} \\ ++ \\frac{1}{2} +$$`; + const expected = /*html*/`\\[ +\\sqrt{x^{2}+1} \\ ++ \\frac{1}{2} +\\]`; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); + + it("ignores math formulas inside code blocks and converts inline math expressions correctly", () => { + const result = markdownService.renderToHtml(trimIndentation`\ + \`\`\`unknownlanguage + $$a+b$$ + \`\`\` + `, "title"); + expect(result).toBe(trimIndentation`\ +
    $$a+b$$
    `); + }); + + it("converts specific inline math expression into Mathtex format", () => { + const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`; + const expected = /*html*/`

    This is a formula: \\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\) inside a sentence.

    `; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); + + it("converts math expressions inside list items into Mathtex format", () => { + const input = `- First item with formula: $E = mc^2$`; + const expected = /*html*/``; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); + it("converts display math expressions into Mathtex format", () => { const input = `$$\sqrt{x^{2}+1}$$`; - const expected = /*html*/`

    \\[\sqrt{x^{2}+1}\\]

    `; + const expected = /*html*/`\\[\sqrt{x^{2}+1}\\]`; expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); }); @@ -240,7 +274,7 @@ second line 2
    1. Hello
    2. { - const input = trimIndentation`\ + const input = trimIndentation`\ - [x] Hello - [ ] World`; const expected = ``; diff --git a/apps/server/src/services/import/markdown.ts b/apps/server/src/services/import/markdown.ts index 8d1e7ed34..e9ad6c4ae 100644 --- a/apps/server/src/services/import/markdown.ts +++ b/apps/server/src/services/import/markdown.ts @@ -23,19 +23,7 @@ class CustomMarkdownRenderer extends Renderer { } paragraph(data: Tokens.Paragraph): string { - let text = super.paragraph(data).trimEnd(); - - if (text.includes("$")) { - // Display math - text = text.replaceAll(/(?\\\[$1\\\]`); - - // Inline math - text = text.replaceAll(/(?\\\($1\\\)`); - } - - return text; + return super.paragraph(data).trimEnd(); } code({ text, lang }: Tokens.Code): string { @@ -133,11 +121,17 @@ function renderToHtml(content: string, title: string) { // Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere. content = content.replaceAll("\\$", "\\\\$"); - let html = parse(content, { + // Extract formulas and replace them with placeholders to prevent interference from Markdown rendering + const { processedText, placeholderMap: formulaMap } = extractFormulas(content); + + let html = parse(processedText, { async: false, renderer: renderer }) as string; + // After rendering, replace placeholders back with the formula HTML + html = restoreFromMap(html, formulaMap); + // h1 handling needs to come before sanitization html = importUtils.handleH1(html, title); html = htmlSanitizer.sanitize(html); @@ -165,6 +159,59 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) { return MIME_TYPE_AUTO; } +function extractCodeBlocks(text: string): { processedText: string, placeholderMap: Map } { + const codeMap = new Map(); + let id = 0; + const timestamp = Date.now(); + + // Multi-line code block and Inline code + text = text.replace(/```[\s\S]*?```/g, (m) => { + const key = ``; + codeMap.set(key, m); + return key; + }).replace(/`[^`\n]+`/g, (m) => { + const key = ``; + codeMap.set(key, m); + return key; + }); + + return { processedText: text, placeholderMap: codeMap }; +} + +function extractFormulas(text: string): { processedText: string, placeholderMap: Map } { + // Protect the $ signs inside code blocks from being recognized as formulas. + const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text); + + const formulaMap = new Map(); + let id = 0; + const timestamp = Date.now(); + + // Display math and Inline math + let processedText = noCodeText.replace(/(? { + const key = ``; + const rendered = `\\[${formula}\\]`; + formulaMap.set(key, rendered); + return key; + }).replace(/(? { + const key = ``; + const rendered = `\\(${formula}\\)`; + formulaMap.set(key, rendered); + return key; + }); + + processedText = restoreFromMap(processedText, codeMap); + + return { processedText, placeholderMap: formulaMap }; +} + +function restoreFromMap(text: string, map: Map): string { + if (map.size === 0) return text; + const pattern = [...map.keys()] + .map(k => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')) + .join('|'); + return text.replace(new RegExp(pattern, 'g'), match => map.get(match) ?? match); +} + const renderer = new CustomMarkdownRenderer({ async: false }); export default {