From 6e8e343301fb41aefdc6d0bbad4b64f709ec4821 Mon Sep 17 00:00:00 2001 From: SiriusXT <1160925501@qq.com> Date: Tue, 20 May 2025 22:03:40 +0800 Subject: [PATCH 1/3] fix(import): Unable to handle multi line mathematical formulas when importing markdown --- .../src/services/import/markdown.spec.ts | 26 ++++++++- apps/server/src/services/import/markdown.ts | 54 ++++++++++++++----- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/apps/server/src/services/import/markdown.spec.ts b/apps/server/src/services/import/markdown.spec.ts index 103c3fc6f..8edf1c521 100644 --- a/apps/server/src/services/import/markdown.spec.ts +++ b/apps/server/src/services/import/markdown.spec.ts @@ -194,9 +194,33 @@ second line 2
  1. Hello
  2. { + const input = `$$ +\\sqrt{x^{2}+1} \\ ++ \\frac{1}{2} +$$`; + const expected = /*html*/`\\[ +\\sqrt{x^{2}+1} \\ ++ \\frac{1}{2} +\\]`; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); + + it("converts specific inline math expression into Mathtex format", () => { + const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`; + const expected = /*html*/`

    This is a formula: \\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\) inside a sentence.

    `; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); + + it("converts math expressions inside list items into Mathtex format", () => { + const input = `- First item with formula: $E = mc^2$`; + const expected = /*html*/``; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); +}); + it("converts display math expressions into Mathtex format", () => { const input = `$$\sqrt{x^{2}+1}$$`; - const expected = /*html*/`

    \\[\sqrt{x^{2}+1}\\]

    `; + const expected = /*html*/`\\[\sqrt{x^{2}+1}\\]`; expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); }); diff --git a/apps/server/src/services/import/markdown.ts b/apps/server/src/services/import/markdown.ts index 8d1e7ed34..e49f5eaa4 100644 --- a/apps/server/src/services/import/markdown.ts +++ b/apps/server/src/services/import/markdown.ts @@ -23,19 +23,7 @@ class CustomMarkdownRenderer extends Renderer { } paragraph(data: Tokens.Paragraph): string { - let text = super.paragraph(data).trimEnd(); - - if (text.includes("$")) { - // Display math - text = text.replaceAll(/(?\\\[$1\\\]`); - - // Inline math - text = text.replaceAll(/(?\\\($1\\\)`); - } - - return text; + return super.paragraph(data).trimEnd(); } code({ text, lang }: Tokens.Code): string { @@ -132,12 +120,18 @@ class CustomMarkdownRenderer extends Renderer { function renderToHtml(content: string, title: string) { // Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere. content = content.replaceAll("\\$", "\\\\$"); + + // Extract formulas and replace them with placeholders to prevent interference from Markdown rendering + const { processedText, formulaMap } = extractFormulas(content); - let html = parse(content, { + let html = parse(processedText, { async: false, renderer: renderer }) as string; + // After rendering, replace placeholders back with the formula HTML + html = restoreFormulas(html, formulaMap); + // h1 handling needs to come before sanitization html = importUtils.handleH1(html, title); html = htmlSanitizer.sanitize(html); @@ -165,6 +159,38 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) { return MIME_TYPE_AUTO; } +function extractFormulas(text: string): { processedText: string, formulaMap: Map } { + const formulaMap = new Map(); + let formulaId = 0; + + // Display math + text = text.replace(/(? { + const key = ``; + formulaMap.set(key, `$$${formula}$$`); + return key; + }); + + // Inline math + text = text.replace(/(? { + const key = ``; + formulaMap.set(key, `$${formula}$`); + return key; + }); + return { processedText: text, formulaMap }; +} + +function restoreFormulas(html: string, formulaMap: Map): string { + for (const [key, formula] of formulaMap.entries()) { + const isBlock = formula.startsWith("$$"); + const inner = formula.replace(/^\${1,2}|\${1,2}$/g, ""); + const rendered = isBlock + ? `\\[${inner}\\]` + : `\\(${inner}\\)`; + html = html.replaceAll(key, rendered); + } + return html; +} + const renderer = new CustomMarkdownRenderer({ async: false }); export default { From 6a9b44e4a159c37ad3ccd677d8e8d5fc8cd4c61d Mon Sep 17 00:00:00 2001 From: SiriusXT <1160925501@qq.com> Date: Tue, 20 May 2025 22:14:30 +0800 Subject: [PATCH 2/3] Fixed indentation --- apps/server/src/services/import/markdown.spec.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/server/src/services/import/markdown.spec.ts b/apps/server/src/services/import/markdown.spec.ts index 8edf1c521..fc38cdca0 100644 --- a/apps/server/src/services/import/markdown.spec.ts +++ b/apps/server/src/services/import/markdown.spec.ts @@ -213,10 +213,10 @@ $$`; }); it("converts math expressions inside list items into Mathtex format", () => { - const input = `- First item with formula: $E = mc^2$`; - const expected = /*html*/`
    • First item with formula: \\(E = mc^2\\)
    `; - expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); -}); + const input = `- First item with formula: $E = mc^2$`; + const expected = /*html*/`
    • First item with formula: \\(E = mc^2\\)
    `; + expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); + }); it("converts display math expressions into Mathtex format", () => { const input = `$$\sqrt{x^{2}+1}$$`; From 04bd5415423934dbf37505f86de41389f6d83fef Mon Sep 17 00:00:00 2001 From: SiriusXT <1160925501@qq.com> Date: Wed, 21 May 2025 17:15:54 +0800 Subject: [PATCH 3/3] fix(markdown): Make the math formula conversion ignore formulas inside code blocks --- .../src/services/import/markdown.spec.ts | 12 ++- apps/server/src/services/import/markdown.ts | 75 ++++++++++++------- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/apps/server/src/services/import/markdown.spec.ts b/apps/server/src/services/import/markdown.spec.ts index fc38cdca0..8768d3252 100644 --- a/apps/server/src/services/import/markdown.spec.ts +++ b/apps/server/src/services/import/markdown.spec.ts @@ -206,6 +206,16 @@ $$`; expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected); }); + it("ignores math formulas inside code blocks and converts inline math expressions correctly", () => { + const result = markdownService.renderToHtml(trimIndentation`\ + \`\`\`unknownlanguage + $$a+b$$ + \`\`\` + `, "title"); + expect(result).toBe(trimIndentation`\ +
    $$a+b$$
    `); + }); + it("converts specific inline math expression into Mathtex format", () => { const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`; const expected = /*html*/`

    This is a formula: \\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\) inside a sentence.

    `; @@ -264,7 +274,7 @@ $$`; }); it("imports todo lists properly", () => { - const input = trimIndentation`\ + const input = trimIndentation`\ - [x] Hello - [ ] World`; const expected = `
    `; diff --git a/apps/server/src/services/import/markdown.ts b/apps/server/src/services/import/markdown.ts index e49f5eaa4..e9ad6c4ae 100644 --- a/apps/server/src/services/import/markdown.ts +++ b/apps/server/src/services/import/markdown.ts @@ -120,9 +120,9 @@ class CustomMarkdownRenderer extends Renderer { function renderToHtml(content: string, title: string) { // Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere. content = content.replaceAll("\\$", "\\\\$"); - + // Extract formulas and replace them with placeholders to prevent interference from Markdown rendering - const { processedText, formulaMap } = extractFormulas(content); + const { processedText, placeholderMap: formulaMap } = extractFormulas(content); let html = parse(processedText, { async: false, @@ -130,7 +130,7 @@ function renderToHtml(content: string, title: string) { }) as string; // After rendering, replace placeholders back with the formula HTML - html = restoreFormulas(html, formulaMap); + html = restoreFromMap(html, formulaMap); // h1 handling needs to come before sanitization html = importUtils.handleH1(html, title); @@ -159,36 +159,57 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) { return MIME_TYPE_AUTO; } -function extractFormulas(text: string): { processedText: string, formulaMap: Map } { - const formulaMap = new Map(); - let formulaId = 0; +function extractCodeBlocks(text: string): { processedText: string, placeholderMap: Map } { + const codeMap = new Map(); + let id = 0; + const timestamp = Date.now(); - // Display math - text = text.replace(/(? { - const key = ``; - formulaMap.set(key, `$$${formula}$$`); + // Multi-line code block and Inline code + text = text.replace(/```[\s\S]*?```/g, (m) => { + const key = ``; + codeMap.set(key, m); + return key; + }).replace(/`[^`\n]+`/g, (m) => { + const key = ``; + codeMap.set(key, m); return key; }); - // Inline math - text = text.replace(/(? { - const key = ``; - formulaMap.set(key, `$${formula}$`); - return key; - }); - return { processedText: text, formulaMap }; + return { processedText: text, placeholderMap: codeMap }; } -function restoreFormulas(html: string, formulaMap: Map): string { - for (const [key, formula] of formulaMap.entries()) { - const isBlock = formula.startsWith("$$"); - const inner = formula.replace(/^\${1,2}|\${1,2}$/g, ""); - const rendered = isBlock - ? `\\[${inner}\\]` - : `\\(${inner}\\)`; - html = html.replaceAll(key, rendered); - } - return html; +function extractFormulas(text: string): { processedText: string, placeholderMap: Map } { + // Protect the $ signs inside code blocks from being recognized as formulas. + const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text); + + const formulaMap = new Map(); + let id = 0; + const timestamp = Date.now(); + + // Display math and Inline math + let processedText = noCodeText.replace(/(? { + const key = ``; + const rendered = `\\[${formula}\\]`; + formulaMap.set(key, rendered); + return key; + }).replace(/(? { + const key = ``; + const rendered = `\\(${formula}\\)`; + formulaMap.set(key, rendered); + return key; + }); + + processedText = restoreFromMap(processedText, codeMap); + + return { processedText, placeholderMap: formulaMap }; +} + +function restoreFromMap(text: string, map: Map): string { + if (map.size === 0) return text; + const pattern = [...map.keys()] + .map(k => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')) + .join('|'); + return text.replace(new RegExp(pattern, 'g'), match => map.get(match) ?? match); } const renderer = new CustomMarkdownRenderer({ async: false });