From 04bd5415423934dbf37505f86de41389f6d83fef Mon Sep 17 00:00:00 2001
From: SiriusXT <1160925501@qq.com>
Date: Wed, 21 May 2025 17:15:54 +0800
Subject: [PATCH] fix(markdown): Make the math formula conversion ignore
formulas inside code blocks
---
.../src/services/import/markdown.spec.ts | 12 ++-
apps/server/src/services/import/markdown.ts | 75 ++++++++++++-------
2 files changed, 59 insertions(+), 28 deletions(-)
diff --git a/apps/server/src/services/import/markdown.spec.ts b/apps/server/src/services/import/markdown.spec.ts
index fc38cdca0..8768d3252 100644
--- a/apps/server/src/services/import/markdown.spec.ts
+++ b/apps/server/src/services/import/markdown.spec.ts
@@ -206,6 +206,16 @@ $$`;
expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected);
});
+ it("ignores math formulas inside code blocks and converts inline math expressions correctly", () => {
+ const result = markdownService.renderToHtml(trimIndentation`\
+ \`\`\`unknownlanguage
+ $$a+b$$
+ \`\`\`
+ `, "title");
+ expect(result).toBe(trimIndentation`\
+
$$a+b$$
`);
+ });
+
it("converts specific inline math expression into Mathtex format", () => {
const input = `This is a formula: $\\mathcal{L}_{task} + \\mathcal{L}_{od}$ inside a sentence.`;
const expected = /*html*/`This is a formula: \\(\\mathcal{L}_{task} + \\mathcal{L}_{od}\\) inside a sentence.
`;
@@ -264,7 +274,7 @@ $$`;
});
it("imports todo lists properly", () => {
- const input = trimIndentation`\
+ const input = trimIndentation`\
- [x] Hello
- [ ] World`;
const expected = ``;
diff --git a/apps/server/src/services/import/markdown.ts b/apps/server/src/services/import/markdown.ts
index e49f5eaa4..e9ad6c4ae 100644
--- a/apps/server/src/services/import/markdown.ts
+++ b/apps/server/src/services/import/markdown.ts
@@ -120,9 +120,9 @@ class CustomMarkdownRenderer extends Renderer {
function renderToHtml(content: string, title: string) {
// Double-escape slashes in math expression because they are otherwise consumed by the parser somewhere.
content = content.replaceAll("\\$", "\\\\$");
-
+
// Extract formulas and replace them with placeholders to prevent interference from Markdown rendering
- const { processedText, formulaMap } = extractFormulas(content);
+ const { processedText, placeholderMap: formulaMap } = extractFormulas(content);
let html = parse(processedText, {
async: false,
@@ -130,7 +130,7 @@ function renderToHtml(content: string, title: string) {
}) as string;
// After rendering, replace placeholders back with the formula HTML
- html = restoreFormulas(html, formulaMap);
+ html = restoreFromMap(html, formulaMap);
// h1 handling needs to come before sanitization
html = importUtils.handleH1(html, title);
@@ -159,36 +159,57 @@ function getNormalizedMimeFromMarkdownLanguage(language: string | undefined) {
return MIME_TYPE_AUTO;
}
-function extractFormulas(text: string): { processedText: string, formulaMap: Map } {
- const formulaMap = new Map();
- let formulaId = 0;
+function extractCodeBlocks(text: string): { processedText: string, placeholderMap: Map } {
+ const codeMap = new Map();
+ let id = 0;
+ const timestamp = Date.now();
- // Display math
- text = text.replace(/(? {
- const key = ``;
- formulaMap.set(key, `$$${formula}$$`);
+ // Fenced (multi-line) code blocks first, then inline code spans
+ text = text.replace(/```[\s\S]*?```/g, (m) => {
+ const key = ``;
+ codeMap.set(key, m);
+ return key;
+ }).replace(/`[^`\n]+`/g, (m) => {
+ const key = ``;
+ codeMap.set(key, m);
return key;
});
- // Inline math
- text = text.replace(/(? {
- const key = ``;
- formulaMap.set(key, `$${formula}$`);
- return key;
- });
- return { processedText: text, formulaMap };
+ return { processedText: text, placeholderMap: codeMap };
}
-function restoreFormulas(html: string, formulaMap: Map): string {
- for (const [key, formula] of formulaMap.entries()) {
- const isBlock = formula.startsWith("$$");
- const inner = formula.replace(/^\${1,2}|\${1,2}$/g, "");
- const rendered = isBlock
- ? `\\[${inner}\\]`
- : `\\(${inner}\\)`;
- html = html.replaceAll(key, rendered);
- }
- return html;
+function extractFormulas(text: string): { processedText: string, placeholderMap: Map } {
+ // Protect the $ signs inside code blocks from being recognized as formulas.
+ const { processedText: noCodeText, placeholderMap: codeMap } = extractCodeBlocks(text);
+
+ const formulaMap = new Map();
+ let id = 0;
+ const timestamp = Date.now();
+
+ // Display math ($$...$$) first, then inline math ($...$)
+ let processedText = noCodeText.replace(/(? {
+ const key = ``;
+ const rendered = `\\[${formula}\\]`;
+ formulaMap.set(key, rendered);
+ return key;
+ }).replace(/(? {
+ const key = ``;
+ const rendered = `\\(${formula}\\)`;
+ formulaMap.set(key, rendered);
+ return key;
+ });
+
+ processedText = restoreFromMap(processedText, codeMap);
+
+ return { processedText, placeholderMap: formulaMap };
+}
+
+function restoreFromMap(text: string, map: Map): string {
+ if (map.size === 0) return text;
+ const pattern = [...map.keys()]
+ .map(k => k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
+ .join('|');
+ return text.replace(new RegExp(pattern, 'g'), match => map.get(match) ?? match);
}
const renderer = new CustomMarkdownRenderer({ async: false });