From a441ea2461ec307704a8e55cdb72d5dcb15708c9 Mon Sep 17 00:00:00 2001
From: Elian Doran
Date: Tue, 11 Mar 2025 17:12:48 +0200
Subject: [PATCH] feat(md): rewrite image URL with spaces

---
 src/services/export/md.spec.ts |  6 +++++
 src/services/export/md.ts      | 57 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/src/services/export/md.spec.ts b/src/services/export/md.spec.ts
index 400ddd4cc..405cf9db7 100644
--- a/src/services/export/md.spec.ts
+++ b/src/services/export/md.spec.ts
@@ -98,4 +98,10 @@ describe("Markdown export", () => {
             ###### Heading 6`;
         expect(markdownExportService.toMarkdown(html)).toBe(expected);
     });
+
+    it("rewrites image URL with spaces", () => {
+        const html = `<img src="Hello world  .png"/>`;
+        const expected = `![](Hello%20world%20%20.png)`;
+        expect(markdownExportService.toMarkdown(html)).toBe(expected);
+    });
 });
diff --git a/src/services/export/md.ts b/src/services/export/md.ts
index 669871ff9..7512655b0 100644
--- a/src/services/export/md.ts
+++ b/src/services/export/md.ts
@@ -30,6 +30,7 @@ function toMarkdown(content: string) {
         });
         // Filter is heavily based on: https://github.com/mixmark-io/turndown/issues/274#issuecomment-458730974
         instance.addRule("fencedCodeBlock", fencedCodeBlockFilter);
+        instance.addRule("img", buildImageFilter());
         instance.use(turndownPluginGfm.gfm);
     }
 
@@ -52,6 +53,62 @@ function rewriteLanguageTag(source: string) {
     }
 }
 
+/**
+ * Builds the Turndown rule that renders `<img>` elements as Markdown images.
+ *
+ * In the `src`, parentheses are backslash-escaped and spaces are rewritten to `%20`. The `alt`
+ * text is Markdown-escaped and a non-empty `title` is appended in double quotes. An image
+ * without a `src` produces an empty string.
+ *
+ * TODO: Remove once upstream delivers a fix for https://github.com/mixmark-io/turndown/issues/467.
+ *
+ * @returns the rule registered by `toMarkdown` under the `img` key.
+ */
+function buildImageFilter() {
+    // "before" matches are escaped with a leading backslash, "after" matches with a trailing one.
+    const ESCAPE_PATTERNS = {
+        before: /([\\*`[\]_]|(?:^[-+>])|(?:^~~~)|(?:^#{1,6}))/g,
+        after: /((?:^\d+(?=\.)))/
+    }
+
+    const escapePattern = new RegExp('(?:' + ESCAPE_PATTERNS.before.source + '|' + ESCAPE_PATTERNS.after.source + ')', 'g');
+
+    function escapeMarkdown (content: string) {
+        return content.replace(escapePattern, function (match, before, after) {
+            return before ? '\\' + before : after + '\\'
+        })
+    }
+
+    function escapeLinkDestination(destination: string) {
+        return destination
+            .replace(/([()])/g, '\\$1')
+            .replace(/ /g, "%20");
+    }
+
+    function escapeLinkTitle (title: string) {
+        return title.replace(/"/g, '\\"')
+    }
+
+    // Treats a missing attribute as '' and collapses each run of newlines (plus any whitespace after it) into one newline.
+    function cleanAttribute (attribute: string) {
+        return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
+    }
+
+    const imageFilter: TurndownService.Rule = {
+        filter: "img",
+        replacement(content, node) {
+            const untypedNode = (node as any);
+            const alt = escapeMarkdown(cleanAttribute(untypedNode.getAttribute('alt')))
+            const src = escapeLinkDestination(untypedNode.getAttribute('src') || '')
+            const title = cleanAttribute(untypedNode.getAttribute('title'))
+            const titlePart = title ? ' "' + escapeLinkTitle(title) + '"' : ''
+
+            return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
+        }
+    };
+    return imageFilter;
+}
+
 export default {
     toMarkdown
 };