feat(md): rewrite image URL with spaces

This commit is contained in:
Elian Doran 2025-03-11 17:12:48 +02:00
parent f67e2ebcd5
commit a441ea2461
No known key found for this signature in database
2 changed files with 51 additions and 0 deletions

View File

@@ -98,4 +98,10 @@ describe("Markdown export", () => {
###### Heading 6`;
expect(markdownExportService.toMarkdown(html)).toBe(expected);
});
// Spaces inside an image `src` must be percent-encoded in the generated Markdown destination.
it("rewrites image URL with spaces", () => {
    // Two consecutive spaces precede the extension on purpose: each space must
    // turn into its own `%20`, which is what the expected string encodes.
    const html = `<img src="Hello world  .png"/>`;
    const expected = `![](Hello%20world%20%20.png)`;
    expect(markdownExportService.toMarkdown(html)).toBe(expected);
});
});

View File

@@ -30,6 +30,7 @@ function toMarkdown(content: string) {
});
// Filter is heavily based on: https://github.com/mixmark-io/turndown/issues/274#issuecomment-458730974
instance.addRule("fencedCodeBlock", fencedCodeBlockFilter);
instance.addRule("img", buildImageFilter());
instance.use(turndownPluginGfm.gfm);
}
@@ -52,6 +53,50 @@ function rewriteLanguageTag(source: string) {
}
}
// TODO: Remove once upstream delivers a fix for https://github.com/mixmark-io/turndown/issues/467.
/**
 * Builds the Turndown rule that converts `<img>` elements into Markdown image
 * syntax: `![alt](src "title")`.
 *
 * The link destination is made safe for Markdown by backslash-escaping
 * parentheses and rewriting every space to `%20`. The alt text is
 * Markdown-escaped and the optional title has its double quotes escaped.
 *
 * @returns the rule object; an image without a `src` produces an empty string.
 */
function buildImageFilter() {
    const ESCAPE_PATTERNS = {
        // Text that is escaped by putting a backslash BEFORE it: inline markers
        // anywhere, plus list/quote markers, `~~~` fences and ATX heading markers
        // (one to six `#`) at the very start of the text.
        before: /([\\*`[\]_]|(?:^[-+>])|(?:^~~~)|(?:^#{1,6}))/g,
        // Leading digits followed by a dot (ordered list marker): the backslash
        // goes AFTER the digits so the dot is the escaped character.
        after: /((?:^\d+(?=\.)))/
    };

    // Both patterns contribute exactly one capture group, so the replace callback
    // receives them as (before, after) in that order.
    const escapePattern = new RegExp("(?:" + ESCAPE_PATTERNS.before.source + "|" + ESCAPE_PATTERNS.after.source + ")", "g");

    /** Escapes Markdown-significant characters in plain text (used for the alt text). */
    function escapeMarkdown(content: string) {
        return content.replace(escapePattern, (_match, before: string | undefined, after: string | undefined) => {
            return before ? "\\" + before : after + "\\";
        });
    }

    /** Escapes parentheses and encodes spaces so the URL stays a single link destination. */
    function escapeLinkDestination(destination: string) {
        return destination
            .replace(/([()])/g, "\\$1")
            .replace(/ /g, "%20");
    }

    /** Escapes double quotes, since the title is emitted inside a double-quoted string. */
    function escapeLinkTitle(title: string) {
        return title.replace(/"/g, '\\"');
    }

    /** Collapses runs of newlines (and the whitespace following them) to one newline; a missing attribute becomes "". */
    function cleanAttribute(attribute: string | null | undefined) {
        return attribute ? attribute.replace(/(\n+\s*)+/g, "\n") : "";
    }

    const imageFilter: TurndownService.Rule = {
        filter: "img",
        replacement(content, node) {
            // The node is only used through `getAttribute`; it is left untyped as in
            // the original so the code does not depend on a particular DOM typing.
            const untypedNode = (node as any);
            const alt = escapeMarkdown(cleanAttribute(untypedNode.getAttribute("alt")));
            const src = escapeLinkDestination(untypedNode.getAttribute("src") || "");
            const title = cleanAttribute(untypedNode.getAttribute("title"));
            const titlePart = title ? ' "' + escapeLinkTitle(title) + '"' : "";
            // Without a source there is nothing to link to, so the image is dropped.
            return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
        }
    };
    return imageFilter;
}
// Default export object exposing `toMarkdown`.
export default {
toMarkdown
};