diff --git a/src/services/html_sanitizer.ts b/src/services/html_sanitizer.ts index ec76883b8..965c1bd33 100644 --- a/src/services/html_sanitizer.ts +++ b/src/services/html_sanitizer.ts @@ -43,7 +43,10 @@ function sanitize(dirtyHtml: string) { 'mumble', 'nfs', 'onenote', 'pop', 'rmi', 's3', 'sftp', 'skype', 'sms', 'spotify', 'steam', 'svn', 'udp', 'view-source', 'vnc', 'ws', 'wss', 'xmpp', 'jdbc', 'slack' ], - transformTags, + nonTextTags: [ + 'head' + ], + transformTags }); } diff --git a/src/services/import/single.ts b/src/services/import/single.ts index 465d3b7a8..c20fec3b0 100644 --- a/src/services/import/single.ts +++ b/src/services/import/single.ts @@ -149,15 +149,20 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote) } function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) { - const title = utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces); let content = file.buffer.toString("utf-8"); - if (taskContext?.data?.safeImport) { - content = htmlSanitizer.sanitize(content); - } + // Try to get title from HTML first, fall back to filename + // We do this before sanitization since that turns all

<h1>s into <h2>

+ const htmlTitle = importUtils.extractHtmlTitle(content); + const title = htmlTitle || utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces); content = importUtils.handleH1(content, title); + if (taskContext?.data?.safeImport) { + content = htmlSanitizer.sanitize(content); + } + + const {note} = noteService.createNewNote({ parentNoteId: parentNote.noteId, title, @@ -166,9 +171,9 @@ function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) { mime: 'text/html', isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(), }); - + taskContext.increaseProgressCount(); - + return note; } diff --git a/src/services/import/utils.ts b/src/services/import/utils.ts index b85700230..ec4bbf35a 100644 --- a/src/services/import/utils.ts +++ b/src/services/import/utils.ts @@ -1,7 +1,7 @@ "use strict"; function handleH1(content: string, title: string) { - content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => { + content = content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { if (title.trim() === text.trim()) { return ""; // remove whole H1 tag } else { @@ -11,6 +11,12 @@ function handleH1(content: string, title: string) { return content; } +function extractHtmlTitle(content: string): string | null { + const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i); + return titleMatch ? titleMatch[1].trim() : null; +} + export default { - handleH1 + handleH1, + extractHtmlTitle };