diff --git a/src/services/html_sanitizer.ts b/src/services/html_sanitizer.ts
index ec76883b8..965c1bd33 100644
--- a/src/services/html_sanitizer.ts
+++ b/src/services/html_sanitizer.ts
@@ -43,7 +43,10 @@ function sanitize(dirtyHtml: string) {
'mumble', 'nfs', 'onenote', 'pop', 'rmi', 's3', 'sftp', 'skype', 'sms', 'spotify', 'steam', 'svn', 'udp',
'view-source', 'vnc', 'ws', 'wss', 'xmpp', 'jdbc', 'slack'
],
- transformTags,
+ nonTextTags: [
+ 'head'
+ ],
+ transformTags
});
}
diff --git a/src/services/import/single.ts b/src/services/import/single.ts
index 465d3b7a8..c20fec3b0 100644
--- a/src/services/import/single.ts
+++ b/src/services/import/single.ts
@@ -149,15 +149,20 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote)
}
function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
- const title = utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces);
let content = file.buffer.toString("utf-8");
- if (taskContext?.data?.safeImport) {
- content = htmlSanitizer.sanitize(content);
- }
+ // Try to get title from HTML first, fall back to filename
+ // We do this before sanitization since that turns all <h1>s into <h2>
+ const htmlTitle = importUtils.extractHtmlTitle(content);
+ const title = htmlTitle || utils.getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces);
content = importUtils.handleH1(content, title);
+ if (taskContext?.data?.safeImport) {
+ content = htmlSanitizer.sanitize(content);
+ }
+
+
const {note} = noteService.createNewNote({
parentNoteId: parentNote.noteId,
title,
@@ -166,9 +171,9 @@ function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
mime: 'text/html',
isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
});
-
+
taskContext.increaseProgressCount();
-
+
return note;
}
diff --git a/src/services/import/utils.ts b/src/services/import/utils.ts
index b85700230..ec4bbf35a 100644
--- a/src/services/import/utils.ts
+++ b/src/services/import/utils.ts
@@ -1,7 +1,7 @@
"use strict";
function handleH1(content: string, title: string) {
- content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
+ content = content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => {
if (title.trim() === text.trim()) {
return ""; // remove whole H1 tag
} else {
@@ -11,6 +11,12 @@ function handleH1(content: string, title: string) {
return content;
}
+function extractHtmlTitle(content: string): string | null {
+ const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i);
+ return titleMatch ? titleMatch[1].trim() : null;
+}
+
export default {
- handleH1
+ handleH1,
+ extractHtmlTitle
};