677 lines
22 KiB
JavaScript
Raw Normal View History

2020-03-20 21:57:16 +01:00
"use strict";
const BAttribute = require('../../becca/entities/battribute');
2020-03-20 21:57:16 +01:00
const utils = require('../../services/utils');
const log = require('../../services/log');
const noteService = require('../../services/notes');
const attributeService = require('../../services/attributes');
const BBranch = require('../../becca/entities/bbranch');
2020-03-20 21:57:16 +01:00
const path = require('path');
const commonmark = require('commonmark');
const protectedSessionService = require('../protected_session');
const mimeService = require("./mime");
const treeService = require("../tree");
const yauzl = require("yauzl");
2020-06-30 23:37:06 +02:00
const htmlSanitizer = require('../html_sanitizer');
2021-06-29 22:15:57 +02:00
const becca = require("../../becca/becca");
2023-03-16 12:17:55 +01:00
const BAttachment = require("../../becca/entities/battachment");
2020-03-20 21:57:16 +01:00
/**
* @param {TaskContext} taskContext
* @param {Buffer} fileBuffer
* @param {BNote} importRootNote
2023-01-05 23:38:41 +01:00
* @returns {Promise<*>}
2020-03-20 21:57:16 +01:00
*/
2020-06-27 00:40:35 +02:00
async function importZip(taskContext, fileBuffer, importRootNote) {
2023-05-06 15:07:38 +02:00
// NOTE: the dictionaries below are keyed by strings taken from the ZIP (note IDs, file paths), so they are
// created without a prototype - otherwise keys such as "constructor" would appear to be already present.

/** @type {Object.<string, string>} maps from original noteId (in ZIP file) to newly generated noteId */
const noteIdMap = Object.create(null);
/** @type {Object.<string, string>} maps from original attachmentId (in ZIP file) to newly generated attachmentId */
const attachmentIdMap = Object.create(null);
/** attribute rows collected while notes are being created; they are saved only after all entries were processed */
const attributes = [];
/** @type {Object.<string, string>} path => noteId | attachmentId, used only when meta file is not available */
const createdPaths = Object.assign(Object.create(null), {
    '/': importRootNote.noteId,
    '\\': importRootNote.noteId
});
const mdReader = new commonmark.Parser();
const mdWriter = new commonmark.HtmlRenderer();
/** parsed content of the "!!!meta.json" entry, stays null when the ZIP does not contain one */
let metaFile = null;
/** @type {BNote} first note created by this import, returned to the caller */
let firstNote = null;
/** @type {Set.<string>} noteIds of all notes created by this import */
const createdNoteIds = new Set();
2020-03-20 21:57:16 +01:00
/**
 * Maps a noteId found in the ZIP to the noteId used by the imported note.
 * The same original ID always maps to the same new ID within one import.
 *
 * @param {string} origNoteId
 * @returns {string}
 */
function getNewNoteId(origNoteId) {
    if (!origNoteId.trim()) {
        // this probably shouldn't happen, but still good to have this precaution
        return "empty_note_id";
    }

    if (origNoteId === 'root' || origNoteId.startsWith("_")) {
        // these "named" noteIds don't differ between Trilium instances
        return origNoteId;
    }

    // only own entries count: the ID comes from the ZIP, so a value such as "constructor"
    // must not resolve to a member inherited from Object.prototype
    let newNoteId = Object.prototype.hasOwnProperty.call(noteIdMap, origNoteId)
        ? noteIdMap[origNoteId]
        : undefined;

    if (!newNoteId) {
        newNoteId = utils.newEntityId();
        noteIdMap[origNoteId] = newNoteId;
    }

    return newNoteId;
}
/**
 * Maps an attachmentId found in the ZIP to the attachmentId used by the imported attachment.
 *
 * @param {string} origAttachmentId
 * @returns {string}
 */
function getNewAttachmentId(origAttachmentId) {
    if (!origAttachmentId.trim()) {
        // this probably shouldn't happen, but still good to have this precaution
        return "empty_attachment_id";
    }

    // only own entries count: the ID comes from the ZIP, so a value such as "toString"
    // must not resolve to a member inherited from Object.prototype
    let newAttachmentId = Object.prototype.hasOwnProperty.call(attachmentIdMap, origAttachmentId)
        ? attachmentIdMap[origAttachmentId]
        : undefined;

    if (!newAttachmentId) {
        newAttachmentId = utils.newEntityId();
        attachmentIdMap[origAttachmentId] = newAttachmentId;
    }

    return newAttachmentId;
}
/**
 * Searches the attachments of all children of the given note meta for one stored under the given file name.
 *
 * @param {NoteMeta} parentNoteMeta - meta whose children are searched
 * @param {string} dataFileName - file name of the attachment data file
 * @returns {{parentNoteMeta: NoteMeta, noteMeta: NoteMeta, attachmentMeta: AttachmentMeta}|{}}
 *          the match together with its owning note meta, or an empty object when nothing matches
 */
function getAttachmentMeta(parentNoteMeta, dataFileName) {
    for (const noteMeta of parentNoteMeta.children) {
        const candidates = noteMeta.attachments || [];
        const attachmentMeta = candidates.find(candidate => candidate.dataFileName === dataFileName);

        if (attachmentMeta) {
            return { parentNoteMeta, noteMeta, attachmentMeta };
        }
    }

    return {};
}
/**
 * Finds the metadata describing a path inside the ZIP by walking the tree stored in the meta file.
 *
 * @param {string} filePath - path inside the ZIP, segments separated by "/" or "\"
 * @returns {{noteMeta: NoteMeta|undefined, parentNoteMeta: NoteMeta|undefined, attachmentMeta: AttachmentMeta|undefined}}
 *          empty object when there is no meta file or the path leads below a note without children
 */
function getMeta(filePath) {
    if (!metaFile) {
        return {};
    }

    const pathSegments = filePath.split(/[\/\\]/g);

    // artificial root wrapping the top-level entries of the meta file
    /** @type {NoteMeta} */
    let cursor = {
        isImportRoot: true,
        children: metaFile.files
    };

    /** @type {NoteMeta} */
    let parent;

    for (const segment of pathSegments) {
        if (!cursor?.children?.length) {
            return {};
        }

        parent = cursor;
        cursor = parent.children.find(file => file.dataFileName === segment || file.dirFileName === segment);

        if (!cursor) {
            // no note is stored under this name - check whether it is an attachment of one of the notes on this level
            return getAttachmentMeta(parent, segment);
        }
    }

    return {
        parentNoteMeta: parent,
        noteMeta: cursor
    };
}
2023-05-06 15:07:38 +02:00
/**
 * Determines the noteId of the note under which the entry on the given path should be placed.
 *
 * @param {string} filePath
 * @param {NoteMeta} parentNoteMeta - meta of the parent, when the meta file describes it
 * @return {string}
 */
function getParentNoteId(filePath, parentNoteMeta) {
    if (parentNoteMeta) {
        return parentNoteMeta.isImportRoot
            ? importRootNote.noteId
            : getNewNoteId(parentNoteMeta.noteId);
    }

    const parentPath = path.dirname(filePath);

    if (parentPath === '.') {
        return importRootNote.noteId;
    }

    // own-property check: the path comes from the ZIP, a directory named e.g. "constructor"
    // must not be mistaken for an already created path
    if (Object.prototype.hasOwnProperty.call(createdPaths, parentPath)) {
        return createdPaths[parentPath];
    }

    // ZIP allows creating out of order records - i.e., file in a directory can appear in the ZIP stream before the actual directory
    return saveDirectory(parentPath);
}
2023-05-06 15:07:38 +02:00
/**
 * Returns the noteId to use for the entry on the given path.
 *
 * @param {NoteMeta} noteMeta - meta of the note, when the meta file describes it
 * @param {string} filePath
 * @return {string}
 */
function getNoteId(noteMeta, filePath) {
    if (noteMeta) {
        return getNewNoteId(noteMeta.noteId);
    }

    // in case we lack metadata, we treat e.g. "Programming.html" and "Programming" as the same note
    // (one data file, the other directory for children)
    const filePathNoExt = utils.removeTextFileExtension(filePath);

    // own-property check: the path comes from the ZIP, a file named e.g. "toString"
    // must not be mistaken for an already created path
    if (Object.prototype.hasOwnProperty.call(createdPaths, filePathNoExt)) {
        return createdPaths[filePathNoExt];
    }

    const noteId = utils.newEntityId();

    createdPaths[filePathNoExt] = noteId;

    return noteId;
}
/**
 * Derives MIME and note type for an entry which has no metadata.
 * The MIME falls back to "application/octet-stream" when the mime service returns nothing for the path.
 *
 * @param {TaskContext} taskContext - its "data" is handed to the mime service when resolving the type
 * @param {string} filePath
 * @returns {{mime: string, type: string}}
 */
function detectFileTypeAndMime(taskContext, filePath) {
    const detectedMime = mimeService.getMime(filePath);
    const mime = detectedMime || "application/octet-stream";

    return {
        mime,
        type: mimeService.getType(taskContext.data, mime)
    };
}
2023-05-06 15:07:38 +02:00
/**
 * Queues the attributes described by the note's metadata into "attributes".
 * The metadata objects are adjusted in place (owner noteId, type/name conversion, remapped relation target).
 *
 * @param {BNote} note
 * @param {NoteMeta} noteMeta - nothing is queued when missing
 */
function saveAttributes(note, noteMeta) {
    if (!noteMeta) {
        return;
    }

    // metadata without an "attributes" list is treated as having no attributes
    for (const attr of noteMeta.attributes || []) {
        attr.noteId = note.noteId;

        // definition types are stored as labels with a prefixed name
        if (attr.type === 'label-definition') {
            attr.type = 'label';
            attr.name = `label:${attr.name}`;
        }
        else if (attr.type === 'relation-definition') {
            attr.type = 'label';
            attr.name = `relation:${attr.name}`;
        }

        if (!attributeService.isAttributeType(attr.type)) {
            log.error(`Unrecognized attribute type ${attr.type}`);
            continue;
        }

        if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) {
            // these relations are created automatically and as such don't need to be duplicated in the import
            continue;
        }

        if (attr.type === 'relation') {
            // relation value is a noteId from the ZIP, point it to the imported note
            attr.value = getNewNoteId(attr.value);
        }

        if (taskContext.data.safeImport && attributeService.isAttributeDangerous(attr.type, attr.name)) {
            attr.name = `disabled:${attr.name}`;
        }

        if (taskContext.data.safeImport) {
            attr.name = htmlSanitizer.sanitize(attr.name);
            attr.value = htmlSanitizer.sanitize(attr.value);
        }

        attributes.push(attr);
    }
}
2020-06-20 12:31:38 +02:00
/**
 * Creates an (empty) note for a directory entry, unless the note already exists.
 *
 * @param {string} filePath - path of the directory inside the ZIP
 * @returns {string} noteId of the directory note (both when newly created and when it already existed)
 */
function saveDirectory(filePath) {
    const { parentNoteMeta, noteMeta } = getMeta(filePath);

    const noteId = getNoteId(noteMeta, filePath);

    if (becca.getNote(noteId)) {
        // the ID is still returned - getParentNoteId() uses the return value as the parent of a child entry
        return noteId;
    }

    const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
    const parentNoteId = getParentNoteId(filePath, parentNoteMeta);

    const {note} = noteService.createNewNote({
        parentNoteId: parentNoteId,
        title: noteTitle,
        content: '',
        noteId: noteId,
        type: resolveNoteType(noteMeta?.type),
        mime: noteMeta ? noteMeta.mime : 'text/html',
        prefix: noteMeta ? noteMeta.prefix : '',
        isExpanded: noteMeta ? noteMeta.isExpanded : false,
        // position from the meta file is not applied to the very first created note
        notePosition: (noteMeta && firstNote) ? noteMeta.notePosition : undefined,
        isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
    });

    createdNoteIds.add(note.noteId);

    saveAttributes(note, noteMeta);

    firstNote = firstNote || note;

    return noteId;
}
/**
 * Resolves a relative URL found in a note's content to the imported entity it points to.
 *
 * @param {string} url - relative URL, may start with any number of "./" and "../" segments
 * @param {string} filePath - path (inside the ZIP) of the note containing the URL
 * @returns {{attachmentId: string}|{noteId: string}}
 */
function getEntityIdFromRelativeUrl(url, filePath) {
    let relativeUrl = url;

    while (relativeUrl.startsWith("./")) {
        relativeUrl = relativeUrl.slice(2);
    }

    let absUrl = path.dirname(filePath);

    // every "../" moves one directory up from the directory of the containing note
    while (relativeUrl.startsWith("../")) {
        absUrl = path.dirname(absUrl);

        relativeUrl = relativeUrl.slice(3);
    }

    if (absUrl === '.') {
        absUrl = '';
    }

    absUrl += `${absUrl.length > 0 ? '/' : ''}${relativeUrl}`;

    const { noteMeta, attachmentMeta } = getMeta(absUrl);

    if (attachmentMeta) {
        return {
            attachmentId: getNewAttachmentId(attachmentMeta.attachmentId)
        };
    } else { // don't check for noteMeta since it's not mandatory for notes
        return {
            noteId: getNoteId(noteMeta, absUrl)
        };
    }
}
2023-05-06 15:07:38 +02:00
/**
 * Prepares HTML of an imported text note: removes the Trilium export title tags, handles H1 headings,
 * sanitizes the HTML, strips the html/body wrapper and rewrites relative image and link URLs
 * so that they point to the imported notes/attachments.
 *
 * @param {string} content
 * @param {string} noteTitle
 * @param {string} filePath - path of the note inside the ZIP, base for resolving relative URLs
 * @param {NoteMeta} noteMeta
 * @return {string}
 */
function processTextNoteContent(content, noteTitle, filePath, noteMeta) {
    function isUrlAbsolute(url) {
        return /^(?:[a-z]+:)?\/\//i.test(url);
    }

    content = removeTrilumTags(content);

    content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
        if (noteTitle.trim() === text.trim()) {
            return ""; // remove whole H1 tag
        } else {
            return `<h2>${text}</h2>`;
        }
    });

    content = htmlSanitizer.sanitize(content);

    content = content.replace(/<html.*<body[^>]*>/gis, "");
    content = content.replace(/<\/body>.*<\/html>/gis, "");

    content = content.replace(/src="([^"]*)"/g, (match, url) => {
        if (/^data:/i.test(url)) {
            // inline data URI is not a path inside the ZIP, it must stay untouched
            return match;
        }

        try {
            url = decodeURIComponent(url).trim();
        } catch (e) {
            log.error(`Cannot parse image URL '${url}', keeping original. Error: ${e.message}.`);
            return `src="${url}"`;
        }

        if (isUrlAbsolute(url) || url.startsWith("/")) {
            return match;
        }

        const target = getEntityIdFromRelativeUrl(url, filePath);

        if (target.noteId) {
            return `src="api/images/${target.noteId}/${path.basename(url)}"`;
        } else if (target.attachmentId) {
            return `src="api/attachments/${target.attachmentId}/image/${path.basename(url)}"`;
        } else {
            return match;
        }
    });

    content = content.replace(/href="([^"]*)"/g, (match, url) => {
        try {
            url = decodeURIComponent(url).trim();
        } catch (e) {
            log.error(`Cannot parse link URL '${url}', keeping original. Error: ${e.message}.`);
            return `href="${url}"`;
        }

        if (url.startsWith('#') // already a note path (probably)
            || isUrlAbsolute(url)) {
            return match;
        }

        const target = getEntityIdFromRelativeUrl(url, filePath);

        if (!target.noteId) {
            return match;
        }

        // FIXME for linking attachments
        return `href="#root/${target.noteId}"`;
    });

    if (noteMeta) {
        const includeNoteLinks = (noteMeta.attributes || [])
            .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');

        for (const link of includeNoteLinks) {
            if (!link.value) {
                // nothing to look for
                continue;
            }

            // literal replacement (no RegExp) - the value comes from the ZIP and is not guaranteed to be a plain noteId
            content = content.split(link.value).join(getNewNoteId(link.value));
        }
    }

    content = content.trim();

    return content;
}
/**
 * Strips the H1 and TITLE elements marked with the "data-trilium-h1" / "data-trilium-title" attributes.
 * Matching is case-insensitive and every occurrence is removed.
 *
 * @param {string} content
 * @returns {string}
 */
function removeTrilumTags(content) {
    const markerPatterns = [
        '<h1 data-trilium-h1>([^<]*)<\/h1>',
        '<title data-trilium-title>([^<]*)<\/title>'
    ];

    return markerPatterns.reduce(
        (stripped, pattern) => stripped.replace(new RegExp(pattern, "gi"), ''),
        content
    );
}
2023-05-06 15:07:38 +02:00
/**
 * Converts the raw content of a ZIP entry into the content stored in the note:
 * markdown is rendered to HTML, text notes get their HTML processed and relation maps get their note IDs remapped.
 *
 * @param {NoteMeta} noteMeta
 * @param {string} type
 * @param {string} mime
 * @param {string|Buffer} content
 * @param {string} noteTitle
 * @param {string} filePath
 * @return {string}
 */
function processNoteContent(noteMeta, type, mime, content, noteTitle, filePath) {
    if (noteMeta?.format === 'markdown'
        || (!noteMeta && taskContext.data.textImportedAsText && ['text/markdown', 'text/x-markdown'].includes(mime))) {
        const parsed = mdReader.parse(content);
        content = mdWriter.render(parsed);
    }

    if (type === 'text') {
        content = processTextNoteContent(content, noteTitle, filePath, noteMeta);
    }

    if (type === 'relationMap' && noteMeta) {
        const relationMapLinks = (noteMeta.attributes || [])
            .filter(attr => attr.type === 'relation' && attr.name === 'relationMapLink');

        // this will replace relation map links
        for (const link of relationMapLinks) {
            if (!link.value) {
                // nothing to look for
                continue;
            }

            // literal replacement (no RegExp) - the value comes from the ZIP and is not guaranteed to be a plain noteId
            content = content.split(link.value).join(getNewNoteId(link.value));
        }
    }

    return content;
}
/**
 * Imports one file entry of the ZIP. Depending on the metadata the entry becomes an attachment,
 * an additional branch of a cloned note, content of an already existing note or a brand-new note.
 *
 * @param {string} filePath - normalized path of the entry inside the ZIP
 * @param {Buffer} content - raw content of the entry
 * @throws {Error} when no parent note can be determined for the entry
 */
function saveNote(filePath, content) {
    const { parentNoteMeta, noteMeta, attachmentMeta } = getMeta(filePath);

    if (noteMeta?.noImport) {
        return;
    }

    const noteId = getNoteId(noteMeta, filePath);

    if (attachmentMeta) {
        const attachment = new BAttachment({
            attachmentId: getNewAttachmentId(attachmentMeta.attachmentId),
            parentId: noteId,
            title: attachmentMeta.title,
            role: attachmentMeta.role,
            mime: attachmentMeta.mime,
            position: attachmentMeta.position
        });

        attachment.setContent(content);
        return;
    }

    const parentNoteId = getParentNoteId(filePath, parentNoteMeta);

    if (!parentNoteId) {
        throw new Error(`Cannot find parentNoteId for '${filePath}'`);
    }

    if (noteMeta?.isClone) {
        // clone carries no content of its own, only the extra branch is created
        if (!becca.getBranchFromChildAndParent(noteId, parentNoteId)) {
            new BBranch({
                noteId,
                parentNoteId,
                isExpanded: noteMeta.isExpanded,
                prefix: noteMeta.prefix,
                notePosition: noteMeta.notePosition
            }).save();
        }

        return;
    }

    let { type, mime } = noteMeta ? noteMeta : detectFileTypeAndMime(taskContext, filePath);
    type = resolveNoteType(type);

    if (type !== 'file' && type !== 'image') {
        content = content.toString("utf-8");
    }

    const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);

    content = processNoteContent(noteMeta, type, mime, content, noteTitle, filePath);

    let note = becca.getNote(noteId);

    const isProtected = importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable();

    if (note) {
        // only skeleton was created because of altered order of cloned notes in ZIP, we need to update
        // https://github.com/zadam/trilium/issues/2440
        if (note.type === undefined) {
            note.type = type;
            note.mime = mime;
            note.title = noteTitle;
            note.isProtected = isProtected;
            note.save();
        }

        note.setContent(content);

        if (!becca.getBranchFromChildAndParent(noteId, parentNoteId)) {
            // noteMeta is not guaranteed here (the note may have been created from a directory entry without metadata)
            new BBranch({
                noteId,
                parentNoteId,
                isExpanded: noteMeta?.isExpanded,
                prefix: noteMeta?.prefix,
                notePosition: noteMeta?.notePosition
            }).save();
        }
    }
    else {
        ({note} = noteService.createNewNote({
            parentNoteId: parentNoteId,
            title: noteTitle,
            content: content,
            noteId,
            type,
            mime,
            prefix: noteMeta ? noteMeta.prefix : '',
            isExpanded: noteMeta ? noteMeta.isExpanded : false,
            // root notePosition should be ignored since it relates to the original document
            // now import root should be placed after existing notes into new parent
            notePosition: (noteMeta && firstNote) ? noteMeta.notePosition : undefined,
            isProtected: isProtected,
        }));

        createdNoteIds.add(note.noteId);

        saveAttributes(note, noteMeta);

        firstNote = firstNote || note;
    }

    if (!noteMeta && (type === 'file' || type === 'image')) {
        // without metadata the file name is the only record of where the binary came from
        attributes.push({
            noteId,
            type: 'label',
            name: 'originalFileName',
            value: path.basename(filePath)
        });
    }
}
// we're running two passes to make sure that the meta file is loaded before the rest of the files is processed.
2020-06-27 00:40:35 +02:00
await readZipFile(fileBuffer, async (zipfile, entry) => {
const filePath = normalizeFilePath(entry.fileName);
if (filePath === '!!!meta.json') {
2020-06-27 00:40:35 +02:00
const content = await readContent(zipfile, entry);
metaFile = JSON.parse(content.toString("utf-8"));
}
zipfile.readEntry();
});
2020-06-27 00:40:35 +02:00
await readZipFile(fileBuffer, async (zipfile, entry) => {
const filePath = normalizeFilePath(entry.fileName);
if (/\/$/.test(entry.fileName)) {
2020-06-20 12:31:38 +02:00
saveDirectory(filePath);
}
else if (filePath !== '!!!meta.json') {
2020-06-27 00:40:35 +02:00
const content = await readContent(zipfile, entry);
2020-06-20 12:31:38 +02:00
saveNote(filePath, content);
}
taskContext.increaseProgressCount();
zipfile.readEntry();
});
2020-03-20 21:57:16 +01:00
2023-05-06 15:07:38 +02:00
for (const noteId of createdNoteIds) {
const note = becca.getNote(noteId);
await noteService.asyncPostProcessContent(note, note.getContent());
2020-03-20 21:57:16 +01:00
if (!metaFile) {
2023-05-06 15:07:38 +02:00
// if there's no meta file, then the notes are created based on the order in that zip file but that
2022-12-26 10:38:31 +01:00
// is usually quite random, so we sort the notes in the way they would appear in the file manager
treeService.sortNotes(noteId, 'title', false, true);
2020-03-20 21:57:16 +01:00
}
taskContext.increaseProgressCount();
}
// we're saving attributes and links only now so that all relation and link target notes
// are already in the database (we don't want to have "broken" relations, not even transitionally)
for (const attr of attributes) {
2022-12-26 10:38:31 +01:00
if (attr.type !== 'relation' || attr.value in becca.notes) {
new BAttribute(attr).save();
2020-03-20 21:57:16 +01:00
}
else {
2022-12-26 10:38:31 +01:00
log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`);
2020-03-20 21:57:16 +01:00
}
}
return firstNote;
}
2022-12-26 22:51:16 +01:00
/**
 * Converts backslashes to forward slashes and drops one leading and one trailing slash (if present).
 *
 * @param {string} filePath
 * @returns {string} path without leading or trailing slash and backslashes converted to forward ones
 */
function normalizeFilePath(filePath) {
    const unified = filePath.replace(/\\/g, "/");
    const withoutLeading = unified.startsWith("/") ? unified.slice(1) : unified;

    return withoutLeading.endsWith("/")
        ? withoutLeading.slice(0, -1)
        : withoutLeading;
}
/**
 * Collects all chunks emitted by a readable stream into a single buffer.
 *
 * @param {Readable} stream
 * @returns {Promise<Buffer>} resolves on the 'end' event, rejects when the stream emits 'error'
 */
function streamToBuffer(stream) {
    return new Promise((res, rej) => {
        const chunks = [];

        stream.on('data', chunk => chunks.push(chunk));
        // without this listener a failing stream would leave the promise pending forever
        stream.on('error', rej);
        stream.on('end', () => res(Buffer.concat(chunks)));
    });
}
/**
 * Reads the whole content of one ZIP entry.
 *
 * @param zipfile - opened ZIP file providing openReadStream()
 * @param entry - entry to read
 * @returns {Promise<Buffer>} rejects when the read stream cannot be opened or reading it fails
 */
function readContent(zipfile, entry) {
    return new Promise((res, rej) => {
        zipfile.openReadStream(entry, (err, readStream) => {
            if (err) {
                // there is no stream to read in this case
                rej(err);
                return;
            }

            streamToBuffer(readStream).then(res, rej);
        });
    });
}
/**
 * Opens a ZIP from a buffer and hands its entries one by one to the callback.
 * Entries are read lazily - the callback has to call zipfile.readEntry() to receive the next one.
 *
 * @param {Buffer} buffer - content of the ZIP file
 * @param {function} processEntryCallback - called with (zipfile, entry), may be async
 * @returns {Promise} resolves when all entries were read; rejects when the ZIP cannot be opened,
 *                    when the ZIP reader reports an error or when the callback fails
 */
function readZipFile(buffer, processEntryCallback) {
    return new Promise((res, rej) => {
        yauzl.fromBuffer(buffer, {lazyEntries: true, validateEntrySizes: false}, (err, zipfile) => {
            if (err) {
                // throwing from this callback would not reject the promise
                rej(err);
                return;
            }

            zipfile.on("entry", async entry => {
                try {
                    await processEntryCallback(zipfile, entry);
                } catch (e) {
                    // otherwise the failure is lost and the promise stays pending forever
                    rej(e);
                }
            });
            zipfile.on("error", rej);
            zipfile.on("end", res);

            zipfile.readEntry();
        });
    });
}
2022-12-07 23:37:40 +01:00
/**
 * Translates legacy (dash-separated) note type names to the current ones.
 *
 * @param {string} [type]
 * @returns {string} current name of the type, "text" when no type is given
 */
function resolveNoteType(type) {
    // BC for ZIPs created in Trilium 0.57 and older
    switch (type) {
        case 'relation-map':
            return 'relationMap';
        case 'note-map':
            return 'noteMap';
        case 'web-view':
            return 'webView';
        default:
            return type || "text";
    }
}
2020-03-20 21:57:16 +01:00
module.exports = {
importZip
2020-06-07 23:55:55 +02:00
};