373 lines
13 KiB
JavaScript
Raw Normal View History

2018-11-05 00:06:17 +01:00
const sax = require("sax");
const stream = require('stream');
const {Throttle} = require('stream-throttle');
const log = require("../log");
const utils = require("../utils");
const sql = require("../sql");
const noteService = require("../notes");
const imageService = require("../image");
2019-02-25 21:22:57 +01:00
const protectedSessionService = require('../protected_session');
2020-06-30 23:37:06 +02:00
const htmlSanitizer = require("../html_sanitizer");
2020-11-17 22:35:20 +01:00
const attributeService = require("../attributes");
2022-12-23 14:18:40 +01:00
const {sanitizeAttributeName} = require("../sanitize_attribute_name.js");
2018-11-05 00:06:17 +01:00
/**
 * Re-punctuates a compact ENEX timestamp such as `20181121T193703Z`
 * into the form `2018-11-21 19:37:03.000Z`.
 *
 * The input is not validated: the fields are cut out purely by position, so a shorter or
 * differently formatted string yields empty or wrong fields instead of an error.
 *
 * @param {string} text - timestamp in `YYYYMMDDTHHMMSSZ` form
 * @returns {string} the same instant written as `YYYY-MM-DD HH:MM:SS.000Z`
 */
function parseDate(text) {
    const year = text.slice(0, 4);
    const month = text.slice(4, 6);
    const day = text.slice(6, 8);
    // index 8 is the "T" separator and is skipped
    const hours = text.slice(9, 11);
    const minutes = text.slice(11, 13);
    const seconds = text.slice(13, 15);

    return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}.000Z`;
}
// Parser state shared by the SAX event handlers below:
//  - `note` is the note currently being assembled (replaced by a fresh object whenever a <note> tag opens)
//  - `resource` is the resource currently being assembled (replaced whenever a <resource> tag opens)
// NOTE(review): both live at module level, so two imports being parsed at the same time would share
// this state - confirm that imports never overlap.
let note = {};
let resource;
2020-06-20 12:31:38 +02:00
function importEnex(taskContext, file, parentNote) {
2018-11-05 00:06:17 +01:00
// streaming XML parser; the `true` argument switches sax into strict mode
const saxStream = sax.createStream(true);

// the root note is titled after the uploaded file, minus a trailing ".enex" (matched case-insensitively)
const ENEX_EXTENSION = ".enex";
const rootNoteTitle = file.originalname.toLowerCase().endsWith(ENEX_EXTENSION)
    ? file.originalname.slice(0, file.originalname.length - ENEX_EXTENSION.length)
    : file.originalname;

// root note is the new note into which all of the ENEX/notebook's notes will be imported
const rootNote = noteService.createNewNote({
    parentNoteId: parentNote.noteId,
    title: rootNoteTitle,
    content: "",
    type: 'text',
    mime: 'text/html',
    // only protect the imported notes when the parent is protected and a protected session is available
    isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
}).note;
2019-11-16 17:56:49 +01:00
/**
 * Turns the raw ENML body of a note into the HTML that gets stored.
 *
 * Steps: keep only what is inside `<en-note>…</en-note>` (when those exact tags are present),
 * apply list-structure workarounds, replace to-do markers with ballot box characters and
 * finally pass the result through the HTML sanitizer.
 *
 * @param {string} content - CDATA content collected for the note
 * @returns {string} sanitized HTML
 */
function extractContent(content) {
    const OPENING_TAG = '<en-note>';
    const CLOSING_TAG = '</en-note>';

    // drop everything up to and including the opening tag
    const openingNoteIndex = content.indexOf(OPENING_TAG);

    if (openingNoteIndex !== -1) {
        content = content.slice(openingNoteIndex + OPENING_TAG.length);
    }

    // drop the last closing tag and everything after it
    const closingNoteIndex = content.lastIndexOf(CLOSING_TAG);

    if (closingNoteIndex !== -1) {
        content = content.slice(0, closingNoteIndex);
    }

    content = content.trim();

    // workaround for https://github.com/ckeditor/ckeditor5-list/issues/116
    content = content.replace(/<li>\s*<div>/g, "<li>");
    content = content.replace(/<\/div>\s*<\/li>/g, "</li>");

    // workaround for https://github.com/ckeditor/ckeditor5-list/issues/115
    content = content.replace(/<ul>\s*<ul>/g, "<ul><li><ul>");
    content = content.replace(/<\/li>\s*<ul>/g, "<ul>");
    content = content.replace(/<\/ul>\s*<\/ul>/g, "</ul></li></ul>");
    content = content.replace(/<\/ul>\s*<li>/g, "</ul></li><li>");

    content = content.replace(/<ol>\s*<ol>/g, "<ol><li><ol>");
    content = content.replace(/<\/li>\s*<ol>/g, "<ol>");
    content = content.replace(/<\/ol>\s*<\/ol>/g, "</ol></li></ol>");
    content = content.replace(/<\/ol>\s*<li>/g, "</ol></li><li>");

    // Replace en-todo with unicode ballot box (checked first, then unchecked / unspecified)
    content = content.replace(/<en-todo\s+checked="true"\/>/g, "\u2611 ");
    content = content.replace(/<en-todo(\s+checked="false")?\/>/g, "\u2610 ");

    // Replace OneNote converted checkboxes with unicode ballot box based
    // on known hash of checkboxes for regular, p1, and p2 checkboxes
    content = content.replace(/<en-media alt="To Do( priority [12])?" hash="(74de5d3d1286f01bac98d32a09f601d9|4a19d3041585e11643e808d68dd3e72f|8e17580123099ac6515c3634b1f6f9a1)"( type="[a-z\/]*"| width="\d+"| height="\d+")*\/>/g, "\u2610 ");
    content = content.replace(/<en-media alt="To Do( priority [12])?" hash="(5069b775461e471a47ce04ace6e1c6ae|7912ee9cec35fc3dba49edb63a9ed158|3a05f4f006a6eaf2627dae5ed8b8013b)"( type="[a-z\/]*"| width="\d+"| height="\d+")*\/>/g, "\u2611 ");

    content = htmlSanitizer.sanitize(content);

    return content;
}
// stack of the names of the currently open tags, innermost tag last
const path = [];

/** @returns {string|undefined} name of the innermost open tag, if any tag is open */
function getCurrentTag() {
    return path.length >= 1
        ? path[path.length - 1]
        : undefined;
}

/** @returns {string|undefined} name of the parent of the innermost open tag, if there is one */
function getPreviousTag() {
    return path.length >= 2
        ? path[path.length - 2]
        : undefined;
}
// Logs a parse error and lets the parser carry on with the rest of the document.
saxStream.on("error", e => {
    // unhandled errors will throw, since this is a proper node
    // event emitter.
    log.error(`error when parsing ENEX file: ${e}`);

    // clear the error and resume parsing; the parser is reached through `saxStream` because
    // an arrow function has no `this` of its own, so `this._parser` would not be the stream's parser
    saxStream._parser.error = null;
    saxStream._parser.resume();
});
// Routes character data into the note / resource being assembled, depending on the tag that
// directly contains the text (current tag) and on that tag's parent (previous tag).
saxStream.on("text", text => {
    const currentTag = getCurrentTag();
    const previousTag = getPreviousTag();

    if (previousTag === 'note-attributes') {
        // every child of <note-attributes> becomes a label named after its tag
        let labelName = currentTag;

        if (labelName === 'source-url') {
            labelName = 'pageUrl';
        }

        labelName = sanitizeAttributeName(labelName);

        note.attributes.push({
            type: 'label',
            name: labelName,
            value: text
        });
    }
    else if (previousTag === 'resource-attributes') {
        if (currentTag === 'file-name') {
            resource.attributes.push({
                type: 'label',
                name: 'originalFileName',
                value: text
            });

            // the file name doubles as the title of the resource
            resource.title = text;
        }
        else if (currentTag === 'source-url') {
            resource.attributes.push({
                type: 'label',
                name: 'pageUrl',
                value: text
            });
        }
    }
    else if (previousTag === 'resource') {
        if (currentTag === 'data') {
            // the data may contain whitespace / line breaks which are not part of the payload
            text = text.replace(/\s/g, '');

            // resource can be chunked into multiple events: https://github.com/zadam/trilium/issues/3424
            // it would probably make sense to do this in a more global way since it can in theory affect any field,
            // not just data
            resource.content = (resource.content || "") + text;
        }
        else if (currentTag === 'mime') {
            resource.mime = text.toLowerCase();
        }
    }
    else if (previousTag === 'note') {
        if (currentTag === 'title') {
            note.title = text;
        } else if (currentTag === 'created') {
            note.utcDateCreated = parseDate(text);
        } else if (currentTag === 'updated') {
            note.utcDateModified = parseDate(text);
        } else if (currentTag === 'tag') {
            // tags become value-less labels
            note.attributes.push({
                type: 'label',
                name: sanitizeAttributeName(text),
                value: ''
            });
        }

        // unknown tags are just ignored
    }
});
// XML attributes are not used by the import, so this handler intentionally does nothing.
saxStream.on("attribute", function ignoreAttribute(attr) {
    // attr carries "name" and "value"
});
// Tracks the opened tag on the path stack and starts a fresh note / resource when one begins.
saxStream.on("opentag", openedTag => {
    path.push(openedTag.name);

    switch (openedTag.name) {
        case 'note':
            note = {
                content: "",
                // kept as an array rather than a key-value object since it's not known whether attributes can repeat
                attributes: [],
                resources: []
            };
            break;

        case 'resource':
            // "resource" is the placeholder title used until a file name is seen
            resource = {
                title: "resource",
                attributes: []
            };

            note.resources.push(resource);
            break;

        default:
            break;
    }
});
2020-06-20 12:31:38 +02:00
/**
 * Overwrites the creation / modification dates of an already created note.
 *
 * It's difficult to force custom dateCreated and dateModified onto the Note entity,
 * so it is done post-creation with SQL. The UTC values are written into both the
 * local (`dateCreated`, `dateModified`) and the UTC columns.
 *
 * @param {string} noteId - ID of the note to update
 * @param {string} utcDateCreated - creation date to store
 * @param {string} utcDateModified - modification date to store
 */
function updateDates(noteId, utcDateCreated, utcDateModified) {
    sql.execute(`
        UPDATE notes
        SET dateCreated = ?,
            utcDateCreated = ?,
            dateModified = ?,
            utcDateModified = ?
        WHERE noteId = ?`,
        [utcDateCreated, utcDateCreated, utcDateModified, utcDateModified, noteId]);

    sql.execute(`
        UPDATE note_contents
        SET utcDateModified = ?
        WHERE noteId = ?`,
        [utcDateModified, noteId]);
}
2020-06-20 12:31:38 +02:00
/**
 * Persists the note currently held in the shared `note` variable as a child of the root note.
 *
 * Creates the note, adds its attributes, stores each resource either as an image or as a
 * child file note, replaces the matching `<en-media>` placeholders in the content with
 * image tags / links, saves the final sanitized content and applies the dates from the ENEX.
 */
function saveNote() {
    // OneNote exports checkboxes as images; those are already replaced with ballot box
    // characters when the content is extracted, so these resources are not imported at all
    const ONENOTE_CHECKBOX_HASHES = [
        '74de5d3d1286f01bac98d32a09f601d9',
        '4a19d3041585e11643e808d68dd3e72f',
        '8e17580123099ac6515c3634b1f6f9a1',
        '5069b775461e471a47ce04ace6e1c6ae',
        '7912ee9cec35fc3dba49edb63a9ed158',
        '3a05f4f006a6eaf2627dae5ed8b8013b'
    ];

    // make a copy because stream continues with the next call and note gets overwritten
    let {title, content, attributes, resources, utcDateCreated, utcDateModified} = note;

    content = extractContent(content);

    const noteEntity = noteService.createNewNote({
        parentNoteId: rootNote.noteId,
        title,
        content,
        utcDateCreated,
        type: 'text',
        mime: 'text/html',
        isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
    }).note;

    for (const attr of attributes) {
        noteEntity.addAttribute(attr.type, attr.name, attr.value);
    }

    utcDateCreated = utcDateCreated || noteEntity.utcDateCreated;
    // sometimes date modified is not present in ENEX, then use date created
    utcDateModified = utcDateModified || utcDateCreated;

    taskContext.increaseProgressCount();

    for (const resource of resources) {
        if (!resource.content) {
            continue;
        }

        resource.content = utils.fromBase64(resource.content);

        const hash = utils.md5(resource.content);

        // skip all checked/unchecked checkboxes from OneNote
        if (ONENOTE_CHECKBOX_HASHES.includes(hash)) {
            continue;
        }

        // the placeholder is identified by the hash of the resource; other attributes may come
        // before the hash (e.g. `<en-media alt="…" hash="…">`), so they are allowed on both sides
        const mediaRegex = new RegExp(`<en-media [^>]*hash="${hash}"[^>]*>`, 'g');

        resource.mime = resource.mime || "application/octet-stream";

        // stores the resource as a child note of type "file" and links to it from the content
        const createFileNote = () => {
            const resourceNote = noteService.createNewNote({
                parentNoteId: noteEntity.noteId,
                title: resource.title,
                content: resource.content,
                type: 'file',
                mime: resource.mime,
                isProtected: parentNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
            }).note;

            for (const attr of resource.attributes) {
                resourceNote.addAttribute(attr.type, attr.name, attr.value);
            }

            updateDates(resourceNote.noteId, utcDateCreated, utcDateModified);

            taskContext.increaseProgressCount();

            const resourceLink = `<a href="#root/${resourceNote.noteId}">${utils.escapeHtml(resource.title)}</a>`;

            // a replacer function inserts the link verbatim - a plain replacement string would
            // expand patterns such as "$&" or "$1" that can occur in a file name
            content = content.replace(mediaRegex, () => resourceLink);
        };

        if (resource.mime && resource.mime.startsWith('image/')) {
            try {
                const originalName = (resource.title && resource.title !== 'resource')
                    ? resource.title
                    : `image.${resource.mime.slice(6)}`; // default if real name is not present

                const {url, note: imageNote} = imageService.saveImage(noteEntity.noteId, resource.content, originalName, taskContext.data.shrinkImages);

                for (const attr of resource.attributes) {
                    if (attr.name !== 'originalFileName') { // this one is already saved in imageService
                        imageNote.addAttribute(attr.type, attr.name, attr.value);
                    }
                }

                updateDates(imageNote.noteId, utcDateCreated, utcDateModified);

                const imageLink = `<img src="${url}">`;

                // replacer function for the same reason as for file links above
                content = content.replace(mediaRegex, () => imageLink);

                if (!content.includes(imageLink)) {
                    // if there wasn't any match for the reference, we'll add the image anyway
                    // otherwise image would be removed since no note would include it
                    content += imageLink;
                }
            } catch (e) {
                // saving as an image failed, fall back to a plain file attachment
                log.error(`error when saving image from ENEX file: ${e}`);
                createFileNote();
            }
        } else {
            createFileNote();
        }
    }

    content = htmlSanitizer.sanitize(content);

    // save updated content with links to files/images
    noteEntity.setContent(content);

    noteService.scanForLinks(noteEntity);

    updateDates(noteEntity.noteId, utcDateCreated, utcDateModified);
}
2020-11-19 13:30:39 +01:00
// Leaves the closed tag on the path stack; a finished <note> is saved right away.
saxStream.on("closetag", closedTagName => {
    path.pop();

    if (closedTagName !== 'note') {
        return;
    }

    saveNote();
});
2018-11-05 00:06:17 +01:00
// start of a CDATA section needs no handling
saxStream.on("opencdata", () => {});

// CDATA text is appended to the content of the note being assembled
saxStream.on("cdata", cdataChunk => {
    note.content = note.content + cdataChunk;
});

// end of a CDATA section needs no handling either
saxStream.on("closecdata", () => {});
return new Promise((resolve, reject) =>
{
// resolve only when we parse the whole document AND saving of all notes have been finished
2020-11-18 21:30:56 +01:00
saxStream.on("end", () => resolve(rootNote));
2018-11-05 00:06:17 +01:00
const bufferStream = new stream.PassThrough();
bufferStream.end(file.buffer);
bufferStream
// rate limiting to improve responsiveness during / after import
2020-11-18 21:30:56 +01:00
.pipe(new Throttle({rate: 500000}))
.pipe(saxStream);
2018-11-05 00:06:17 +01:00
});
}
2020-06-20 12:31:38 +02:00
// public API of this module: only the ENEX import entry point is exported
module.exports = { importEnex };