mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-08-18 08:13:40 +08:00
feat(import/zip): support UTF-16 LE with BOM (closes #1241)
This commit is contained in:
parent
c925ae5f15
commit
bedc61c3d0
Binary file not shown.
@ -3,14 +3,12 @@
|
|||||||
import type BNote from "../../becca/entities/bnote.js";
|
import type BNote from "../../becca/entities/bnote.js";
|
||||||
import type TaskContext from "../task_context.js";
|
import type TaskContext from "../task_context.js";
|
||||||
|
|
||||||
import chardet from "chardet";
|
|
||||||
import stripBom from "strip-bom";
|
|
||||||
import noteService from "../../services/notes.js";
|
import noteService from "../../services/notes.js";
|
||||||
import imageService from "../../services/image.js";
|
import imageService from "../../services/image.js";
|
||||||
import protectedSessionService from "../protected_session.js";
|
import protectedSessionService from "../protected_session.js";
|
||||||
import markdownService from "./markdown.js";
|
import markdownService from "./markdown.js";
|
||||||
import mimeService from "./mime.js";
|
import mimeService from "./mime.js";
|
||||||
import { getNoteTitle } from "../../services/utils.js";
|
import { getNoteTitle, processStringOrBuffer } from "../../services/utils.js";
|
||||||
import importUtils from "./utils.js";
|
import importUtils from "./utils.js";
|
||||||
import htmlSanitizer from "../html_sanitizer.js";
|
import htmlSanitizer from "../html_sanitizer.js";
|
||||||
import type { File } from "./common.js";
|
import type { File } from "./common.js";
|
||||||
@ -148,21 +146,6 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote)
|
|||||||
return note;
|
return note;
|
||||||
}
|
}
|
||||||
|
|
||||||
function processStringOrBuffer(data: string | Buffer) {
|
|
||||||
if (!Buffer.isBuffer(data)) {
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
const detectedEncoding = chardet.detect(data);
|
|
||||||
switch (detectedEncoding) {
|
|
||||||
case "UTF-16LE":
|
|
||||||
return stripBom(data.toString("utf-16le"));
|
|
||||||
case "UTF-8":
|
|
||||||
default:
|
|
||||||
return data.toString("utf-8");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
|
function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
|
||||||
let content = processStringOrBuffer(file.buffer);
|
let content = processStringOrBuffer(file.buffer);
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { beforeAll, describe, expect, it } from "vitest";
|
||||||
import fs from "fs";
|
import fs from "fs";
|
||||||
import path from "path";
|
import path from "path";
|
||||||
import { fileURLToPath } from "url";
|
import { fileURLToPath } from "url";
|
||||||
@ -12,35 +12,46 @@ import sql_init from "../sql_init.js";
|
|||||||
import { initializeTranslations } from "../i18n.js";
|
import { initializeTranslations } from "../i18n.js";
|
||||||
const scriptDir = dirname(fileURLToPath(import.meta.url));
|
const scriptDir = dirname(fileURLToPath(import.meta.url));
|
||||||
|
|
||||||
describe("processNoteContent", () => {
|
async function testImport(fileName: string) {
|
||||||
it("treats single MDX as Markdown in ZIP as text note", async () => {
|
const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", fileName));
|
||||||
const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", "mdx.zip"));
|
const taskContext = TaskContext.getInstance("import-mdx", "import", {
|
||||||
const taskContext = TaskContext.getInstance("import-mdx", "import", {
|
textImportedAsText: true
|
||||||
textImportedAsText: true
|
});
|
||||||
});
|
|
||||||
|
|
||||||
await new Promise<void>((resolve, reject) => {
|
return new Promise<{ importedNote: BNote; rootNote: BNote }>((resolve, reject) => {
|
||||||
cls.init(async () => {
|
cls.init(async () => {
|
||||||
initializeTranslations();
|
const rootNote = becca.getNote("root");
|
||||||
sql_init.initializeDb();
|
if (!rootNote) {
|
||||||
await sql_init.dbReady;
|
expect(rootNote).toBeTruthy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const rootNote = becca.getNote("root");
|
const importedNote = await zip.importZip(taskContext, mdxSample, rootNote as BNote);
|
||||||
if (!rootNote) {
|
resolve({
|
||||||
expect(rootNote).toBeTruthy();
|
importedNote,
|
||||||
return;
|
rootNote
|
||||||
}
|
|
||||||
|
|
||||||
const importedNote = await zip.importZip(taskContext, mdxSample, rootNote as BNote);
|
|
||||||
try {
|
|
||||||
expect(importedNote.mime).toBe("text/mdx");
|
|
||||||
expect(importedNote.type).toBe("text");
|
|
||||||
expect(importedNote.title).toBe("Text Note");
|
|
||||||
} catch (e) {
|
|
||||||
reject(e);
|
|
||||||
}
|
|
||||||
resolve();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("processNoteContent", () => {
|
||||||
|
beforeAll(async () => {
|
||||||
|
initializeTranslations();
|
||||||
|
sql_init.initializeDb();
|
||||||
|
await sql_init.dbReady;
|
||||||
|
});
|
||||||
|
|
||||||
|
it("treats single MDX as Markdown in ZIP as text note", async () => {
|
||||||
|
const { importedNote } = await testImport("mdx.zip");
|
||||||
|
expect(importedNote.mime).toBe("text/mdx");
|
||||||
|
expect(importedNote.type).toBe("text");
|
||||||
|
expect(importedNote.title).toBe("Text Note");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("can import email from Microsoft Outlook with UTF-16 with BOM", async () => {
|
||||||
|
const { rootNote, importedNote } = await testImport("IREN.Reports.Q2.FY25.Results_files.zip");
|
||||||
|
const htmlNote = rootNote.children.find((ch) => ch.title === "IREN Reports Q2 FY25 Results");
|
||||||
|
expect(htmlNote?.getContent().toString().substring(0, 4)).toEqual("<div");
|
||||||
|
});
|
||||||
})
|
})
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
import BAttribute from "../../becca/entities/battribute.js";
|
import BAttribute from "../../becca/entities/battribute.js";
|
||||||
import { removeTextFileExtension, newEntityId, getNoteTitle } from "../../services/utils.js";
|
import { removeTextFileExtension, newEntityId, getNoteTitle, processStringOrBuffer } from "../../services/utils.js";
|
||||||
import log from "../../services/log.js";
|
import log from "../../services/log.js";
|
||||||
import noteService from "../../services/notes.js";
|
import noteService from "../../services/notes.js";
|
||||||
import attributeService from "../../services/attributes.js";
|
import attributeService from "../../services/attributes.js";
|
||||||
@ -457,7 +457,7 @@ async function importZip(taskContext: TaskContext, fileBuffer: Buffer, importRoo
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (type !== "file" && type !== "image") {
|
if (type !== "file" && type !== "image") {
|
||||||
content = content.toString("utf-8");
|
content = processStringOrBuffer(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
const noteTitle = getNoteTitle(filePath, taskContext.data?.replaceUnderscoresWithSpaces || false, noteMeta);
|
const noteTitle = getNoteTitle(filePath, taskContext.data?.replaceUnderscoresWithSpaces || false, noteMeta);
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
|
import chardet from "chardet";
|
||||||
|
import stripBom from "strip-bom";
|
||||||
import crypto from "crypto";
|
import crypto from "crypto";
|
||||||
import { generator } from "rand-token";
|
import { generator } from "rand-token";
|
||||||
import unescape from "unescape";
|
import unescape from "unescape";
|
||||||
@ -330,6 +332,36 @@ function compareVersions(v1: string, v2: string): number {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts imported content into a string.
 *
 * - Falsy values (`null`, or an empty string) yield an empty string.
 * - Non-empty strings are returned immediately without any transformation.
 * - Buffers are scanned with `chardet` to guess their encoding:
 *   - when detected as UTF-16LE, the buffer is decoded as UTF-16LE and the result is passed through `strip-bom`;
 *   - in every other case (including UTF-8) the buffer is decoded as UTF-8, without BOM stripping.
 *
 * @param data the string or buffer to process.
 * @returns the string representation of the buffer, or the same string if it's a string.
 */
export function processStringOrBuffer(data: string | Buffer | null): string {
    if (!data) {
        return "";
    }

    if (!Buffer.isBuffer(data)) {
        return data;
    }

    // The detected encoding drives decoding; anything other than UTF-16LE falls back to UTF-8.
    const detectedEncoding = chardet.detect(data);
    switch (detectedEncoding) {
        case "UTF-16LE":
            return stripBom(data.toString("utf-16le"));
        case "UTF-8":
        default:
            return data.toString("utf-8");
    }
}
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
compareVersions,
|
compareVersions,
|
||||||
crash,
|
crash,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user