diff --git a/src/services/import/samples/IREN.Reports.Q2.FY25.Results_files.zip b/src/services/import/samples/IREN.Reports.Q2.FY25.Results_files.zip new file mode 100644 index 000000000..86c9de9b5 Binary files /dev/null and b/src/services/import/samples/IREN.Reports.Q2.FY25.Results_files.zip differ diff --git a/src/services/import/single.ts b/src/services/import/single.ts index 85bd3d48a..b572aea7f 100644 --- a/src/services/import/single.ts +++ b/src/services/import/single.ts @@ -3,14 +3,12 @@ import type BNote from "../../becca/entities/bnote.js"; import type TaskContext from "../task_context.js"; -import chardet from "chardet"; -import stripBom from "strip-bom"; import noteService from "../../services/notes.js"; import imageService from "../../services/image.js"; import protectedSessionService from "../protected_session.js"; import markdownService from "./markdown.js"; import mimeService from "./mime.js"; -import { getNoteTitle } from "../../services/utils.js"; +import { getNoteTitle, processStringOrBuffer } from "../../services/utils.js"; import importUtils from "./utils.js"; import htmlSanitizer from "../html_sanitizer.js"; import type { File } from "./common.js"; @@ -148,21 +146,6 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote) return note; } -function processStringOrBuffer(data: string | Buffer) { - if (!Buffer.isBuffer(data)) { - return data; - } - - const detectedEncoding = chardet.detect(data); - switch (detectedEncoding) { - case "UTF-16LE": - return stripBom(data.toString("utf-16le")); - case "UTF-8": - default: - return data.toString("utf-8"); - } -} - function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) { let content = processStringOrBuffer(file.buffer); diff --git a/src/services/import/zip.spec.ts b/src/services/import/zip.spec.ts index c29459f93..67f0175f3 100644 --- a/src/services/import/zip.spec.ts +++ b/src/services/import/zip.spec.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "vitest"; +import { beforeAll, describe, expect, it } from "vitest"; import fs from "fs"; import path from "path"; import { fileURLToPath } from "url"; @@ -12,35 +12,46 @@ import sql_init from "../sql_init.js"; import { initializeTranslations } from "../i18n.js"; const scriptDir = dirname(fileURLToPath(import.meta.url)); -describe("processNoteContent", () => { - it("treats single MDX as Markdown in ZIP as text note", async () => { - const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", "mdx.zip")); - const taskContext = TaskContext.getInstance("import-mdx", "import", { - textImportedAsText: true - }); +async function testImport(fileName: string) { + const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", fileName)); + const taskContext = TaskContext.getInstance("import-mdx", "import", { + textImportedAsText: true + }); - await new Promise((resolve, reject) => { - cls.init(async () => { - initializeTranslations(); - sql_init.initializeDb(); - await sql_init.dbReady; + return new Promise<{ importedNote: BNote; rootNote: BNote }>((resolve, reject) => { + cls.init(async () => { + const rootNote = becca.getNote("root"); + if (!rootNote) { + expect(rootNote).toBeTruthy(); + return; + } - const rootNote = becca.getNote("root"); - if (!rootNote) { - expect(rootNote).toBeTruthy(); - return; - } - - const importedNote = await zip.importZip(taskContext, mdxSample, rootNote as BNote); - try { - expect(importedNote.mime).toBe("text/mdx"); - expect(importedNote.type).toBe("text"); - expect(importedNote.title).toBe("Text Note"); - } catch (e) { - reject(e); - } - resolve(); + const importedNote = await zip.importZip(taskContext, mdxSample, rootNote as BNote); + resolve({ + importedNote, + rootNote }); }); }); +} + +describe("processNoteContent", () => { + beforeAll(async () => { + initializeTranslations(); + sql_init.initializeDb(); + await sql_init.dbReady; + }); + + it("treats single MDX as Markdown in ZIP as text note", async () => { + const { importedNote } = await testImport("mdx.zip"); + expect(importedNote.mime).toBe("text/mdx"); + expect(importedNote.type).toBe("text"); + expect(importedNote.title).toBe("Text Note"); + }); + + it("can import email from Microsoft Outlook with UTF-16 with BOM", async () => { + const { rootNote, importedNote } = await testImport("IREN.Reports.Q2.FY25.Results_files.zip"); + const htmlNote = rootNote.children.find((ch) => ch.title === "IREN Reports Q2 FY25 Results"); + expect(htmlNote?.getContent().toString().substring(0, 4)).toEqual("