feat(import/single): support UTF-16 LE with BOM for code notes

This commit is contained in:
Elian Doran 2025-02-22 01:01:15 +02:00
parent fd4f35e879
commit cadd78524c
No known key found for this signature in database
3 changed files with 20 additions and 9 deletions

Binary file not shown.

View File

@@ -10,15 +10,17 @@ import cls from "../cls.js";
import sql_init from "../sql_init.js"; import sql_init from "../sql_init.js";
import { initializeTranslations } from "../i18n.js"; import { initializeTranslations } from "../i18n.js";
import single from "./single.js"; import single from "./single.js";
import stripBom from "strip-bom";
const scriptDir = dirname(fileURLToPath(import.meta.url)); const scriptDir = dirname(fileURLToPath(import.meta.url));
async function testImport(fileName: string, mimetype: string): Promise<BNote> { async function testImport(fileName: string, mimetype: string) {
const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", fileName)); const buffer = fs.readFileSync(path.join(scriptDir, "samples", fileName));
const taskContext = TaskContext.getInstance("import-mdx", "import", { const taskContext = TaskContext.getInstance("import-mdx", "import", {
textImportedAsText: true textImportedAsText: true,
codeImportedAsCode: true
}); });
return new Promise<BNote>((resolve, reject) => { return new Promise<{ buffer: Buffer, importedNote: BNote }>((resolve, reject) => {
cls.init(async () => { cls.init(async () => {
const rootNote = becca.getNote("root"); const rootNote = becca.getNote("root");
if (!rootNote) { if (!rootNote) {
@@ -28,9 +30,12 @@ async function testImport(fileName: string, mimetype: string): Promise<BNote> {
const importedNote = single.importSingleFile(taskContext, { const importedNote = single.importSingleFile(taskContext, {
originalname: fileName, originalname: fileName,
mimetype, mimetype,
buffer: mdxSample buffer: buffer
}, rootNote as BNote); }, rootNote as BNote);
resolve(importedNote); resolve({
buffer,
importedNote
});
}); });
}); });
} }
@@ -43,16 +48,22 @@ describe("processNoteContent", () => {
}); });
it("treats single MDX as Markdown", async () => { it("treats single MDX as Markdown", async () => {
const importedNote = await testImport("Text Note.mdx", "text/mdx"); const { importedNote } = await testImport("Text Note.mdx", "text/mdx");
expect(importedNote.mime).toBe("text/html"); expect(importedNote.mime).toBe("text/html");
expect(importedNote.type).toBe("text"); expect(importedNote.type).toBe("text");
expect(importedNote.title).toBe("Text Note"); expect(importedNote.title).toBe("Text Note");
}); });
it("supports HTML note with UTF-16 (w/ BOM) from Microsoft Outlook", async () => { it("supports HTML note with UTF-16 (w/ BOM) from Microsoft Outlook", async () => {
const importedNote = await testImport("IREN Reports Q2 FY25 Results.htm", "text/html"); const { importedNote } = await testImport("IREN Reports Q2 FY25 Results.htm", "text/html");
expect(importedNote.mime).toBe("text/html"); expect(importedNote.mime).toBe("text/html");
expect(importedNote.title).toBe("IREN Reports Q2 FY25 Results"); expect(importedNote.title).toBe("IREN Reports Q2 FY25 Results");
expect(importedNote.getContent().toString().substring(0, 5)).toEqual("<html"); expect(importedNote.getContent().toString().substring(0, 5)).toEqual("<html");
}); });
it("supports code note with UTF-16", async () => {
const { importedNote, buffer } = await testImport("UTF-16LE Code Note.json", "application/json");
expect(importedNote.mime).toBe("application/json");
expect(importedNote.getContent().toString()).toStrictEqual(stripBom(buffer.toString("utf-16le")));
});
}) })

View File

@@ -71,7 +71,7 @@ function importFile(taskContext: TaskContext, file: File, parentNote: BNote) {
function importCodeNote(taskContext: TaskContext, file: File, parentNote: BNote) { function importCodeNote(taskContext: TaskContext, file: File, parentNote: BNote) {
const title = getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces); const title = getNoteTitle(file.originalname, !!taskContext.data?.replaceUnderscoresWithSpaces);
const content = file.buffer.toString("utf-8"); const content = processStringOrBuffer(file.buffer);
const detectedMime = mimeService.getMime(file.originalname) || file.mimetype; const detectedMime = mimeService.getMime(file.originalname) || file.mimetype;
const mime = mimeService.normalizeMimeType(detectedMime); const mime = mimeService.normalizeMimeType(detectedMime);