import type { Element } from "ckeditor5"; import type { Node, Editor } from "ckeditor5"; import { Plugin } from "ckeditor5"; /* * This code is an adaptation of https://github.com/antoniotejada/Trilium-SyntaxHighlightWidget with additional improvements, such as: * * - support for selecting the language manually; * - support for determining the language automatically, if a special language is selected ("Auto-detected"); * - limit for highlighting. */ export default class SyntaxHighlighting extends Plugin { private config!: EditorConfig; private hljs!: HighlightJs; async init() { const config = this.editor.config.get("syntaxHighlighting") as EditorConfig | null; if (!config || !config.enabled) { return; } this.config = config; this.hljs = await config.loadHighlightJs(); this.initTextEditor(this.editor); } initTextEditor(textEditor: Editor) { log("initTextEditor"); const document = textEditor.model.document; // Create a conversion from model to view that converts // hljs:hljsClassName:uniqueId into a span with hljsClassName // See the list of hljs class names at // https://github.com/highlightjs/highlight.js/blob/6b8c831f00c4e87ecd2189ebbd0bb3bbdde66c02/docs/css-classes-reference.rst textEditor.conversion.for("editingDowncast").markerToHighlight({ model: "hljs", view: ({ markerName }) => { dbg("markerName " + markerName); // markerName has the pattern addMarker:cssClassName:uniqueId const [, cssClassName, id] = markerName.split(":"); // The original code at // https://github.com/ckeditor/ckeditor5/blob/master/packages/ckeditor5-find-and-replace/src/findandreplaceediting.js // has this comment // Marker removal from the view has a bug: // https://github.com/ckeditor/ckeditor5/issues/7499 // A minimal option is to return a new object for each converted marker... return { name: "span", classes: [cssClassName], attributes: { // ...however, adding a unique attribute should be future-proof.. "data-syntax-result": id } }; } }); // XXX This is done at BalloonEditor.create time, so it assumes this // document is always attached to this textEditor, empirically that // seems to be the case even with two splits showing the same note, // it's not clear if CKEditor5 has apis to attach and detach // documents around document.registerPostFixer((writer) => { log("postFixer"); // Postfixers are a simpler way of tracking changes than onchange // See // https://github.com/ckeditor/ckeditor5/blob/b53d2a4b49679b072f4ae781ac094e7e831cfb14/packages/ckeditor5-block-quote/src/blockquoteediting.js#L54 const changes = document.differ.getChanges(); let dirtyCodeBlocks = new Set(); function lookForCodeBlocks(node: Element | Node) { if (!("getChildren" in node)) { return; } for (const child of node.getChildren()) { if (child.is("element", "paragraph")) { continue; } if (child.is("element", "codeBlock")) { dirtyCodeBlocks.add(child); } else if (child.childCount > 0) { lookForCodeBlocks(child); } } } for (const change of changes) { dbg("change " + JSON.stringify(change)); if (change.name !== "paragraph" && change.name !== "codeBlock" && change?.position?.nodeAfter && change.position.nodeAfter.childCount > 0) { /* * We need to look for code blocks recursively, as they can be placed within a
due to * general HTML support or normally underneath other elements such as tables, blockquotes, etc. */ lookForCodeBlocks(change.position.nodeAfter); } else if (change.type == "insert" && change.name == "codeBlock") { // A new code block was inserted const codeBlock = change.position?.nodeAfter; // Even if it's a new codeblock, it needs dirtying in case // it already has children, like when pasting one or more // full codeblocks, undoing a delete, changing the language, // etc (the postfixer won't get later changes for those). if (codeBlock) { log("dirtying inserted codeBlock " + JSON.stringify(codeBlock.toJSON())); dirtyCodeBlocks.add(codeBlock); } } else if (change.type == "remove" && change.name == "codeBlock" && change.position) { // An existing codeblock was removed, do nothing. Note the // node is no longer in the editor so the codeblock cannot // be inspected here. No need to dirty the codeblock since // it has been removed log("removing codeBlock at path " + JSON.stringify(change.position.toJSON())); } else if ((change.type == "remove" || change.type == "insert") && change?.position?.parent.is("element", "codeBlock")) { // Text was added or removed from the codeblock, force a // highlight const codeBlock = change.position.parent; log("dirtying codeBlock " + JSON.stringify(codeBlock.toJSON())); dirtyCodeBlocks.add(codeBlock); } } for (let codeBlock of dirtyCodeBlocks) { this.highlightCodeBlock(codeBlock, writer); } // Adding markers doesn't modify the document data so no need for // postfixers to run again return false; }); } /** * This implements highlighting via ephemeral markers (not stored in the * document). * * XXX Another option would be to use formatting markers, which would have * the benefit of making it work for readonly notes. On the flip side, * the formatting would be stored with the note and it would need a * way to remove that formatting when editing back the note. */ highlightCodeBlock(codeBlock: Node, writer: Writer) { log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON())); const model = codeBlock.root.document.model; // Can't invoke addMarker with an already existing marker name, // clear all highlight markers first. Marker names follow the // pattern hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1 const codeBlockRange = model.createRangeIn(codeBlock); for (const marker of model.markers.getMarkersIntersectingRange(codeBlockRange)) { dbg("removing marker " + marker.name); writer.removeMarker(marker.name); } // Don't highlight if plaintext (note this needs to remove the markers // above first, in case this was a switch from non plaintext to // plaintext) const mimeType = codeBlock.getAttribute("language"); if (mimeType == "text-plain") { // XXX There's actually a plaintext language that could be used // if you wanted the non-highlight formatting of // highlight.js css applied, see // https://github.com/highlightjs/highlight.js/issues/700 log("not highlighting plaintext codeblock"); return; } // Find the corresponding language for the given mimetype. const highlightJsLanguage = this.config.mapLanguageName(mimeType); if (mimeType !== this.config.defaultMimeType && !highlightJsLanguage) { console.warn(`Unsupported highlight.js for mime type ${mimeType}.`); return; } // Don't highlight if the code is too big, as the typing performance will be highly degraded. if (codeBlock.childCount >= HIGHLIGHT_MAX_BLOCK_COUNT) { return; } // highlight.js needs the full text without HTML tags, eg for the // text // #include // the highlighted html is // #include <stdio.h> // But CKEditor codeblocks have
instead of \n // Do a two pass algorithm: // - First pass collect the codeblock children text, change
to // \n // - invoke highlight.js on the collected text generating html // - Second pass parse the highlighted html spans and match each // char to the CodeBlock text. Issue addMarker CKEditor calls for // each span // XXX This is brittle and assumes how highlight.js generates html // (blanks, which characters escapes, etc), a better approach // would be to use highlight.js beta api TreeTokenizer? // Collect all the text nodes to pass to the highlighter Text is // direct children of the codeBlock let text = ""; for (let i = 0; i < codeBlock.childCount; ++i) { let child = codeBlock.getChild(i); // We only expect text and br elements here if (child.is("$text")) { dbg("child text " + child.data); text += child.data; } else if (child.is("element") && child.name == "softBreak") { dbg("softBreak"); text += "\n"; } else { warn("Unkown child " + JSON.stringify(child.toJSON())); } } let highlightRes; if (mimeType === this.config.defaultMimeType) { highlightRes = this.hljs.highlightAuto(text); } else { highlightRes = this.hljs.highlight(text, { language: highlightJsLanguage }); } dbg("text\n" + text); dbg("html\n" + highlightRes.value); let iHtml = 0; let html = highlightRes.value; let spanStack = []; let iChild = -1; let childText = ""; let child = null; let iChildText = 0; while (iHtml < html.length) { // Advance the text index and fetch a new child if necessary if (iChildText >= childText.length) { iChild++; if (iChild < codeBlock.childCount) { dbg("Fetching child " + iChild); child = codeBlock.getChild(iChild); if (child.is("$text")) { dbg("child text " + child.data); childText = child.data; iChildText = 0; } else if (child.is("element", "softBreak")) { dbg("softBreak"); iChildText = 0; childText = "\n"; } else { warn("child unknown!!!"); } } else { // Don't bail if beyond the last children, since there's // still html text, it must be a closing span tag that // needs to be dealt with below childText = ""; } } // This parsing is made slightly simpler and faster by only // expecting and tags in the highlighted html if (html[iHtml] == "<" && html[iHtml + 1] != "/") { // new span, note they can be nested eg C preprocessor lines // are inside a hljs-meta span, hljs-title function names // inside a hljs-function span, etc let iStartQuot = html.indexOf('"', iHtml + 1); let iEndQuot = html.indexOf('"', iStartQuot + 1); let className = html.slice(iStartQuot + 1, iEndQuot); // XXX highlight js uses scope for Python "title function_", // etc for now just use the first style only // See https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes let iBlank = className.indexOf(" "); if (iBlank > 0) { className = className.slice(0, iBlank); } dbg("Found span start " + className); iHtml = html.indexOf(">", iHtml) + 1; // push the span let posStart = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText); spanStack.push({ className: className, posStart: posStart }); } else if (html[iHtml] == "<" && html[iHtml + 1] == "/") { // Done with this span, pop the span and mark the range iHtml = html.indexOf(">", iHtml + 1) + 1; let stackTop = spanStack.pop(); let posStart = stackTop?.posStart; let className = stackTop?.className; let posEnd = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText); let range = writer.createRange(posStart, posEnd); let markerName = "hljs:" + className + ":" + markerCounter; // Use an incrementing number for the uniqueId, random of // 10000000 is known to cause collisions with a few // codeblocks of 10s of lines on real notes (each line is // one or more marker). // Wrap-around for good measure so all numbers are positive // XXX Another option is to catch the exception and retry or // go through the markers and get the largest + 1 markerCounter = (markerCounter + 1) & 0xffffff; dbg("Found span end " + className); dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON())); writer.addMarker(markerName, { range: range, usingOperation: false }); } else { // Text, we should also have text in the children assert(iChild < codeBlock.childCount && iChildText < childText.length, "Found text in html with no corresponding child text!!!!"); if (html[iHtml] == "&") { // highlight.js only encodes // .replace(/&/g, '&') // .replace(//g, '>') // .replace(/"/g, '"') // .replace(/'/g, '''); // see https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5 let iAmpEnd = html.indexOf(";", iHtml); dbg(html.slice(iHtml, iAmpEnd)); iHtml = iAmpEnd + 1; } else { // regular text dbg(html[iHtml]); iHtml++; } iChildText++; } } } } const HIGHLIGHT_MAX_BLOCK_COUNT = 500; const tag = "SyntaxHighlightWidget"; const debugLevels = ["error", "warn", "info", "log", "debug"]; const debugLevel = debugLevels.indexOf("warn"); let warn = function (...args: unknown[]) {}; if (debugLevel >= debugLevels.indexOf("warn")) { warn = console.warn.bind(console, tag + ": "); } let info = function (...args: unknown[]) {}; if (debugLevel >= debugLevels.indexOf("info")) { info = console.info.bind(console, tag + ": "); } let log = function (...args: unknown[]) {}; if (debugLevel >= debugLevels.indexOf("log")) { log = console.log.bind(console, tag + ": "); } let dbg = function (...args: unknown[]) {}; if (debugLevel >= debugLevels.indexOf("debug")) { dbg = console.debug.bind(console, tag + ": "); } function assert(e: boolean, msg?: string) { console.assert(e, tag + ": " + msg); } // TODO: Should this be scoped to note? let markerCounter = 0;