diff --git a/apps/client/src/services/mime_types.ts b/apps/client/src/services/mime_types.ts index 8becf7c5e..f0f318141 100644 --- a/apps/client/src/services/mime_types.ts +++ b/apps/client/src/services/mime_types.ts @@ -38,7 +38,7 @@ let mimeToHighlightJsMapping: Record | null = null; * @param mimeType The MIME type of the code block, in the CKEditor-normalized format (e.g. `text-c-src` instead of `text/c-src`). * @returns the corresponding highlight.js tag, for example `c` for `text-c-src`. */ -function getHighlightJsNameForMime(mimeType: string) { +export function getHighlightJsNameForMime(mimeType: string) { if (!mimeToHighlightJsMapping) { const mimeTypes = getMimeTypes(); mimeToHighlightJsMapping = {}; diff --git a/apps/client/src/widgets/type_widgets/ckeditor/config.ts b/apps/client/src/widgets/type_widgets/ckeditor/config.ts index 0167d9b66..b4ba9633a 100644 --- a/apps/client/src/widgets/type_widgets/ckeditor/config.ts +++ b/apps/client/src/widgets/type_widgets/ckeditor/config.ts @@ -1,5 +1,9 @@ +import library_loader from "../../../services/library_loader.js"; import { ALLOWED_PROTOCOLS } from "../../../services/link.js"; +import { MIME_TYPE_AUTO } from "../../../services/mime_type_definitions.js"; +import { getHighlightJsNameForMime } from "../../../services/mime_types.js"; import options from "../../../services/options.js"; +import { isSyntaxHighlightEnabled } from "../../../services/syntax_highlight.js"; import utils from "../../../services/utils.js"; import emojiDefinitionsUrl from "@triliumnext/ckeditor5/emoji_definitions/en.json?external"; @@ -99,6 +103,15 @@ export function buildConfig() { emoji: { definitionsUrl: emojiDefinitionsUrl }, + syntaxHighlighting: { + async loadHighlightJs() { + await library_loader.requireLibrary(library_loader.HIGHLIGHT_JS); + return hljs; + }, + mapLanguageName: getHighlightJsNameForMime, + defaultMimeType: MIME_TYPE_AUTO, + enabled: isSyntaxHighlightEnabled + }, // This value must be kept in sync with the language 
defined in webpack.config.js. language: "en" }; diff --git a/apps/client/src/widgets/type_widgets/ckeditor/syntax_highlight.ts b/apps/client/src/widgets/type_widgets/ckeditor/syntax_highlight.ts deleted file mode 100644 index 2324aab23..000000000 --- a/apps/client/src/widgets/type_widgets/ckeditor/syntax_highlight.ts +++ /dev/null @@ -1,361 +0,0 @@ -/* - * This code is an adaptation of https://github.com/antoniotejada/Trilium-SyntaxHighlightWidget with additional improvements, such as: - * - * - support for selecting the language manually; - * - support for determining the language automatically, if a special language is selected ("Auto-detected"); - * - limit for highlighting. - * - * TODO: Generally this class can be done directly in the CKEditor repository. - */ - -import type { CKTextEditor } from "@triliumnext/ckeditor5"; -import library_loader from "../../../services/library_loader.js"; -import mime_types from "../../../services/mime_types.js"; -import { isSyntaxHighlightEnabled } from "../../../services/syntax_highlight.js"; - -export async function initSyntaxHighlighting(editor: CKTextEditor) { - if (!isSyntaxHighlightEnabled) { - return; - } - - await library_loader.requireLibrary(library_loader.HIGHLIGHT_JS); - initTextEditor(editor); -} - -const HIGHLIGHT_MAX_BLOCK_COUNT = 500; - -const tag = "SyntaxHighlightWidget"; -const debugLevels = ["error", "warn", "info", "log", "debug"]; -const debugLevel = debugLevels.indexOf("warn"); - -let warn = function (...args: unknown[]) {}; -if (debugLevel >= debugLevels.indexOf("warn")) { - warn = console.warn.bind(console, tag + ": "); -} - -let info = function (...args: unknown[]) {}; -if (debugLevel >= debugLevels.indexOf("info")) { - info = console.info.bind(console, tag + ": "); -} - -let log = function (...args: unknown[]) {}; -if (debugLevel >= debugLevels.indexOf("log")) { - log = console.log.bind(console, tag + ": "); -} - -let dbg = function (...args: unknown[]) {}; -if (debugLevel >= 
debugLevels.indexOf("debug")) { - dbg = console.debug.bind(console, tag + ": "); -} - -function assert(e: boolean, msg?: string) { - console.assert(e, tag + ": " + msg); -} - -// TODO: Should this be scoped to note? -let markerCounter = 0; - -function initTextEditor(textEditor: CKTextEditor) { - log("initTextEditor"); - - const document = textEditor.model.document; - - // Create a conversion from model to view that converts - // hljs:hljsClassName:uniqueId into a span with hljsClassName - // See the list of hljs class names at - // https://github.com/highlightjs/highlight.js/blob/6b8c831f00c4e87ecd2189ebbd0bb3bbdde66c02/docs/css-classes-reference.rst - - textEditor.conversion.for("editingDowncast").markerToHighlight({ - model: "hljs", - view: ({ markerName }) => { - dbg("markerName " + markerName); - // markerName has the pattern addMarker:cssClassName:uniqueId - const [, cssClassName, id] = markerName.split(":"); - - // The original code at - // https://github.com/ckeditor/ckeditor5/blob/master/packages/ckeditor5-find-and-replace/src/findandreplaceediting.js - // has this comment - // Marker removal from the view has a bug: - // https://github.com/ckeditor/ckeditor5/issues/7499 - // A minimal option is to return a new object for each converted marker... - return { - name: "span", - classes: [cssClassName], - attributes: { - // ...however, adding a unique attribute should be future-proof.. 
- "data-syntax-result": id - } - }; - } - }); - - // XXX This is done at BalloonEditor.create time, so it assumes this - // document is always attached to this textEditor, empirically that - // seems to be the case even with two splits showing the same note, - // it's not clear if CKEditor5 has apis to attach and detach - // documents around - document.registerPostFixer(function (writer) { - log("postFixer"); - // Postfixers are a simpler way of tracking changes than onchange - // See - // https://github.com/ckeditor/ckeditor5/blob/b53d2a4b49679b072f4ae781ac094e7e831cfb14/packages/ckeditor5-block-quote/src/blockquoteediting.js#L54 - const changes = document.differ.getChanges(); - let dirtyCodeBlocks = new Set(); - - function lookForCodeBlocks(node: CKNode) { - for (const child of node._children) { - if (child.is("element", "paragraph")) { - continue; - } - - if (child.is("element", "codeBlock")) { - dirtyCodeBlocks.add(child); - } else if (child.childCount > 0) { - lookForCodeBlocks(child); - } - } - } - - for (const change of changes) { - dbg("change " + JSON.stringify(change)); - - if (change.name !== "paragraph" && change.name !== "codeBlock" && change?.position?.nodeAfter && change.position.nodeAfter.childCount > 0) { - /* - * We need to look for code blocks recursively, as they can be placed within a
due to - * general HTML support or normally underneath other elements such as tables, blockquotes, etc. - */ - lookForCodeBlocks(change.position.nodeAfter); - } else if (change.type == "insert" && change.name == "codeBlock") { - // A new code block was inserted - const codeBlock = change.position?.nodeAfter; - // Even if it's a new codeblock, it needs dirtying in case - // it already has children, like when pasting one or more - // full codeblocks, undoing a delete, changing the language, - // etc (the postfixer won't get later changes for those). - if (codeBlock) { - log("dirtying inserted codeBlock " + JSON.stringify(codeBlock.toJSON())); - dirtyCodeBlocks.add(codeBlock); - } - } else if (change.type == "remove" && change.name == "codeBlock" && change.position) { - // An existing codeblock was removed, do nothing. Note the - // node is no longer in the editor so the codeblock cannot - // be inspected here. No need to dirty the codeblock since - // it has been removed - log("removing codeBlock at path " + JSON.stringify(change.position.toJSON())); - } else if ((change.type == "remove" || change.type == "insert") && change?.position?.parent.is("element", "codeBlock")) { - // Text was added or removed from the codeblock, force a - // highlight - const codeBlock = change.position.parent; - log("dirtying codeBlock " + JSON.stringify(codeBlock.toJSON())); - dirtyCodeBlocks.add(codeBlock); - } - } - for (let codeBlock of dirtyCodeBlocks) { - highlightCodeBlock(codeBlock, writer); - } - // Adding markers doesn't modify the document data so no need for - // postfixers to run again - return false; - }); - - // This assumes the document is empty and a explicit call to highlight - // is not necessary here. 
Empty documents have a single children of type - // paragraph with no text - assert(document.getRoot().childCount == 1 && document.getRoot().getChild(0).name == "paragraph" && document.getRoot().getChild(0).isEmpty); -} - -/** - * This implements highlighting via ephemeral markers (not stored in the - * document). - * - * XXX Another option would be to use formatting markers, which would have - * the benefit of making it work for readonly notes. On the flip side, - * the formatting would be stored with the note and it would need a - * way to remove that formatting when editing back the note. - */ -function highlightCodeBlock(codeBlock: CKNode, writer: Writer) { - log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON())); - const model = codeBlock.root.document.model; - - // Can't invoke addMarker with an already existing marker name, - // clear all highlight markers first. Marker names follow the - // pattern hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1 - const codeBlockRange = model.createRangeIn(codeBlock); - for (const marker of model.markers.getMarkersIntersectingRange(codeBlockRange)) { - dbg("removing marker " + marker.name); - writer.removeMarker(marker.name); - } - - // Don't highlight if plaintext (note this needs to remove the markers - // above first, in case this was a switch from non plaintext to - // plaintext) - const mimeType = codeBlock.getAttribute("language"); - if (mimeType == "text-plain") { - // XXX There's actually a plaintext language that could be used - // if you wanted the non-highlight formatting of - // highlight.js css applied, see - // https://github.com/highlightjs/highlight.js/issues/700 - log("not highlighting plaintext codeblock"); - return; - } - - // Find the corresponding language for the given mimetype. 
- const highlightJsLanguage = mime_types.getHighlightJsNameForMime(mimeType); - - if (mimeType !== mime_types.MIME_TYPE_AUTO && !highlightJsLanguage) { - console.warn(`Unsupported highlight.js for mime type ${mimeType}.`); - return; - } - - // Don't highlight if the code is too big, as the typing performance will be highly degraded. - if (codeBlock.childCount >= HIGHLIGHT_MAX_BLOCK_COUNT) { - return; - } - - // highlight.js needs the full text without HTML tags, eg for the - // text - // #include - // the highlighted html is - // #include <stdio.h> - // But CKEditor codeblocks have
<br> instead of \n - - // Do a two pass algorithm: - // - First pass collect the codeblock children text, change <br>
to - // \n - // - invoke highlight.js on the collected text generating html - // - Second pass parse the highlighted html spans and match each - // char to the CodeBlock text. Issue addMarker CKEditor calls for - // each span - - // XXX This is brittle and assumes how highlight.js generates html - // (blanks, which characters escapes, etc), a better approach - // would be to use highlight.js beta api TreeTokenizer? - - // Collect all the text nodes to pass to the highlighter Text is - // direct children of the codeBlock - let text = ""; - for (let i = 0; i < codeBlock.childCount; ++i) { - let child = codeBlock.getChild(i); - - // We only expect text and br elements here - if (child.is("$text")) { - dbg("child text " + child.data); - text += child.data; - } else if (child.is("element") && child.name == "softBreak") { - dbg("softBreak"); - text += "\n"; - } else { - warn("Unkown child " + JSON.stringify(child.toJSON())); - } - } - - let highlightRes; - if (mimeType === mime_types.MIME_TYPE_AUTO) { - highlightRes = hljs.highlightAuto(text); - } else { - highlightRes = hljs.highlight(text, { language: highlightJsLanguage }); - } - dbg("text\n" + text); - dbg("html\n" + highlightRes.value); - - let iHtml = 0; - let html = highlightRes.value; - let spanStack = []; - let iChild = -1; - let childText = ""; - let child = null; - let iChildText = 0; - - while (iHtml < html.length) { - // Advance the text index and fetch a new child if necessary - if (iChildText >= childText.length) { - iChild++; - if (iChild < codeBlock.childCount) { - dbg("Fetching child " + iChild); - child = codeBlock.getChild(iChild); - if (child.is("$text")) { - dbg("child text " + child.data); - childText = child.data; - iChildText = 0; - } else if (child.is("element", "softBreak")) { - dbg("softBreak"); - iChildText = 0; - childText = "\n"; - } else { - warn("child unknown!!!"); - } - } else { - // Don't bail if beyond the last children, since there's - // still html text, it must be a closing span 
tag that - // needs to be dealt with below - childText = ""; - } - } - - // This parsing is made slightly simpler and faster by only - // expecting and tags in the highlighted html - if (html[iHtml] == "<" && html[iHtml + 1] != "/") { - // new span, note they can be nested eg C preprocessor lines - // are inside a hljs-meta span, hljs-title function names - // inside a hljs-function span, etc - let iStartQuot = html.indexOf('"', iHtml + 1); - let iEndQuot = html.indexOf('"', iStartQuot + 1); - let className = html.slice(iStartQuot + 1, iEndQuot); - // XXX highlight js uses scope for Python "title function_", - // etc for now just use the first style only - // See https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes - let iBlank = className.indexOf(" "); - if (iBlank > 0) { - className = className.slice(0, iBlank); - } - dbg("Found span start " + className); - - iHtml = html.indexOf(">", iHtml) + 1; - - // push the span - let posStart = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText); - spanStack.push({ className: className, posStart: posStart }); - } else if (html[iHtml] == "<" && html[iHtml + 1] == "/") { - // Done with this span, pop the span and mark the range - iHtml = html.indexOf(">", iHtml + 1) + 1; - - let stackTop = spanStack.pop(); - let posStart = stackTop?.posStart; - let className = stackTop?.className; - let posEnd = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText); - let range = writer.createRange(posStart, posEnd); - let markerName = "hljs:" + className + ":" + markerCounter; - // Use an incrementing number for the uniqueId, random of - // 10000000 is known to cause collisions with a few - // codeblocks of 10s of lines on real notes (each line is - // one or more marker). 
- // Wrap-around for good measure so all numbers are positive - // XXX Another option is to catch the exception and retry or - // go through the markers and get the largest + 1 - markerCounter = (markerCounter + 1) & 0xffffff; - dbg("Found span end " + className); - dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON())); - writer.addMarker(markerName, { range: range, usingOperation: false }); - } else { - // Text, we should also have text in the children - assert(iChild < codeBlock.childCount && iChildText < childText.length, "Found text in html with no corresponding child text!!!!"); - if (html[iHtml] == "&") { - // highlight.js only encodes - // .replace(/&/g, '&') - // .replace(//g, '>') - // .replace(/"/g, '"') - // .replace(/'/g, '''); - // see https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5 - let iAmpEnd = html.indexOf(";", iHtml); - dbg(html.slice(iHtml, iAmpEnd)); - iHtml = iAmpEnd + 1; - } else { - // regular text - dbg(html[iHtml]); - iHtml++; - } - iChildText++; - } - } -} diff --git a/apps/client/src/widgets/type_widgets/editable_text.ts b/apps/client/src/widgets/type_widgets/editable_text.ts index 5415bfd5f..d586c771b 100644 --- a/apps/client/src/widgets/type_widgets/editable_text.ts +++ b/apps/client/src/widgets/type_widgets/editable_text.ts @@ -10,7 +10,6 @@ import AbstractTextTypeWidget from "./abstract_text_type_widget.js"; import link from "../../services/link.js"; import appContext, { type CommandListenerData, type EventData } from "../../components/app_context.js"; import dialogService from "../../services/dialog.js"; -import { initSyntaxHighlighting } from "./ckeditor/syntax_highlight.js"; import options from "../../services/options.js"; import toast from "../../services/toast.js"; import { normalizeMimeTypeForCKEditor } from "../../services/mime_type_definitions.js"; @@ -239,9 +238,6 @@ export default class EditableTextTypeWidget extends AbstractTextTypeWidget { 
evt.stop(); }); - //@ts-ignore - await initSyntaxHighlighting(editor); - if (isClassicEditor) { let $classicToolbarWidget; if (!utils.isMobile()) { diff --git a/packages/ckeditor5/src/plugins.ts b/packages/ckeditor5/src/plugins.ts index a54caa813..2c67b71c1 100644 --- a/packages/ckeditor5/src/plugins.ts +++ b/packages/ckeditor5/src/plugins.ts @@ -12,6 +12,7 @@ import MarkdownImportPlugin from "./plugins/markdownimport.js"; import MentionCustomization from "./plugins/mention_customization.js"; import IncludeNote from "./plugins/includenote.js"; import Uploadfileplugin from "./plugins/file_upload/uploadfileplugin.js"; +import SyntaxHighlighting from "./plugins/syntax_highlighting/index.js"; import { Kbd } from "@triliumnext/ckeditor5-keyboard-marker"; import { Mermaid } from "@triliumnext/ckeditor5-mermaid"; import { Admonition } from "@triliumnext/ckeditor5-admonition"; @@ -35,7 +36,8 @@ const TRILIUM_PLUGINS: typeof Plugin[] = [ MarkdownImportPlugin, MentionCustomization, IncludeNote, - Uploadfileplugin + Uploadfileplugin, + SyntaxHighlighting ]; const EXTERNAL_PLUGINS: typeof Plugin[] = [ diff --git a/packages/ckeditor5/src/plugins/syntax_highlighting/augmentation.ts b/packages/ckeditor5/src/plugins/syntax_highlighting/augmentation.ts new file mode 100644 index 000000000..9505c8816 --- /dev/null +++ b/packages/ckeditor5/src/plugins/syntax_highlighting/augmentation.ts @@ -0,0 +1,19 @@ +interface EditorConfig { + loadHighlightJs(): Promise; + mapLanguageName(mimeType: string): string; + defaultMimeType: string; + enabled: boolean; +} + +// TODO: Replace once library loader is replaced with webpack. 
+interface HighlightJs {
+    highlightAuto(text: string): HighlightJsResult;
+    highlight(text: string, opts: {
+        language: string
+    }): HighlightJsResult;
+};
+
+interface HighlightJsResult {
+    value: string; // Highlighted HTML markup; parsed by SyntaxHighlighting.highlightCodeBlock.
+}
+
diff --git a/packages/ckeditor5/src/plugins/syntax_highlighting/index.ts b/packages/ckeditor5/src/plugins/syntax_highlighting/index.ts
new file mode 100644
index 000000000..35e2bfc7e
--- /dev/null
+++ b/packages/ckeditor5/src/plugins/syntax_highlighting/index.ts
@@ -0,0 +1,394 @@
+import type { Element, Writer } from "ckeditor5";
+import type { Node, Editor } from "ckeditor5";
+import { Plugin } from "ckeditor5";
+
+/*
+ * This code is an adaptation of https://github.com/antoniotejada/Trilium-SyntaxHighlightWidget with additional improvements, such as:
+ *
+ * - support for selecting the language manually;
+ * - support for determining the language automatically, if a special language is selected ("Auto-detected");
+ * - limit for highlighting.
+ */
+
+/**
+ * CKEditor 5 plugin that colors the content of code blocks using highlight.js.
+ *
+ * Settings are read from the `syntaxHighlighting` entry of the editor configuration. When that entry is
+ * missing or its `enabled` flag is falsy, the plugin registers nothing.
+ */
+export default class SyntaxHighlighting extends Plugin {
+
+    private config!: EditorConfig;
+    private hljs!: HighlightJs;
+
+    /**
+     * Reads the configuration, loads highlight.js through `loadHighlightJs()` and then registers the
+     * marker conversion and the post-fixer on the editor.
+     */
+    async init() {
+        const config = this.editor.config.get("syntaxHighlighting") as EditorConfig | null;
+        if (!config || !config.enabled) {
+            return;
+        }
+
+        this.config = config;
+        this.hljs = await config.loadHighlightJs();
+
+        this.initTextEditor(this.editor);
+    }
+
+    /**
+     * Registers the `hljs` marker-to-highlight conversion and a post-fixer that re-highlights every
+     * code block affected by a model change.
+     *
+     * @param textEditor the editor whose model document is observed.
+     */
+    initTextEditor(textEditor: Editor) {
+        log("initTextEditor");
+
+        const document = textEditor.model.document;
+
+        // Create a conversion from model to view that converts
+        // hljs:hljsClassName:uniqueId into a span with hljsClassName
+        // See the list of hljs class names at
+        // https://github.com/highlightjs/highlight.js/blob/6b8c831f00c4e87ecd2189ebbd0bb3bbdde66c02/docs/css-classes-reference.rst
+
+        textEditor.conversion.for("editingDowncast").markerToHighlight({
+            model: "hljs",
+            view: ({ markerName }) => {
+                dbg("markerName " + markerName);
+                // markerName has the pattern hljs:cssClassName:uniqueId
+                const [, cssClassName, id] = markerName.split(":");
+
+                // The original code at
+                // https://github.com/ckeditor/ckeditor5/blob/master/packages/ckeditor5-find-and-replace/src/findandreplaceediting.js
+                // has this comment
+                //      Marker removal from the view has a bug:
+                //      https://github.com/ckeditor/ckeditor5/issues/7499
+                //      A minimal option is to return a new object for each converted marker...
+                return {
+                    name: "span",
+                    classes: [cssClassName],
+                    attributes: {
+                        // ...however, adding a unique attribute should be future-proof..
+                        "data-syntax-result": id
+                    }
+                };
+            }
+        });
+
+        // XXX This is done at BalloonEditor.create time, so it assumes this
+        //     document is always attached to this textEditor, empirically that
+        //     seems to be the case even with two splits showing the same note,
+        //     it's not clear if CKEditor5 has apis to attach and detach
+        //     documents around
+        document.registerPostFixer((writer) => {
+            log("postFixer");
+            // Postfixers are a simpler way of tracking changes than onchange
+            // See
+            // https://github.com/ckeditor/ckeditor5/blob/b53d2a4b49679b072f4ae781ac094e7e831cfb14/packages/ckeditor5-block-quote/src/blockquoteediting.js#L54
+            const changes = document.differ.getChanges();
+            let dirtyCodeBlocks = new Set<Node>();
+
+            function lookForCodeBlocks(node: Element | Node) {
+                if (!("getChildren" in node)) {
+                    return;
+                }
+
+                for (const child of node.getChildren()) {
+                    if (child.is("element", "paragraph")) {
+                        continue;
+                    }
+
+                    if (child.is("element", "codeBlock")) {
+                        dirtyCodeBlocks.add(child);
+                    } else if (child.childCount > 0) {
+                        lookForCodeBlocks(child);
+                    }
+                }
+            }
+
+            for (const change of changes) {
+                dbg("change " + JSON.stringify(change));
+
+                if (change.name !== "paragraph" && change.name !== "codeBlock" && change?.position?.nodeAfter && change.position.nodeAfter.childCount > 0) {
+                    /*
+                     * We need to look for code blocks recursively, as they can be placed within a <div> due to
+                     * general HTML support or normally underneath other elements such as tables, blockquotes, etc.
+                     */
+                    lookForCodeBlocks(change.position.nodeAfter);
+                } else if (change.type == "insert" && change.name == "codeBlock") {
+                    // A new code block was inserted
+                    const codeBlock = change.position?.nodeAfter;
+                    // Even if it's a new codeblock, it needs dirtying in case
+                    // it already has children, like when pasting one or more
+                    // full codeblocks, undoing a delete, changing the language,
+                    // etc (the postfixer won't get later changes for those).
+                    if (codeBlock) {
+                        log("dirtying inserted codeBlock " + JSON.stringify(codeBlock.toJSON()));
+                        dirtyCodeBlocks.add(codeBlock);
+                    }
+                } else if (change.type == "remove" && change.name == "codeBlock" && change.position) {
+                    // An existing codeblock was removed, do nothing. Note the
+                    // node is no longer in the editor so the codeblock cannot
+                    // be inspected here. No need to dirty the codeblock since
+                    // it has been removed
+                    log("removing codeBlock at path " + JSON.stringify(change.position.toJSON()));
+                } else if ((change.type == "remove" || change.type == "insert") && change?.position?.parent.is("element", "codeBlock")) {
+                    // Text was added or removed from the codeblock, force a
+                    // highlight
+                    const codeBlock = change.position.parent;
+                    log("dirtying codeBlock " + JSON.stringify(codeBlock.toJSON()));
+                    dirtyCodeBlocks.add(codeBlock);
+                }
+            }
+            for (let codeBlock of dirtyCodeBlocks) {
+                this.highlightCodeBlock(codeBlock, writer);
+            }
+            // Adding markers doesn't modify the document data so no need for
+            // postfixers to run again
+            return false;
+        });
+    }
+
+    /**
+     * This implements highlighting via ephemeral markers (not stored in the
+     * document).
+     *
+     * XXX Another option would be to use formatting markers, which would have
+     *     the benefit of making it work for readonly notes. On the flip side,
+     *     the formatting would be stored with the note and it would need a
+     *     way to remove that formatting when editing back the note.
+     *
+     * @param codeBlock the code block model element to (re)highlight.
+     * @param writer the model writer used to remove and add the `hljs` markers.
+     */
+    highlightCodeBlock(codeBlock: Node, writer: Writer) {
+        log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON()));
+        const model = codeBlock.root.document.model;
+
+        // Can't invoke addMarker with an already existing marker name,
+        // clear all highlight markers first. Marker names follow the
+        // pattern hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1
+        const codeBlockRange = model.createRangeIn(codeBlock);
+        for (const marker of model.markers.getMarkersIntersectingRange(codeBlockRange)) {
+            dbg("removing marker " + marker.name);
+            writer.removeMarker(marker.name);
+        }
+
+        // Don't highlight if plaintext (note this needs to remove the markers
+        // above first, in case this was a switch from non plaintext to
+        // plaintext)
+        const mimeType = codeBlock.getAttribute("language");
+        if (mimeType == "text-plain") {
+            // XXX There's actually a plaintext language that could be used
+            //     if you wanted the non-highlight formatting of
+            //     highlight.js css applied, see
+            //     https://github.com/highlightjs/highlight.js/issues/700
+            log("not highlighting plaintext codeblock");
+            return;
+        }
+
+        // Find the corresponding language for the given mimetype.
+        const highlightJsLanguage = this.config.mapLanguageName(mimeType);
+
+        if (mimeType !== this.config.defaultMimeType && !highlightJsLanguage) {
+            console.warn(`Unsupported highlight.js for mime type ${mimeType}.`);
+            return;
+        }
+
+        // Don't highlight if the code is too big, as the typing performance will be highly degraded.
+        if (codeBlock.childCount >= HIGHLIGHT_MAX_BLOCK_COUNT) {
+            return;
+        }
+
+        // highlight.js needs the full text without HTML tags, eg for the
+        // text
+        // #include <stdio.h>
+        // the highlighted html is
+        // <span class="hljs-meta">#<span class="hljs-meta-keyword">include</span> <span class="hljs-meta-string">&lt;stdio.h&gt;</span></span>
+        // But CKEditor codeblocks have <br> instead of \n
+
+        // Do a two pass algorithm:
+        // - First pass collect the codeblock children text, change <br> to
+        //   \n
+        // - invoke highlight.js on the collected text generating html
+        // - Second pass parse the highlighted html spans and match each
+        //   char to the CodeBlock text. Issue addMarker CKEditor calls for
+        //   each span
+
+        // XXX This is brittle and assumes how highlight.js generates html
+        //     (blanks, which characters escapes, etc), a better approach
+        //     would be to use highlight.js beta api TreeTokenizer?
+
+        // Collect all the text nodes to pass to the highlighter Text is
+        // direct children of the codeBlock
+        let text = "";
+        for (let i = 0; i < codeBlock.childCount; ++i) {
+            let child = codeBlock.getChild(i);
+
+            // We only expect text and br elements here
+            if (child.is("$text")) {
+                dbg("child text " + child.data);
+                text += child.data;
+            } else if (child.is("element") && child.name == "softBreak") {
+                dbg("softBreak");
+                text += "\n";
+            } else {
+                warn("Unkown child " + JSON.stringify(child.toJSON()));
+            }
+        }
+
+        let highlightRes;
+        if (mimeType === this.config.defaultMimeType) {
+            highlightRes = this.hljs.highlightAuto(text);
+        } else {
+            highlightRes = this.hljs.highlight(text, { language: highlightJsLanguage });
+        }
+        dbg("text\n" + text);
+        dbg("html\n" + highlightRes.value);
+
+        let iHtml = 0;
+        let html = highlightRes.value;
+        let spanStack = [];
+        let iChild = -1;
+        let childText = "";
+        let child = null;
+        let iChildText = 0;
+
+        while (iHtml < html.length) {
+            // Advance the text index and fetch a new child if necessary
+            if (iChildText >= childText.length) {
+                iChild++;
+                if (iChild < codeBlock.childCount) {
+                    dbg("Fetching child " + iChild);
+                    child = codeBlock.getChild(iChild);
+                    if (child.is("$text")) {
+                        dbg("child text " + child.data);
+                        childText = child.data;
+                        iChildText = 0;
+                    } else if (child.is("element", "softBreak")) {
+                        dbg("softBreak");
+                        iChildText = 0;
+                        childText = "\n";
+                    } else {
+                        warn("child unknown!!!");
+                    }
+                } else {
+                    // Don't bail if beyond the last children, since there's
+                    // still html text, it must be a closing span tag that
+                    // needs to be dealt with below
+                    childText = "";
+                }
+            }
+
+            // This parsing is made slightly simpler and faster by only
+            // expecting <span> and </span> tags in the highlighted html
+            if (html[iHtml] == "<" && html[iHtml + 1] != "/") {
+                // new span, note they can be nested eg C preprocessor lines
+                // are inside a hljs-meta span, hljs-title function names
+                // inside a hljs-function span, etc
+                let iStartQuot = html.indexOf('"', iHtml + 1);
+                let iEndQuot = html.indexOf('"', iStartQuot + 1);
+                let className = html.slice(iStartQuot + 1, iEndQuot);
+                // XXX highlight js uses scope for Python "title function_",
+                //     etc for now just use the first style only
+                // See https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes
+                let iBlank = className.indexOf(" ");
+                if (iBlank > 0) {
+                    className = className.slice(0, iBlank);
+                }
+                dbg("Found span start " + className);
+
+                iHtml = html.indexOf(">", iHtml) + 1;
+
+                // push the span
+                let posStart = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText);
+                spanStack.push({ className: className, posStart: posStart });
+            } else if (html[iHtml] == "<" && html[iHtml + 1] == "/") {
+                // Done with this span, pop the span and mark the range
+                iHtml = html.indexOf(">", iHtml + 1) + 1;
+
+                let stackTop = spanStack.pop();
+                if (!stackTop) {
+                    // A closing tag without a matching opening span has no start
+                    // position to build a range from, so skip it instead of
+                    // passing undefined to createRange
+                    warn("Found span end with no matching span start");
+                    continue;
+                }
+                let posStart = stackTop.posStart;
+                let className = stackTop.className;
+                let posEnd = writer.createPositionAt(codeBlock, (child?.startOffset ?? 0) + iChildText);
+                let range = writer.createRange(posStart, posEnd);
+                let markerName = "hljs:" + className + ":" + markerCounter;
+                // Use an incrementing number for the uniqueId, random of
+                // 10000000 is known to cause collisions with a few
+                // codeblocks of 10s of lines on real notes (each line is
+                // one or more marker).
+                // Wrap-around for good measure so all numbers are positive
+                // XXX Another option is to catch the exception and retry or
+                //     go through the markers and get the largest + 1
+                markerCounter = (markerCounter + 1) & 0xffffff;
+                dbg("Found span end " + className);
+                dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON()));
+                writer.addMarker(markerName, { range: range, usingOperation: false });
+            } else {
+                // Text, we should also have text in the children
+                assert(iChild < codeBlock.childCount && iChildText < childText.length, "Found text in html with no corresponding child text!!!!");
+                if (html[iHtml] == "&") {
+                    // highlight.js only encodes
+                    // .replace(/&/g, '&amp;')
+                    // .replace(/</g, '&lt;')
+                    // .replace(/>/g, '&gt;')
+                    // .replace(/"/g, '&quot;')
+                    // .replace(/'/g, '&#x27;');
+                    // see https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5
+                    let iAmpEnd = html.indexOf(";", iHtml);
+                    dbg(html.slice(iHtml, iAmpEnd));
+                    iHtml = iAmpEnd + 1;
+                } else {
+                    // regular text
+                    dbg(html[iHtml]);
+                    iHtml++;
+                }
+                iChildText++;
+            }
+        }
+    }
+
+}
+
+
+const HIGHLIGHT_MAX_BLOCK_COUNT = 500;
+
+const tag = "SyntaxHighlightWidget";
+const debugLevels = ["error", "warn", "info", "log", "debug"];
+const debugLevel = debugLevels.indexOf("warn");
+
+let warn = function (...args: unknown[]) {};
+if (debugLevel >= debugLevels.indexOf("warn")) {
+    warn = console.warn.bind(console, tag + ": ");
+}
+
+let info = function (...args: unknown[]) {};
+if (debugLevel >= debugLevels.indexOf("info")) {
+    info = console.info.bind(console, tag + ": ");
+}
+
+let log = function (...args: unknown[]) {};
+if (debugLevel >= debugLevels.indexOf("log")) {
+    log = console.log.bind(console, tag + ": ");
+}
+
+let dbg = function (...args: unknown[]) {};
+if (debugLevel >= debugLevels.indexOf("debug")) {
+    dbg = console.debug.bind(console, tag + ": ");
+}
+
+function assert(e: boolean, msg?: string) {
+    console.assert(e, tag + ": " + msg);
+}
+
+// TODO: Should this be scoped to note?
+let markerCounter = 0;