// Excerpt of src/public/app/widgets/type_widgets/ckeditor/syntax_highlight.js.
// log(), dbg(), warn(), assert() and hljs are expected to be in scope from
// the rest of that file / the page; they are not redefined here.

// Incrementing suffix that keeps marker names unique across code blocks.
// TODO: Should this be scoped to note?
let markerCounter = 0;

/**
 * First pass: collect the plain text of a code block.
 *
 * CKEditor code blocks store line breaks as softBreak elements rather than
 * "\n" characters, so text children are concatenated and every softBreak is
 * turned into "\n".
 *
 * @param codeBlock CKEditor model element of the code block.
 * @returns {{text: string, offsets: number[]}} the collected text, and for
 *          every character of that text its model offset inside codeBlock.
 *          Children that are neither text nor softBreak contribute no
 *          characters (a warning is logged), but the offsets of the
 *          following characters still account for them.
 */
function collectCodeBlockText(codeBlock) {
    let text = "";
    const offsets = [];

    for (let i = 0; i < codeBlock.childCount; ++i) {
        const child = codeBlock.getChild(i);

        // We only expect text and softBreak elements here
        if (child.is("$text")) {
            dbg("child text " + child.data);
            for (let k = 0; k < child.data.length; ++k) {
                offsets.push(child.startOffset + k);
            }
            text += child.data;
        } else if (child.is("element", "softBreak")) {
            dbg("softBreak");
            offsets.push(child.startOffset);
            text += "\n";
        } else {
            warn("Unkown child " + JSON.stringify(child.toJSON()));
        }
    }

    return { text, offsets };
}

/**
 * Second pass: parse the html generated by highlight.js into a flat list of
 * spans expressed as character indices into the un-highlighted text.
 *
 * The parser is deliberately minimal: it only expects opening tags carrying
 * a quoted class attribute, closing tags, plain characters and html entities
 * (highlight.js escapes & < > " ' as &amp; &lt; &gt; &quot; &#x27;). Each
 * entity counts as a single text character.
 *
 * XXX This is brittle and assumes how highlight.js generates html, a better
 *     approach would be to use a token based highlight.js api.
 *
 * @param {string} html highlighted html.
 * @param {number} textLength length of the text that was highlighted, only
 *        used to assert that the html does not contain more text than that.
 * @returns {{className: string, start: number, end: number}[]} spans in the
 *          order their closing tags appear, so nested spans come before the
 *          span that contains them. end is exclusive.
 */
function parseHighlightSpans(html, textLength) {
    const spans = [];
    const openSpans = [];
    let iHtml = 0;
    let iText = 0;

    while (iHtml < html.length) {
        if (html[iHtml] === "<") {
            const iTagEnd = html.indexOf(">", iHtml + 1);
            if (iTagEnd < 0) {
                // Without this guard iHtml would be reset to 0 and the loop
                // would never terminate
                warn("Unterminated tag in highlighted html at index " + iHtml);
                break;
            }

            if (html[iHtml + 1] !== "/") {
                // New span, note they can be nested, eg C preprocessor lines
                // are inside a hljs-meta span, hljs-title function names
                // inside a hljs-function span, etc.
                // Only look for the quotes inside this tag.
                const tag = html.slice(iHtml, iTagEnd);
                const iStartQuot = tag.indexOf("\"");
                const iEndQuot = tag.indexOf("\"", iStartQuot + 1);
                let className = "";
                if ((iStartQuot >= 0) && (iEndQuot > iStartQuot)) {
                    className = tag.slice(iStartQuot + 1, iEndQuot);
                }
                // XXX highlight.js uses sub-scopes, eg "title function_" for
                //     Python, for now just use the first class only. See
                //     https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes
                const iBlank = className.indexOf(" ");
                if (iBlank > 0) {
                    className = className.slice(0, iBlank);
                }
                dbg("Found span start " + className);
                openSpans.push({ className, start: iText });
            } else {
                // Done with this span, pop it and record its extent
                const openSpan = openSpans.pop();
                if (openSpan === undefined) {
                    warn("Closing tag without open span in highlighted html at index " + iHtml);
                } else {
                    dbg("Found span end " + openSpan.className);
                    spans.push({
                        className: openSpan.className,
                        start: openSpan.start,
                        end: iText,
                    });
                }
            }
            iHtml = iTagEnd + 1;
        } else {
            // Text, there should be a matching character in the code block
            assert(
                iText < textLength,
                "Found text in html with no corresponding child text!!!!"
            );
            const iAmpEnd = (html[iHtml] === "&") ? html.indexOf(";", iHtml) : -1;
            if (iAmpEnd >= 0) {
                // html entity, stands for a single text character, see
                // https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5
                dbg(html.slice(iHtml, iAmpEnd));
                iHtml = iAmpEnd + 1;
            } else {
                // regular text (or a stray "&" with no terminating ";")
                dbg(html[iHtml]);
                iHtml++;
            }
            iText++;
        }
    }

    if (openSpans.length > 0) {
        warn("Highlighted html ended with " + openSpans.length + " unclosed span(s)");
    }

    return spans;
}

/**
 * Highlights a code block with highlight.js.
 *
 * This implements highlighting via ephemeral markers (not stored in the
 * document): every span found in the highlighted html becomes a marker
 * named hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1, added with
 * usingOperation set to false.
 *
 * XXX Another option would be to use formatting markers, which would have
 *     the benefit of making it work for readonly notes. On the flip side,
 *     the formatting would be stored with the note and it would need a
 *     way to remove that formatting when editing back the note.
 *
 * Two pass algorithm:
 * - first pass collects the text of the code block children, changing
 *   softBreak elements to "\n" (highlight.js needs the full text without
 *   markup),
 * - highlight.js is invoked on the collected text, generating html,
 * - second pass parses the spans of that html, maps them back to model
 *   offsets of the code block and issues one addMarker call per span.
 *
 * @param codeBlock CKEditor model element of the code block to highlight.
 * @param writer CKEditor model writer used to remove and add the markers.
 */
function highlightCodeBlock(codeBlock, writer) {
    log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON()));
    const model = codeBlock.root.document.model;

    // Can't invoke addMarker with an already existing marker name, clear
    // the highlight markers of this code block first. Only hljs: markers
    // are removed so markers owned by other features are left alone. The
    // markers are copied to an array before removing any of them so the
    // collection is not modified while it is being iterated.
    const codeBlockRange = model.createRangeIn(codeBlock);
    const staleMarkers = Array.from(model.markers.getMarkersIntersectingRange(codeBlockRange))
        .filter((marker) => marker.name.startsWith("hljs:"));
    for (const marker of staleMarkers) {
        dbg("removing marker " + marker.name);
        writer.removeMarker(marker.name);
    }

    // Don't highlight if plaintext (note this needs to remove the markers
    // above first, in case this was a switch from non plaintext to
    // plaintext)
    if (codeBlock.getAttribute("language") === "text-plain") {
        // XXX There's actually a plaintext language that could be used
        //     if you wanted the non-highlight formatting of
        //     highlight.js css applied, see
        //     https://github.com/highlightjs/highlight.js/issues/700
        log("not highlighting plaintext codeblock");
        return;
    }

    const { text, offsets } = collectCodeBlockText(codeBlock);

    // XXX This auto-detects the language, if we want to honor the language
    //     attribute we can do
    //         let html = hljs.highlight(text, {language: 'python'});
    //     If that is done, it would also be interesting to have an
    //     auto-detect option. See language mime types at
    //     https://github.com/zadam/trilium/blob/dbd312c88db2b000ec0ce18c95bc8a27c0e621a1/src/public/app/widgets/type_widgets/editable_text.js#L104
    const highlightRes = hljs.highlightAuto(text);
    dbg("text\n" + text);
    dbg("html\n" + highlightRes.value);

    // Model offset right after the last collected character
    const endOffset = (offsets.length > 0) ? (offsets[offsets.length - 1] + 1) : 0;
    // A span starts at the offset of its first character...
    const spanStartOffset = (iText) => (
        (iText < offsets.length) ? offsets[iText] : endOffset
    );
    // ...and ends right after its last character, so it never extends over
    // an unexpected child that follows it
    const spanEndOffset = (iText) => {
        const iLast = Math.min(iText, offsets.length) - 1;
        return (iLast >= 0) ? (offsets[iLast] + 1) : spanStartOffset(0);
    };

    for (const span of parseHighlightSpans(highlightRes.value, text.length)) {
        if (span.className === "") {
            // No css class to apply, a marker would carry no formatting
            warn("Ignoring span without class name");
            continue;
        }

        const posStart = writer.createPositionAt(codeBlock, spanStartOffset(span.start));
        const posEnd = writer.createPositionAt(codeBlock, spanEndOffset(span.end));
        const range = writer.createRange(posStart, posEnd);
        const markerName = "hljs:" + span.className + ":" + markerCounter;
        // Use an incrementing number for the uniqueId, random of
        // 10000000 is known to cause collisions with a few
        // codeblocks of 10s of lines on real notes (each line is
        // one or more marker).
        // Wrap-around for good measure so all numbers are positive
        // XXX Another option is to catch the exception and retry or
        //     go through the markers and get the largest + 1
        markerCounter = (markerCounter + 1) & 0xFFFFFF;
        dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON()));
        writer.addMarker(markerName, {"range": range, "usingOperation": false});
    }
}