Carry over code block highlighting

This commit is contained in:
Elian Doran 2024-10-26 23:27:23 +03:00
parent 579b3f4ca0
commit 6078620bf1
No known key found for this signature in database

View File

@ -31,6 +31,9 @@ function assert(e, msg) {
console.assert(e, tag + ": " + msg);
}
// TODO: Should this be scoped to note?
let markerCounter = 0;
function initTextEditor(textEditor) {
log("initTextEditor");
@ -126,6 +129,193 @@ function initTextEditor(textEditor) {
}
/**
* This implements highlighting via ephemeral markers (not stored in the
* document).
*
* XXX Another option would be to use formatting markers, which would have
* the benefit of making it work for readonly notes. On the flip side,
* the formatting would be stored with the note and it would need a
* way to remove that formatting when editing back the note.
*/
function highlightCodeBlock(codeBlock, writer) {
console.log("Highlight code block.");
}
log("highlighting codeblock " + JSON.stringify(codeBlock.toJSON()));
const model = codeBlock.root.document.model;
// Can't invoke addMarker with an already existing marker name,
// clear all highlight markers first. Marker names follow the
// pattern hljs:cssClassName:uniqueId, eg hljs:hljs-comment:1
const codeBlockRange = model.createRangeIn(codeBlock);
for (const marker of model.markers.getMarkersIntersectingRange(codeBlockRange)) {
dbg("removing marker " + marker.name);
writer.removeMarker(marker.name);
}
// Don't highlight if plaintext (note this needs to remove the markers
// above first, in case this was a switch from non plaintext to
// plaintext)
if (codeBlock.getAttribute("language") == "text-plain") {
// XXX There's actually a plaintext language that could be used
// if you wanted the non-highlight formatting of
// highlight.js css applied, see
// https://github.com/highlightjs/highlight.js/issues/700
log("not highlighting plaintext codeblock");
return;
}
// highlight.js needs the full text without HTML tags, eg for the
// text
// #include <stdio.h>
// the highlighted html is
// <span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string">&lt;stdio.h&gt;</span></span>
// But CKEditor codeblocks have <br> instead of \n
// Do a two pass algorithm:
// - First pass collect the codeblock children text, change <br> to
// \n
// - invoke highlight.js on the collected text generating html
// - Second pass parse the highlighted html spans and match each
// char to the CodeBlock text. Issue addMarker CKEditor calls for
// each span
// XXX This is brittle and assumes how highlight.js generates html
// (blanks, which characters escapes, etc), a better approach
// would be to use highlight.js beta api TreeTokenizer?
// Collect all the text nodes to pass to the highlighter Text is
// direct children of the codeBlock
let text = "";
for (let i = 0; i < codeBlock.childCount; ++i) {
let child = codeBlock.getChild(i);
// We only expect text and br elements here
if (child.is("$text")) {
dbg("child text " + child.data);
text += child.data;
} else if (child.is("element") &&
(child.name == "softBreak")) {
dbg("softBreak");
text += "\n";
} else {
warn("Unkown child " + JSON.stringify(child.toJSON()));
}
}
// XXX This auto-detects the language, if we want to honor the language
// attribute we can do
// let html = hljs.highlight(text, {language: 'python'});
// If that is done, it would also be interesting to have an
// auto-detect option. See language mime types at
// https://github.com/zadam/trilium/blob/dbd312c88db2b000ec0ce18c95bc8a27c0e621a1/src/public/app/widgets/type_widgets/editable_text.js#L104
let highlightRes = hljs.highlightAuto(text);
dbg("text\n" + text);
dbg("html\n" + highlightRes.value);
let iHtml = 0;
let html = highlightRes.value;
let spanStack = [];
let iChild = -1;
let childText = "";
let child = null;
let iChildText = 0;
while (iHtml < html.length) {
// Advance the text index and fetch a new child if necessary
if (iChildText >= childText.length) {
iChild++;
if (iChild < codeBlock.childCount) {
dbg("Fetching child " + iChild);
child = codeBlock.getChild(iChild);
if (child.is("$text")) {
dbg("child text " + child.data);
childText = child.data;
iChildText = 0;
} else if (child.is("element", "softBreak")) {
dbg("softBreak");
iChildText = 0;
childText = "\n";
} else {
warn("child unknown!!!");
}
} else {
// Don't bail if beyond the last children, since there's
// still html text, it must be a closing span tag that
// needs to be dealt with below
childText = "";
}
}
// This parsing is made slightly simpler and faster by only
// expecting <span> and </span> tags in the highlighted html
if ((html[iHtml] == "<") && (html[iHtml+1] != "/")) {
// new span, note they can be nested eg C preprocessor lines
// are inside a hljs-meta span, hljs-title function names
// inside a hljs-function span, etc
let iStartQuot = html.indexOf("\"", iHtml+1);
let iEndQuot = html.indexOf("\"", iStartQuot+1);
let className = html.slice(iStartQuot+1, iEndQuot);
// XXX highlight js uses scope for Python "title function_",
// etc for now just use the first style only
// See https://highlightjs.readthedocs.io/en/latest/css-classes-reference.html#a-note-on-scopes-with-sub-scopes
let iBlank = className.indexOf(" ");
if (iBlank > 0) {
className = className.slice(0, iBlank);
}
dbg("Found span start " + className);
iHtml = html.indexOf(">", iHtml) + 1;
// push the span
let posStart = writer.createPositionAt(codeBlock, child.startOffset + iChildText);
spanStack.push({ "className" : className, "posStart": posStart});
} else if ((html[iHtml] == "<") && (html[iHtml+1] == "/")) {
// Done with this span, pop the span and mark the range
iHtml = html.indexOf(">", iHtml+1) + 1;
let stackTop = spanStack.pop();
let posStart = stackTop.posStart;
let className = stackTop.className;
let posEnd = writer.createPositionAt(codeBlock, child.startOffset + iChildText);
let range = writer.createRange(posStart, posEnd);
let markerName = "hljs:" + className + ":" + markerCounter;
// Use an incrementing number for the uniqueId, random of
// 10000000 is known to cause collisions with a few
// codeblocks of 10s of lines on real notes (each line is
// one or more marker).
// Wrap-around for good measure so all numbers are positive
// XXX Another option is to catch the exception and retry or
// go through the markers and get the largest + 1
markerCounter = (markerCounter + 1) & 0xFFFFFF;
dbg("Found span end " + className);
dbg("Adding marker " + markerName + ": " + JSON.stringify(range.toJSON()));
writer.addMarker(markerName, {"range": range, "usingOperation": false});
} else {
// Text, we should also have text in the children
assert(
((iChild < codeBlock.childCount) && (iChildText < childText.length)),
"Found text in html with no corresponding child text!!!!"
);
if (html[iHtml] == "&") {
// highlight.js only encodes
// .replace(/&/g, '&amp;')
// .replace(/</g, '&lt;')
// .replace(/>/g, '&gt;')
// .replace(/"/g, '&quot;')
// .replace(/'/g, '&#x27;');
// see https://github.com/highlightjs/highlight.js/blob/7addd66c19036eccd7c602af61f1ed84d215c77d/src/lib/utils.js#L5
let iAmpEnd = html.indexOf(";", iHtml);
dbg(html.slice(iHtml, iAmpEnd));
iHtml = iAmpEnd + 1;
} else {
// regular text
dbg(html[iHtml]);
iHtml++;
}
iChildText++;
}
}
}