mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-07-29 11:02:28 +08:00
Readability.js update
This commit is contained in:
parent
338f01be01
commit
d3539ec9d7
2
.idea/vcs.xml
generated
2
.idea/vcs.xml
generated
@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
@ -1316,6 +1316,101 @@ Readability.prototype = {
|
||||
return metadata;
|
||||
},
|
||||
|
||||
/**
|
||||
* Check if node is image, or if node contains exactly only one image
|
||||
* whether as a direct child or as its descendants.
|
||||
*
|
||||
* @param Element
|
||||
**/
|
||||
_isSingleImage: function(node) {
|
||||
if (node.tagName === "IMG") {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (node.children.length !== 1 || node.textContent.trim() !== "") {
|
||||
return false;
|
||||
}
|
||||
|
||||
return this._isSingleImage(node.children[0]);
|
||||
},
|
||||
|
||||
/**
|
||||
* Find all <noscript> that are located after <img> nodes, and which contain only one
|
||||
* <img> element. Replace the first image with the image from inside the <noscript> tag,
|
||||
* and remove the <noscript> tag. This improves the quality of the images we use on
|
||||
* some sites (e.g. Medium).
|
||||
*
|
||||
* @param Element
|
||||
**/
|
||||
_unwrapNoscriptImages: function(doc) {
|
||||
// Find img without source or attributes that might contains image, and remove it.
|
||||
// This is done to prevent a placeholder img is replaced by img from noscript in next step.
|
||||
var imgs = Array.from(doc.getElementsByTagName("img"));
|
||||
this._forEachNode(imgs, function(img) {
|
||||
for (var i = 0; i < img.attributes.length; i++) {
|
||||
var attr = img.attributes[i];
|
||||
switch (attr.name) {
|
||||
case "src":
|
||||
case "srcset":
|
||||
case "data-src":
|
||||
case "data-srcset":
|
||||
return;
|
||||
}
|
||||
|
||||
if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
img.parentNode.removeChild(img);
|
||||
});
|
||||
|
||||
// Next find noscript and try to extract its image
|
||||
var noscripts = Array.from(doc.getElementsByTagName("noscript"));
|
||||
this._forEachNode(noscripts, function(noscript) {
|
||||
// Parse content of noscript and make sure it only contains image
|
||||
var tmp = doc.createElement("div");
|
||||
tmp.innerHTML = noscript.innerHTML;
|
||||
if (!this._isSingleImage(tmp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If noscript has previous sibling and it only contains image,
|
||||
// replace it with noscript content. However we also keep old
|
||||
// attributes that might contains image.
|
||||
var prevElement = noscript.previousElementSibling;
|
||||
if (prevElement && this._isSingleImage(prevElement)) {
|
||||
var prevImg = prevElement;
|
||||
if (prevImg.tagName !== "IMG") {
|
||||
prevImg = prevElement.getElementsByTagName("img")[0];
|
||||
}
|
||||
|
||||
var newImg = tmp.getElementsByTagName("img")[0];
|
||||
for (var i = 0; i < prevImg.attributes.length; i++) {
|
||||
var attr = prevImg.attributes[i];
|
||||
if (attr.value === "") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) {
|
||||
if (newImg.getAttribute(attr.name) === attr.value) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var attrName = attr.name;
|
||||
if (newImg.hasAttribute(attrName)) {
|
||||
attrName = "data-old-" + attrName;
|
||||
}
|
||||
|
||||
newImg.setAttribute(attrName, attr.value);
|
||||
}
|
||||
}
|
||||
|
||||
noscript.parentNode.replaceChild(tmp.firstElementChild, prevElement);
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
/**
|
||||
* Removes script tags from the document.
|
||||
*
|
||||
@ -1828,6 +1923,9 @@ Readability.prototype = {
|
||||
}
|
||||
}
|
||||
|
||||
// Unwrap image from noscript
|
||||
this._unwrapNoscriptImages(this._doc);
|
||||
|
||||
// Remove script tags from the document.
|
||||
this._removeScripts(this._doc);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user