mirror of
				https://github.com/TriliumNext/Notes.git
				synced 2025-10-26 09:31:34 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			1191 lines
		
	
	
		
			35 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			1191 lines
		
	
	
		
			35 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| // https://github.com/mozilla/readability/tree/814f0a3884350b6f1adfdebb79ca3599e9806605
 | |
| 
 | |
| /*eslint-env es6:false*/
 | |
| /* This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 | |
|  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| /**
 | |
|  * This is a relatively lightweight DOMParser that is safe to use in a web
 | |
|  * worker. This is far from a complete DOM implementation; however, it should
 | |
|  * contain the minimal set of functionality necessary for Readability.js.
 | |
|  *
 | |
|  * Aside from not implementing the full DOM API, there are other quirks to be
 | |
|  * aware of when using the JSDOMParser:
 | |
|  *
 | |
|  *   1) Properly formed HTML/XML must be used. This means you should be extra
 | |
|  *      careful when using this parser on anything received directly from an
 | |
|  *      XMLHttpRequest. Providing a serialized string from an XMLSerializer,
 | |
|  *      however, should be safe (since the browser's XMLSerializer should
 | |
|  *      generate valid HTML/XML). Therefore, if parsing a document from an XHR,
 | |
|  *      the recommended approach is to do the XHR in the main thread, use
 | |
|  *      XMLSerializer.serializeToString() on the responseXML, and pass the
 | |
|  *      resulting string to the worker.
 | |
|  *
 | |
|  *   2) Live NodeLists are not supported. DOM methods and properties such as
 | |
|  *      getElementsByTagName() and childNodes return standard arrays. If you
 | |
|  *      want these lists to be updated when nodes are removed or added to the
 | |
|  *      document, you must take care to manually update them yourself.
 | |
|  */
 | |
| (function (global) {
 | |
| 
 | |
|   // XML only defines these and the numeric ones:
 | |
| 
 | |
|   var entityTable = {
 | |
|     "lt": "<",
 | |
|     "gt": ">",
 | |
|     "amp": "&",
 | |
|     "quot": '"',
 | |
|     "apos": "'",
 | |
|   };
 | |
| 
 | |
|   var reverseEntityTable = {
 | |
|     "<": "<",
 | |
|     ">": ">",
 | |
|     "&": "&",
 | |
|     '"': """,
 | |
|     "'": "'",
 | |
|   };
 | |
| 
 | |
|   function encodeTextContentHTML(s) {
 | |
|     return s.replace(/[&<>]/g, function(x) {
 | |
|       return reverseEntityTable[x];
 | |
|     });
 | |
|   }
 | |
| 
 | |
|   function encodeHTML(s) {
 | |
|     return s.replace(/[&<>'"]/g, function(x) {
 | |
|       return reverseEntityTable[x];
 | |
|     });
 | |
|   }
 | |
| 
 | |
|   function decodeHTML(str) {
 | |
|     return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) {
 | |
|       return entityTable[tag];
 | |
|     }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) {
 | |
|       var num = parseInt(hex || numStr, hex ? 16 : 10); // read num
 | |
|       return String.fromCharCode(num);
 | |
|     });
 | |
|   }
 | |
| 
 | |
|   // When a style is set in JS, map it to the corresponding CSS attribute
 | |
|   var styleMap = {
 | |
|     "alignmentBaseline": "alignment-baseline",
 | |
|     "background": "background",
 | |
|     "backgroundAttachment": "background-attachment",
 | |
|     "backgroundClip": "background-clip",
 | |
|     "backgroundColor": "background-color",
 | |
|     "backgroundImage": "background-image",
 | |
|     "backgroundOrigin": "background-origin",
 | |
|     "backgroundPosition": "background-position",
 | |
|     "backgroundPositionX": "background-position-x",
 | |
|     "backgroundPositionY": "background-position-y",
 | |
|     "backgroundRepeat": "background-repeat",
 | |
|     "backgroundRepeatX": "background-repeat-x",
 | |
|     "backgroundRepeatY": "background-repeat-y",
 | |
|     "backgroundSize": "background-size",
 | |
|     "baselineShift": "baseline-shift",
 | |
|     "border": "border",
 | |
|     "borderBottom": "border-bottom",
 | |
|     "borderBottomColor": "border-bottom-color",
 | |
|     "borderBottomLeftRadius": "border-bottom-left-radius",
 | |
|     "borderBottomRightRadius": "border-bottom-right-radius",
 | |
|     "borderBottomStyle": "border-bottom-style",
 | |
|     "borderBottomWidth": "border-bottom-width",
 | |
|     "borderCollapse": "border-collapse",
 | |
|     "borderColor": "border-color",
 | |
|     "borderImage": "border-image",
 | |
|     "borderImageOutset": "border-image-outset",
 | |
|     "borderImageRepeat": "border-image-repeat",
 | |
|     "borderImageSlice": "border-image-slice",
 | |
|     "borderImageSource": "border-image-source",
 | |
|     "borderImageWidth": "border-image-width",
 | |
|     "borderLeft": "border-left",
 | |
|     "borderLeftColor": "border-left-color",
 | |
|     "borderLeftStyle": "border-left-style",
 | |
|     "borderLeftWidth": "border-left-width",
 | |
|     "borderRadius": "border-radius",
 | |
|     "borderRight": "border-right",
 | |
|     "borderRightColor": "border-right-color",
 | |
|     "borderRightStyle": "border-right-style",
 | |
|     "borderRightWidth": "border-right-width",
 | |
|     "borderSpacing": "border-spacing",
 | |
|     "borderStyle": "border-style",
 | |
|     "borderTop": "border-top",
 | |
|     "borderTopColor": "border-top-color",
 | |
|     "borderTopLeftRadius": "border-top-left-radius",
 | |
|     "borderTopRightRadius": "border-top-right-radius",
 | |
|     "borderTopStyle": "border-top-style",
 | |
|     "borderTopWidth": "border-top-width",
 | |
|     "borderWidth": "border-width",
 | |
|     "bottom": "bottom",
 | |
|     "boxShadow": "box-shadow",
 | |
|     "boxSizing": "box-sizing",
 | |
|     "captionSide": "caption-side",
 | |
|     "clear": "clear",
 | |
|     "clip": "clip",
 | |
|     "clipPath": "clip-path",
 | |
|     "clipRule": "clip-rule",
 | |
|     "color": "color",
 | |
|     "colorInterpolation": "color-interpolation",
 | |
|     "colorInterpolationFilters": "color-interpolation-filters",
 | |
|     "colorProfile": "color-profile",
 | |
|     "colorRendering": "color-rendering",
 | |
|     "content": "content",
 | |
|     "counterIncrement": "counter-increment",
 | |
|     "counterReset": "counter-reset",
 | |
|     "cursor": "cursor",
 | |
|     "direction": "direction",
 | |
|     "display": "display",
 | |
|     "dominantBaseline": "dominant-baseline",
 | |
|     "emptyCells": "empty-cells",
 | |
|     "enableBackground": "enable-background",
 | |
|     "fill": "fill",
 | |
|     "fillOpacity": "fill-opacity",
 | |
|     "fillRule": "fill-rule",
 | |
|     "filter": "filter",
 | |
|     "cssFloat": "float",
 | |
|     "floodColor": "flood-color",
 | |
|     "floodOpacity": "flood-opacity",
 | |
|     "font": "font",
 | |
|     "fontFamily": "font-family",
 | |
|     "fontSize": "font-size",
 | |
|     "fontStretch": "font-stretch",
 | |
|     "fontStyle": "font-style",
 | |
|     "fontVariant": "font-variant",
 | |
|     "fontWeight": "font-weight",
 | |
|     "glyphOrientationHorizontal": "glyph-orientation-horizontal",
 | |
|     "glyphOrientationVertical": "glyph-orientation-vertical",
 | |
|     "height": "height",
 | |
|     "imageRendering": "image-rendering",
 | |
|     "kerning": "kerning",
 | |
|     "left": "left",
 | |
|     "letterSpacing": "letter-spacing",
 | |
|     "lightingColor": "lighting-color",
 | |
|     "lineHeight": "line-height",
 | |
|     "listStyle": "list-style",
 | |
|     "listStyleImage": "list-style-image",
 | |
|     "listStylePosition": "list-style-position",
 | |
|     "listStyleType": "list-style-type",
 | |
|     "margin": "margin",
 | |
|     "marginBottom": "margin-bottom",
 | |
|     "marginLeft": "margin-left",
 | |
|     "marginRight": "margin-right",
 | |
|     "marginTop": "margin-top",
 | |
|     "marker": "marker",
 | |
|     "markerEnd": "marker-end",
 | |
|     "markerMid": "marker-mid",
 | |
|     "markerStart": "marker-start",
 | |
|     "mask": "mask",
 | |
|     "maxHeight": "max-height",
 | |
|     "maxWidth": "max-width",
 | |
|     "minHeight": "min-height",
 | |
|     "minWidth": "min-width",
 | |
|     "opacity": "opacity",
 | |
|     "orphans": "orphans",
 | |
|     "outline": "outline",
 | |
|     "outlineColor": "outline-color",
 | |
|     "outlineOffset": "outline-offset",
 | |
|     "outlineStyle": "outline-style",
 | |
|     "outlineWidth": "outline-width",
 | |
|     "overflow": "overflow",
 | |
|     "overflowX": "overflow-x",
 | |
|     "overflowY": "overflow-y",
 | |
|     "padding": "padding",
 | |
|     "paddingBottom": "padding-bottom",
 | |
|     "paddingLeft": "padding-left",
 | |
|     "paddingRight": "padding-right",
 | |
|     "paddingTop": "padding-top",
 | |
|     "page": "page",
 | |
|     "pageBreakAfter": "page-break-after",
 | |
|     "pageBreakBefore": "page-break-before",
 | |
|     "pageBreakInside": "page-break-inside",
 | |
|     "pointerEvents": "pointer-events",
 | |
|     "position": "position",
 | |
|     "quotes": "quotes",
 | |
|     "resize": "resize",
 | |
|     "right": "right",
 | |
|     "shapeRendering": "shape-rendering",
 | |
|     "size": "size",
 | |
|     "speak": "speak",
 | |
|     "src": "src",
 | |
|     "stopColor": "stop-color",
 | |
|     "stopOpacity": "stop-opacity",
 | |
|     "stroke": "stroke",
 | |
|     "strokeDasharray": "stroke-dasharray",
 | |
|     "strokeDashoffset": "stroke-dashoffset",
 | |
|     "strokeLinecap": "stroke-linecap",
 | |
|     "strokeLinejoin": "stroke-linejoin",
 | |
|     "strokeMiterlimit": "stroke-miterlimit",
 | |
|     "strokeOpacity": "stroke-opacity",
 | |
|     "strokeWidth": "stroke-width",
 | |
|     "tableLayout": "table-layout",
 | |
|     "textAlign": "text-align",
 | |
|     "textAnchor": "text-anchor",
 | |
|     "textDecoration": "text-decoration",
 | |
|     "textIndent": "text-indent",
 | |
|     "textLineThrough": "text-line-through",
 | |
|     "textLineThroughColor": "text-line-through-color",
 | |
|     "textLineThroughMode": "text-line-through-mode",
 | |
|     "textLineThroughStyle": "text-line-through-style",
 | |
|     "textLineThroughWidth": "text-line-through-width",
 | |
|     "textOverflow": "text-overflow",
 | |
|     "textOverline": "text-overline",
 | |
|     "textOverlineColor": "text-overline-color",
 | |
|     "textOverlineMode": "text-overline-mode",
 | |
|     "textOverlineStyle": "text-overline-style",
 | |
|     "textOverlineWidth": "text-overline-width",
 | |
|     "textRendering": "text-rendering",
 | |
|     "textShadow": "text-shadow",
 | |
|     "textTransform": "text-transform",
 | |
|     "textUnderline": "text-underline",
 | |
|     "textUnderlineColor": "text-underline-color",
 | |
|     "textUnderlineMode": "text-underline-mode",
 | |
|     "textUnderlineStyle": "text-underline-style",
 | |
|     "textUnderlineWidth": "text-underline-width",
 | |
|     "top": "top",
 | |
|     "unicodeBidi": "unicode-bidi",
 | |
|     "unicodeRange": "unicode-range",
 | |
|     "vectorEffect": "vector-effect",
 | |
|     "verticalAlign": "vertical-align",
 | |
|     "visibility": "visibility",
 | |
|     "whiteSpace": "white-space",
 | |
|     "widows": "widows",
 | |
|     "width": "width",
 | |
|     "wordBreak": "word-break",
 | |
|     "wordSpacing": "word-spacing",
 | |
|     "wordWrap": "word-wrap",
 | |
|     "writingMode": "writing-mode",
 | |
|     "zIndex": "z-index",
 | |
|     "zoom": "zoom",
 | |
|   };
 | |
| 
 | |
|   // Elements that can be self-closing
 | |
|   var voidElems = {
 | |
|     "area": true,
 | |
|     "base": true,
 | |
|     "br": true,
 | |
|     "col": true,
 | |
|     "command": true,
 | |
|     "embed": true,
 | |
|     "hr": true,
 | |
|     "img": true,
 | |
|     "input": true,
 | |
|     "link": true,
 | |
|     "meta": true,
 | |
|     "param": true,
 | |
|     "source": true,
 | |
|     "wbr": true
 | |
|   };
 | |
| 
 | |
|   var whitespace = [" ", "\t", "\n", "\r"];
 | |
| 
 | |
|   // See http://www.w3schools.com/dom/dom_nodetype.asp
 | |
|   var nodeTypes = {
 | |
|     ELEMENT_NODE: 1,
 | |
|     ATTRIBUTE_NODE: 2,
 | |
|     TEXT_NODE: 3,
 | |
|     CDATA_SECTION_NODE: 4,
 | |
|     ENTITY_REFERENCE_NODE: 5,
 | |
|     ENTITY_NODE: 6,
 | |
|     PROCESSING_INSTRUCTION_NODE: 7,
 | |
|     COMMENT_NODE: 8,
 | |
|     DOCUMENT_NODE: 9,
 | |
|     DOCUMENT_TYPE_NODE: 10,
 | |
|     DOCUMENT_FRAGMENT_NODE: 11,
 | |
|     NOTATION_NODE: 12
 | |
|   };
 | |
| 
 | |
|   function getElementsByTagName(tag) {
 | |
|     tag = tag.toUpperCase();
 | |
|     var elems = [];
 | |
|     var allTags = (tag === "*");
 | |
|     function getElems(node) {
 | |
|       var length = node.children.length;
 | |
|       for (var i = 0; i < length; i++) {
 | |
|         var child = node.children[i];
 | |
|         if (allTags || (child.tagName === tag))
 | |
|           elems.push(child);
 | |
|         getElems(child);
 | |
|       }
 | |
|     }
 | |
|     getElems(this);
 | |
|     return elems;
 | |
|   }
 | |
| 
 | |
|   var Node = function () {};
 | |
| 
 | |
|   Node.prototype = {
 | |
|     attributes: null,
 | |
|     childNodes: null,
 | |
|     localName: null,
 | |
|     nodeName: null,
 | |
|     parentNode: null,
 | |
|     textContent: null,
 | |
|     nextSibling: null,
 | |
|     previousSibling: null,
 | |
| 
 | |
|     get firstChild() {
 | |
|       return this.childNodes[0] || null;
 | |
|     },
 | |
| 
 | |
|     get firstElementChild() {
 | |
|       return this.children[0] || null;
 | |
|     },
 | |
| 
 | |
|     get lastChild() {
 | |
|       return this.childNodes[this.childNodes.length - 1] || null;
 | |
|     },
 | |
| 
 | |
|     get lastElementChild() {
 | |
|       return this.children[this.children.length - 1] || null;
 | |
|     },
 | |
| 
 | |
|     appendChild: function (child) {
 | |
|       if (child.parentNode) {
 | |
|         child.parentNode.removeChild(child);
 | |
|       }
 | |
| 
 | |
|       var last = this.lastChild;
 | |
|       if (last)
 | |
|         last.nextSibling = child;
 | |
|       child.previousSibling = last;
 | |
| 
 | |
|       if (child.nodeType === Node.ELEMENT_NODE) {
 | |
|         child.previousElementSibling = this.children[this.children.length - 1] || null;
 | |
|         this.children.push(child);
 | |
|         child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child);
 | |
|       }
 | |
|       this.childNodes.push(child);
 | |
|       child.parentNode = this;
 | |
|     },
 | |
| 
 | |
|     removeChild: function (child) {
 | |
|       var childNodes = this.childNodes;
 | |
|       var childIndex = childNodes.indexOf(child);
 | |
|       if (childIndex === -1) {
 | |
|         throw "removeChild: node not found";
 | |
|       } else {
 | |
|         child.parentNode = null;
 | |
|         var prev = child.previousSibling;
 | |
|         var next = child.nextSibling;
 | |
|         if (prev)
 | |
|           prev.nextSibling = next;
 | |
|         if (next)
 | |
|           next.previousSibling = prev;
 | |
| 
 | |
|         if (child.nodeType === Node.ELEMENT_NODE) {
 | |
|           prev = child.previousElementSibling;
 | |
|           next = child.nextElementSibling;
 | |
|           if (prev)
 | |
|             prev.nextElementSibling = next;
 | |
|           if (next)
 | |
|             next.previousElementSibling = prev;
 | |
|           this.children.splice(this.children.indexOf(child), 1);
 | |
|         }
 | |
| 
 | |
|         child.previousSibling = child.nextSibling = null;
 | |
|         child.previousElementSibling = child.nextElementSibling = null;
 | |
| 
 | |
|         return childNodes.splice(childIndex, 1)[0];
 | |
|       }
 | |
|     },
 | |
| 
 | |
|     replaceChild: function (newNode, oldNode) {
 | |
|       var childNodes = this.childNodes;
 | |
|       var childIndex = childNodes.indexOf(oldNode);
 | |
|       if (childIndex === -1) {
 | |
|         throw "replaceChild: node not found";
 | |
|       } else {
 | |
|         // This will take care of updating the new node if it was somewhere else before:
 | |
|         if (newNode.parentNode)
 | |
|           newNode.parentNode.removeChild(newNode);
 | |
| 
 | |
|         childNodes[childIndex] = newNode;
 | |
| 
 | |
|         // update the new node's sibling properties, and its new siblings' sibling properties
 | |
|         newNode.nextSibling = oldNode.nextSibling;
 | |
|         newNode.previousSibling = oldNode.previousSibling;
 | |
|         if (newNode.nextSibling)
 | |
|           newNode.nextSibling.previousSibling = newNode;
 | |
|         if (newNode.previousSibling)
 | |
|           newNode.previousSibling.nextSibling = newNode;
 | |
| 
 | |
|         newNode.parentNode = this;
 | |
| 
 | |
|         // Now deal with elements before we clear out those values for the old node,
 | |
|         // because it can help us take shortcuts here:
 | |
|         if (newNode.nodeType === Node.ELEMENT_NODE) {
 | |
|           if (oldNode.nodeType === Node.ELEMENT_NODE) {
 | |
|             // Both were elements, which makes this easier, we just swap things out:
 | |
|             newNode.previousElementSibling = oldNode.previousElementSibling;
 | |
|             newNode.nextElementSibling = oldNode.nextElementSibling;
 | |
|             if (newNode.previousElementSibling)
 | |
|               newNode.previousElementSibling.nextElementSibling = newNode;
 | |
|             if (newNode.nextElementSibling)
 | |
|               newNode.nextElementSibling.previousElementSibling = newNode;
 | |
|             this.children[this.children.indexOf(oldNode)] = newNode;
 | |
|           } else {
 | |
|             // Hard way:
 | |
|             newNode.previousElementSibling = (function() {
 | |
|               for (var i = childIndex - 1; i >= 0; i--) {
 | |
|                 if (childNodes[i].nodeType === Node.ELEMENT_NODE)
 | |
|                   return childNodes[i];
 | |
|               }
 | |
|               return null;
 | |
|             })();
 | |
|             if (newNode.previousElementSibling) {
 | |
|               newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling;
 | |
|             } else {
 | |
|               newNode.nextElementSibling = (function() {
 | |
|                 for (var i = childIndex + 1; i < childNodes.length; i++) {
 | |
|                   if (childNodes[i].nodeType === Node.ELEMENT_NODE)
 | |
|                     return childNodes[i];
 | |
|                 }
 | |
|                 return null;
 | |
|               })();
 | |
|             }
 | |
|             if (newNode.previousElementSibling)
 | |
|               newNode.previousElementSibling.nextElementSibling = newNode;
 | |
|             if (newNode.nextElementSibling)
 | |
|               newNode.nextElementSibling.previousElementSibling = newNode;
 | |
| 
 | |
|             if (newNode.nextElementSibling)
 | |
|               this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode);
 | |
|             else
 | |
|               this.children.push(newNode);
 | |
|           }
 | |
|         } else if (oldNode.nodeType === Node.ELEMENT_NODE) {
 | |
|           // new node is not an element node.
 | |
|           // if the old one was, update its element siblings:
 | |
|           if (oldNode.previousElementSibling)
 | |
|             oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling;
 | |
|           if (oldNode.nextElementSibling)
 | |
|             oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling;
 | |
|           this.children.splice(this.children.indexOf(oldNode), 1);
 | |
| 
 | |
|           // If the old node wasn't an element, neither the new nor the old node was an element,
 | |
|           // and the children array and its members shouldn't need any updating.
 | |
|         }
 | |
| 
 | |
| 
 | |
|         oldNode.parentNode = null;
 | |
|         oldNode.previousSibling = null;
 | |
|         oldNode.nextSibling = null;
 | |
|         if (oldNode.nodeType === Node.ELEMENT_NODE) {
 | |
|           oldNode.previousElementSibling = null;
 | |
|           oldNode.nextElementSibling = null;
 | |
|         }
 | |
|         return oldNode;
 | |
|       }
 | |
|     },
 | |
| 
 | |
|     __JSDOMParser__: true,
 | |
|   };
 | |
| 
 | |
|   for (var nodeType in nodeTypes) {
 | |
|     Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType];
 | |
|   }
 | |
| 
 | |
|   var Attribute = function (name, value) {
 | |
|     this.name = name;
 | |
|     this._value = value;
 | |
|   };
 | |
| 
 | |
|   Attribute.prototype = {
 | |
|     get value() {
 | |
|       return this._value;
 | |
|     },
 | |
|     setValue: function(newValue) {
 | |
|       this._value = newValue;
 | |
|     },
 | |
|     getEncodedValue: function() {
 | |
|       return encodeHTML(this._value);
 | |
|     },
 | |
|   };
 | |
| 
 | |
|   var Comment = function () {
 | |
|     this.childNodes = [];
 | |
|   };
 | |
| 
 | |
|   Comment.prototype = {
 | |
|     __proto__: Node.prototype,
 | |
| 
 | |
|     nodeName: "#comment",
 | |
|     nodeType: Node.COMMENT_NODE
 | |
|   };
 | |
| 
 | |
|   var Text = function () {
 | |
|     this.childNodes = [];
 | |
|   };
 | |
| 
 | |
|   Text.prototype = {
 | |
|     __proto__: Node.prototype,
 | |
| 
 | |
|     nodeName: "#text",
 | |
|     nodeType: Node.TEXT_NODE,
 | |
|     get textContent() {
 | |
|       if (typeof this._textContent === "undefined") {
 | |
|         this._textContent = decodeHTML(this._innerHTML || "");
 | |
|       }
 | |
|       return this._textContent;
 | |
|     },
 | |
|     get innerHTML() {
 | |
|       if (typeof this._innerHTML === "undefined") {
 | |
|         this._innerHTML = encodeTextContentHTML(this._textContent || "");
 | |
|       }
 | |
|       return this._innerHTML;
 | |
|     },
 | |
| 
 | |
|     set innerHTML(newHTML) {
 | |
|       this._innerHTML = newHTML;
 | |
|       delete this._textContent;
 | |
|     },
 | |
|     set textContent(newText) {
 | |
|       this._textContent = newText;
 | |
|       delete this._innerHTML;
 | |
|     },
 | |
|   };
 | |
| 
 | |
|   var Document = function (url) {
 | |
|     this.documentURI = url;
 | |
|     this.styleSheets = [];
 | |
|     this.childNodes = [];
 | |
|     this.children = [];
 | |
|   };
 | |
| 
 | |
|   Document.prototype = {
 | |
|     __proto__: Node.prototype,
 | |
| 
 | |
|     nodeName: "#document",
 | |
|     nodeType: Node.DOCUMENT_NODE,
 | |
|     title: "",
 | |
| 
 | |
|     getElementsByTagName: getElementsByTagName,
 | |
| 
 | |
|     getElementById: function (id) {
 | |
|       function getElem(node) {
 | |
|         var length = node.children.length;
 | |
|         if (node.id === id)
 | |
|           return node;
 | |
|         for (var i = 0; i < length; i++) {
 | |
|           var el = getElem(node.children[i]);
 | |
|           if (el)
 | |
|             return el;
 | |
|         }
 | |
|         return null;
 | |
|       }
 | |
|       return getElem(this);
 | |
|     },
 | |
| 
 | |
|     createElement: function (tag) {
 | |
|       var node = new Element(tag);
 | |
|       return node;
 | |
|     },
 | |
| 
 | |
|     createTextNode: function (text) {
 | |
|       var node = new Text();
 | |
|       node.textContent = text;
 | |
|       return node;
 | |
|     },
 | |
| 
 | |
|     get baseURI() {
 | |
|       if (!this.hasOwnProperty("_baseURI")) {
 | |
|         this._baseURI = this.documentURI;
 | |
|         var baseElements = this.getElementsByTagName("base");
 | |
|         var href = baseElements[0] && baseElements[0].getAttribute("href");
 | |
|         if (href) {
 | |
|           try {
 | |
|             this._baseURI = (new URL(href, this._baseURI)).href;
 | |
|           } catch (ex) {/* Just fall back to documentURI */}
 | |
|         }
 | |
|       }
 | |
|       return this._baseURI;
 | |
|     },
 | |
|   };
 | |
| 
 | |
|   var Element = function (tag) {
 | |
|     // We use this to find the closing tag.
 | |
|     this._matchingTag = tag;
 | |
|     // We're explicitly a non-namespace aware parser, we just pretend it's all HTML.
 | |
|     var lastColonIndex = tag.lastIndexOf(":");
 | |
|     if (lastColonIndex != -1) {
 | |
|       tag = tag.substring(lastColonIndex + 1);
 | |
|     }
 | |
|     this.attributes = [];
 | |
|     this.childNodes = [];
 | |
|     this.children = [];
 | |
|     this.nextElementSibling = this.previousElementSibling = null;
 | |
|     this.localName = tag.toLowerCase();
 | |
|     this.tagName = tag.toUpperCase();
 | |
|     this.style = new Style(this);
 | |
|   };
 | |
| 
 | |
|   Element.prototype = {
 | |
|     __proto__: Node.prototype,
 | |
| 
 | |
|     nodeType: Node.ELEMENT_NODE,
 | |
| 
 | |
|     getElementsByTagName: getElementsByTagName,
 | |
| 
 | |
|     get className() {
 | |
|       return this.getAttribute("class") || "";
 | |
|     },
 | |
| 
 | |
|     set className(str) {
 | |
|       this.setAttribute("class", str);
 | |
|     },
 | |
| 
 | |
|     get id() {
 | |
|       return this.getAttribute("id") || "";
 | |
|     },
 | |
| 
 | |
|     set id(str) {
 | |
|       this.setAttribute("id", str);
 | |
|     },
 | |
| 
 | |
|     get href() {
 | |
|       return this.getAttribute("href") || "";
 | |
|     },
 | |
| 
 | |
|     set href(str) {
 | |
|       this.setAttribute("href", str);
 | |
|     },
 | |
| 
 | |
|     get src() {
 | |
|       return this.getAttribute("src") || "";
 | |
|     },
 | |
| 
 | |
|     set src(str) {
 | |
|       this.setAttribute("src", str);
 | |
|     },
 | |
| 
 | |
|     get srcset() {
 | |
|       return this.getAttribute("srcset") || "";
 | |
|     },
 | |
| 
 | |
|     set srcset(str) {
 | |
|       this.setAttribute("srcset", str);
 | |
|     },
 | |
| 
 | |
|     get nodeName() {
 | |
|       return this.tagName;
 | |
|     },
 | |
| 
 | |
|     get innerHTML() {
 | |
|       function getHTML(node) {
 | |
|         var i = 0;
 | |
|         for (i = 0; i < node.childNodes.length; i++) {
 | |
|           var child = node.childNodes[i];
 | |
|           if (child.localName) {
 | |
|             arr.push("<" + child.localName);
 | |
| 
 | |
|             // serialize attribute list
 | |
|             for (var j = 0; j < child.attributes.length; j++) {
 | |
|               var attr = child.attributes[j];
 | |
|               // the attribute value will be HTML escaped.
 | |
|               var val = attr.getEncodedValue();
 | |
|               var quote = (val.indexOf('"') === -1 ? '"' : "'");
 | |
|               arr.push(" " + attr.name + "=" + quote + val + quote);
 | |
|             }
 | |
| 
 | |
|             if (child.localName in voidElems && !child.childNodes.length) {
 | |
|               // if this is a self-closing element, end it here
 | |
|               arr.push("/>");
 | |
|             } else {
 | |
|               // otherwise, add its children
 | |
|               arr.push(">");
 | |
|               getHTML(child);
 | |
|               arr.push("</" + child.localName + ">");
 | |
|             }
 | |
|           } else {
 | |
|             // This is a text node, so asking for innerHTML won't recurse.
 | |
|             arr.push(child.innerHTML);
 | |
|           }
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       // Using Array.join() avoids the overhead from lazy string concatenation.
 | |
|       // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
 | |
|       var arr = [];
 | |
|       getHTML(this);
 | |
|       return arr.join("");
 | |
|     },
 | |
| 
 | |
|     set innerHTML(html) {
 | |
|       var parser = new JSDOMParser();
 | |
|       var node = parser.parse(html);
 | |
|       var i;
 | |
|       for (i = this.childNodes.length; --i >= 0;) {
 | |
|         this.childNodes[i].parentNode = null;
 | |
|       }
 | |
|       this.childNodes = node.childNodes;
 | |
|       this.children = node.children;
 | |
|       for (i = this.childNodes.length; --i >= 0;) {
 | |
|         this.childNodes[i].parentNode = this;
 | |
|       }
 | |
|     },
 | |
| 
 | |
|     set textContent(text) {
 | |
|       // clear parentNodes for existing children
 | |
|       for (var i = this.childNodes.length; --i >= 0;) {
 | |
|         this.childNodes[i].parentNode = null;
 | |
|       }
 | |
| 
 | |
|       var node = new Text();
 | |
|       this.childNodes = [ node ];
 | |
|       this.children = [];
 | |
|       node.textContent = text;
 | |
|       node.parentNode = this;
 | |
|     },
 | |
| 
 | |
|     get textContent() {
 | |
|       function getText(node) {
 | |
|         var nodes = node.childNodes;
 | |
|         for (var i = 0; i < nodes.length; i++) {
 | |
|           var child = nodes[i];
 | |
|           if (child.nodeType === 3) {
 | |
|             text.push(child.textContent);
 | |
|           } else {
 | |
|             getText(child);
 | |
|           }
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       // Using Array.join() avoids the overhead from lazy string concatenation.
 | |
|       // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes
 | |
|       var text = [];
 | |
|       getText(this);
 | |
|       return text.join("");
 | |
|     },
 | |
| 
 | |
|     getAttribute: function (name) {
 | |
|       for (var i = this.attributes.length; --i >= 0;) {
 | |
|         var attr = this.attributes[i];
 | |
|         if (attr.name === name) {
 | |
|           return attr.value;
 | |
|         }
 | |
|       }
 | |
|       return undefined;
 | |
|     },
 | |
| 
 | |
|     setAttribute: function (name, value) {
 | |
|       for (var i = this.attributes.length; --i >= 0;) {
 | |
|         var attr = this.attributes[i];
 | |
|         if (attr.name === name) {
 | |
|           attr.setValue(value);
 | |
|           return;
 | |
|         }
 | |
|       }
 | |
|       this.attributes.push(new Attribute(name, value));
 | |
|     },
 | |
| 
 | |
|     removeAttribute: function (name) {
 | |
|       for (var i = this.attributes.length; --i >= 0;) {
 | |
|         var attr = this.attributes[i];
 | |
|         if (attr.name === name) {
 | |
|           this.attributes.splice(i, 1);
 | |
|           break;
 | |
|         }
 | |
|       }
 | |
|     },
 | |
| 
 | |
|     hasAttribute: function (name) {
 | |
|       return this.attributes.some(function (attr) {
 | |
|         return attr.name == name;
 | |
|       });
 | |
|     },
 | |
|   };
 | |
| 
 | |
|   var Style = function (node) {
 | |
|     this.node = node;
 | |
|   };
 | |
| 
 | |
|   // getStyle() and setStyle() use the style attribute string directly. This
 | |
|   // won't be very efficient if there are a lot of style manipulations, but
 | |
|   // it's the easiest way to make sure the style attribute string and the JS
 | |
|   // style property stay in sync. Readability.js doesn't do many style
 | |
|   // manipulations, so this should be okay.
 | |
|   Style.prototype = {
 | |
|     getStyle: function (styleName) {
 | |
|       var attr = this.node.getAttribute("style");
 | |
|       if (!attr)
 | |
|         return undefined;
 | |
| 
 | |
|       var styles = attr.split(";");
 | |
|       for (var i = 0; i < styles.length; i++) {
 | |
|         var style = styles[i].split(":");
 | |
|         var name = style[0].trim();
 | |
|         if (name === styleName)
 | |
|           return style[1].trim();
 | |
|       }
 | |
| 
 | |
|       return undefined;
 | |
|     },
 | |
| 
 | |
|     setStyle: function (styleName, styleValue) {
 | |
|       var value = this.node.getAttribute("style") || "";
 | |
|       var index = 0;
 | |
|       do {
 | |
|         var next = value.indexOf(";", index) + 1;
 | |
|         var length = next - index - 1;
 | |
|         var style = (length > 0 ? value.substr(index, length) : value.substr(index));
 | |
|         if (style.substr(0, style.indexOf(":")).trim() === styleName) {
 | |
|           value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : "");
 | |
|           break;
 | |
|         }
 | |
|         index = next;
 | |
|       } while (index);
 | |
| 
 | |
|       value += " " + styleName + ": " + styleValue + ";";
 | |
|       this.node.setAttribute("style", value.trim());
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   // For each item in styleMap, define a getter and setter on the style
 | |
|   // property.
 | |
|   for (var jsName in styleMap) {
 | |
|     (function (cssName) {
 | |
|       Style.prototype.__defineGetter__(jsName, function () {
 | |
|         return this.getStyle(cssName);
 | |
|       });
 | |
|       Style.prototype.__defineSetter__(jsName, function (value) {
 | |
|         this.setStyle(cssName, value);
 | |
|       });
 | |
|     })(styleMap[jsName]);
 | |
|   }
 | |
| 
 | |
|   var JSDOMParser = function () {
 | |
|     this.currentChar = 0;
 | |
| 
 | |
|     // In makeElementNode() we build up many strings one char at a time. Using
 | |
|     // += for this results in lots of short-lived intermediate strings. It's
 | |
|     // better to build an array of single-char strings and then join() them
 | |
|     // together at the end. And reusing a single array (i.e. |this.strBuf|)
 | |
|     // over and over for this purpose uses less memory than using a new array
 | |
|     // for each string.
 | |
|     this.strBuf = [];
 | |
| 
 | |
|     // Similarly, we reuse this array to return the two arguments from
 | |
|     // makeElementNode(), which saves us from having to allocate a new array
 | |
|     // every time.
 | |
|     this.retPair = [];
 | |
| 
 | |
|     this.errorState = "";
 | |
|   };
 | |
| 
 | |
|   JSDOMParser.prototype = {
 | |
|     error: function(m) {
 | |
|       dump("JSDOMParser error: " + m + "\n");
 | |
|       this.errorState += m + "\n";
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Look at the next character without advancing the index.
 | |
|      */
 | |
|     peekNext: function () {
 | |
|       return this.html[this.currentChar];
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Get the next character and advance the index.
 | |
|      */
 | |
|     nextChar: function () {
 | |
|       return this.html[this.currentChar++];
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Called after a quote character is read. This finds the next quote
 | |
|      * character and returns the text string in between.
 | |
|      */
 | |
|     readString: function (quote) {
 | |
|       var str;
 | |
|       var n = this.html.indexOf(quote, this.currentChar);
 | |
|       if (n === -1) {
 | |
|         this.currentChar = this.html.length;
 | |
|         str = null;
 | |
|       } else {
 | |
|         str = this.html.substring(this.currentChar, n);
 | |
|         this.currentChar = n + 1;
 | |
|       }
 | |
| 
 | |
|       return str;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Called when parsing a node. This finds the next name/value attribute
 | |
|      * pair and adds the result to the attributes list.
 | |
|      */
 | |
|     readAttribute: function (node) {
 | |
|       var name = "";
 | |
| 
 | |
|       var n = this.html.indexOf("=", this.currentChar);
 | |
|       if (n === -1) {
 | |
|         this.currentChar = this.html.length;
 | |
|       } else {
 | |
|         // Read until a '=' character is hit; this will be the attribute key
 | |
|         name = this.html.substring(this.currentChar, n);
 | |
|         this.currentChar = n + 1;
 | |
|       }
 | |
| 
 | |
|       if (!name)
 | |
|         return;
 | |
| 
 | |
|       // After a '=', we should see a '"' for the attribute value
 | |
|       var c = this.nextChar();
 | |
|       if (c !== '"' && c !== "'") {
 | |
|         this.error("Error reading attribute " + name + ", expecting '\"'");
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|       // Read the attribute value (and consume the matching quote)
 | |
|       var value = this.readString(c);
 | |
| 
 | |
|       node.attributes.push(new Attribute(name, decodeHTML(value)));
 | |
| 
 | |
|       return;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Parses and returns an Element node. This is called after a '<' has been
 | |
|      * read.
 | |
|      *
 | |
|      * @returns an array; the first index of the array is the parsed node;
 | |
|      *          the second index is a boolean indicating whether this is a void
 | |
|      *          Element
 | |
|      */
 | |
|     makeElementNode: function (retPair) {
 | |
|       var c = this.nextChar();
 | |
| 
 | |
|       // Read the Element tag name
 | |
|       var strBuf = this.strBuf;
 | |
|       strBuf.length = 0;
 | |
|       while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") {
 | |
|         if (c === undefined)
 | |
|           return false;
 | |
|         strBuf.push(c);
 | |
|         c = this.nextChar();
 | |
|       }
 | |
|       var tag = strBuf.join("");
 | |
| 
 | |
|       if (!tag)
 | |
|         return false;
 | |
| 
 | |
|       var node = new Element(tag);
 | |
| 
 | |
|       // Read Element attributes
 | |
|       while (c !== "/" && c !== ">") {
 | |
|         if (c === undefined)
 | |
|           return false;
 | |
|         while (whitespace.indexOf(this.html[this.currentChar++]) != -1) {
 | |
|           // Advance cursor to first non-whitespace char.
 | |
|         }
 | |
|         this.currentChar--;
 | |
|         c = this.nextChar();
 | |
|         if (c !== "/" && c !== ">") {
 | |
|           --this.currentChar;
 | |
|           this.readAttribute(node);
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       // If this is a self-closing tag, read '/>'
 | |
|       var closed = false;
 | |
|       if (c === "/") {
 | |
|         closed = true;
 | |
|         c = this.nextChar();
 | |
|         if (c !== ">") {
 | |
|           this.error("expected '>' to close " + tag);
 | |
|           return false;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       retPair[0] = node;
 | |
|       retPair[1] = closed;
 | |
|       return true;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * If the current input matches this string, advance the input index;
 | |
|      * otherwise, do nothing.
 | |
|      *
 | |
|      * @returns whether input matched string
 | |
|      */
 | |
|     match: function (str) {
 | |
|       var strlen = str.length;
 | |
|       if (this.html.substr(this.currentChar, strlen).toLowerCase() === str.toLowerCase()) {
 | |
|         this.currentChar += strlen;
 | |
|         return true;
 | |
|       }
 | |
|       return false;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Searches the input until a string is found and discards all input up to
 | |
|      * and including the matched string.
 | |
|      */
 | |
|     discardTo: function (str) {
 | |
|       var index = this.html.indexOf(str, this.currentChar) + str.length;
 | |
|       if (index === -1)
 | |
|         this.currentChar = this.html.length;
 | |
|       this.currentChar = index;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Reads child nodes for the given node.
 | |
|      */
 | |
|     readChildren: function (node) {
 | |
|       var child;
 | |
|       while ((child = this.readNode())) {
 | |
|         // Don't keep Comment nodes
 | |
|         if (child.nodeType !== 8) {
 | |
|           node.appendChild(child);
 | |
|         }
 | |
|       }
 | |
|     },
 | |
| 
 | |
|     discardNextComment: function() {
 | |
|       if (this.match("--")) {
 | |
|         this.discardTo("-->");
 | |
|       } else {
 | |
|         var c = this.nextChar();
 | |
|         while (c !== ">") {
 | |
|           if (c === undefined)
 | |
|             return null;
 | |
|           if (c === '"' || c === "'")
 | |
|             this.readString(c);
 | |
|           c = this.nextChar();
 | |
|         }
 | |
|       }
 | |
|       return new Comment();
 | |
|     },
 | |
| 
 | |
| 
 | |
|     /**
 | |
|      * Reads the next child node from the input. If we're reading a closing
 | |
|      * tag, or if we've reached the end of input, return null.
 | |
|      *
 | |
|      * @returns the node
 | |
|      */
 | |
|     readNode: function () {
 | |
|       var c = this.nextChar();
 | |
| 
 | |
|       if (c === undefined)
 | |
|         return null;
 | |
| 
 | |
|       // Read any text as Text node
 | |
|       var textNode;
 | |
|       if (c !== "<") {
 | |
|         --this.currentChar;
 | |
|         textNode = new Text();
 | |
|         var n = this.html.indexOf("<", this.currentChar);
 | |
|         if (n === -1) {
 | |
|           textNode.innerHTML = this.html.substring(this.currentChar, this.html.length);
 | |
|           this.currentChar = this.html.length;
 | |
|         } else {
 | |
|           textNode.innerHTML = this.html.substring(this.currentChar, n);
 | |
|           this.currentChar = n;
 | |
|         }
 | |
|         return textNode;
 | |
|       }
 | |
| 
 | |
|       if (this.match("![CDATA[")) {
 | |
|         var endChar = this.html.indexOf("]]>", this.currentChar);
 | |
|         if (endChar === -1) {
 | |
|           this.error("unclosed CDATA section");
 | |
|           return null;
 | |
|         }
 | |
|         textNode = new Text();
 | |
|         textNode.textContent = this.html.substring(this.currentChar, endChar);
 | |
|         this.currentChar = endChar + ("]]>").length;
 | |
|         return textNode;
 | |
|       }
 | |
| 
 | |
|       c = this.peekNext();
 | |
| 
 | |
|       // Read Comment node. Normally, Comment nodes know their inner
 | |
|       // textContent, but we don't really care about Comment nodes (we throw
 | |
|       // them away in readChildren()). So just returning an empty Comment node
 | |
|       // here is sufficient.
 | |
|       if (c === "!" || c === "?") {
 | |
|         // We're still before the ! or ? that is starting this comment:
 | |
|         this.currentChar++;
 | |
|         return this.discardNextComment();
 | |
|       }
 | |
| 
 | |
|       // If we're reading a closing tag, return null. This means we've reached
 | |
|       // the end of this set of child nodes.
 | |
|       if (c === "/") {
 | |
|         --this.currentChar;
 | |
|         return null;
 | |
|       }
 | |
| 
 | |
|       // Otherwise, we're looking at an Element node
 | |
|       var result = this.makeElementNode(this.retPair);
 | |
|       if (!result)
 | |
|         return null;
 | |
| 
 | |
|       var node = this.retPair[0];
 | |
|       var closed = this.retPair[1];
 | |
|       var localName = node.localName;
 | |
| 
 | |
|       // If this isn't a void Element, read its child nodes
 | |
|       if (!closed) {
 | |
|         this.readChildren(node);
 | |
|         var closingTag = "</" + node._matchingTag + ">";
 | |
|         if (!this.match(closingTag)) {
 | |
|           this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length));
 | |
|           return null;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       // Only use the first title, because SVG might have other
 | |
|       // title elements which we don't care about (medium.com
 | |
|       // does this, at least).
 | |
|       if (localName === "title" && !this.doc.title) {
 | |
|         this.doc.title = node.textContent.trim();
 | |
|       } else if (localName === "head") {
 | |
|         this.doc.head = node;
 | |
|       } else if (localName === "body") {
 | |
|         this.doc.body = node;
 | |
|       } else if (localName === "html") {
 | |
|         this.doc.documentElement = node;
 | |
|       }
 | |
| 
 | |
|       return node;
 | |
|     },
 | |
| 
 | |
|     /**
 | |
|      * Parses an HTML string and returns a JS implementation of the Document.
 | |
|      */
 | |
|     parse: function (html, url) {
 | |
|       this.html = html;
 | |
|       var doc = this.doc = new Document(url);
 | |
|       this.readChildren(doc);
 | |
| 
 | |
|       // If this is an HTML document, remove root-level children except for the
 | |
|       // <html> node
 | |
|       if (doc.documentElement) {
 | |
|         for (var i = doc.childNodes.length; --i >= 0;) {
 | |
|           var child = doc.childNodes[i];
 | |
|           if (child !== doc.documentElement) {
 | |
|             doc.removeChild(child);
 | |
|           }
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       return doc;
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   // Attach the standard DOM types to the global scope
 | |
|   global.Node = Node;
 | |
|   global.Comment = Comment;
 | |
|   global.Document = Document;
 | |
|   global.Element = Element;
 | |
|   global.Text = Text;
 | |
| 
 | |
|   // Attach JSDOMParser to the global scope
 | |
|   global.JSDOMParser = JSDOMParser;
 | |
| 
 | |
| })(this);
 | 
