Index: third_party/document_image_extractor/src/dom_utils.js |
diff --git a/third_party/document_image_extractor/src/dom_utils.js b/third_party/document_image_extractor/src/dom_utils.js |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e8127b4d9ceaa543824777b24e08a955c53c93c8 |
--- /dev/null |
+++ b/third_party/document_image_extractor/src/dom_utils.js |
@@ -0,0 +1,349 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+/** |
+ * @fileoverview Provides copied versions of Closure library functions. The |
+ * functions in this file are modified to remove non-Chrome compatibility |
+ * code. |
+ */ |
+goog.provide('image.collections.extension.domextractor.DomUtils'); |
+ |
+goog.require('image.collections.extension.domextractor.Size'); |
+ |
+ |
+goog.scope(function() { |
+var DomUtils = image.collections.extension.domextractor.DomUtils; |
+ |
+ |
+/** |
+ * Inherit the prototype methods from one constructor into another. |
+ * |
+ * Usage: |
+ * <pre> |
+ * function ParentClass(a, b) { } |
+ * ParentClass.prototype.foo = function(a) { }; |
+ * |
+ * function ChildClass(a, b, c) { |
+ * ChildClass.base(this, 'constructor', a, b); |
+ * } |
+ * DomUtils.inherits(ChildClass, ParentClass); |
+ * |
+ * var child = new ChildClass('a', 'b', 'see'); |
+ * child.foo(); // This works. |
+ * </pre> |
+ * |
+ * @param {Function} childCtor Child class. |
+ * @param {Function} parentCtor Parent class. |
+ */ |
+image.collections.extension.domextractor.DomUtils.inherits = |
+ function(childCtor, parentCtor) { |
+ /** @constructor */ |
+ function tempCtor() {}; |
+ tempCtor.prototype = parentCtor.prototype; |
+ childCtor.prototype = new tempCtor(); |
+ /** @override */ |
+ childCtor.prototype.constructor = childCtor; |
+ |
+ /** |
+ * Calls superclass constructor/method. |
+ * |
+ * @param {!Object} me Should always be "this". |
+ * @param {string} methodName The method name to call. Calling |
+ * superclass constructor can be done with the special string |
+ * 'constructor'. |
+ * @param {...*} var_args The arguments to pass to superclass |
+ * method/constructor. |
+ * @return {*} The return value of the superclass method/constructor. |
+ */ |
+ childCtor.base = function(me, methodName, var_args) { |
+ // Copying using loop to avoid deop due to passing arguments object to |
+ // function. This is faster in many JS engines as of late 2014. |
+ var args = new Array(arguments.length - 2); |
+ for (var i = 2; i < arguments.length; i++) { |
+ args[i - 2] = arguments[i]; |
+ } |
+ return parentCtor.prototype[methodName].apply(me, args); |
+ }; |
+}; |
+ |
+ |
+/** |
+ * Map of tags whose content to ignore when calculating text length. |
+ * @const {!Object<string, number>} |
+ */ |
+var TAGS_TO_IGNORE = { |
+ 'SCRIPT': 1, |
+ 'STYLE': 1, |
+ 'HEAD': 1, |
+ 'IFRAME': 1, |
+ 'OBJECT': 1 |
+}; |
+ |
+/** |
+ * Map of tags which have predefined values with regard to whitespace. |
+ * @const {!Object<string, string>} |
+ */ |
+var PREDEFINED_TAG_VALUES = {'IMG': ' ', 'BR': '\n'}; |
+ |
+/** @const {number} */ |
+var ELEMENT_NODE_TYPE = 1; |
+ |
+/** @const {number} */ |
+var TEXT_NODE_TYPE = 3; |
+ |
+/** @const {number} */ |
+var DOCUMENT_NODE_TYPE = 9; |
+ |
+/** |
+ * Regular expression that matches an HTML entity. |
+ * See also HTML5: Tokenization / Tokenizing character references. |
+ * @type {!RegExp} |
+ */ |
+var HTML_ENTITY_PATTERN = /&([^;\s<&]+);?/g; |
+ |
+ |
+ |
+/** |
+ * Retrieves a computed style value of a node. It returns empty string if the |
+ * value cannot be computed (which will be the case in Internet Explorer) or |
+ * "none" if the property requested is an SVG one and it has not been |
+ * explicitly set (firefox and webkit). |
+ * |
+ * @param {!Element} element Element to get style of. |
+ * @param {string} property Property to get (camel-case). |
+ * @return {string} Style value. |
+ */ |
+image.collections.extension.domextractor.DomUtils.getComputedStyle = |
+ function(element, property) { |
+ var doc = DomUtils.getOwnerDocument(element); |
+ if (doc.defaultView && doc.defaultView.getComputedStyle) { |
+ var styles = doc.defaultView.getComputedStyle(element, null); |
+ if (styles) { |
+ // element.style[..] is undefined for browser specific styles |
+ // as 'filter'. |
+ return styles[property] || styles.getPropertyValue(property) || ''; |
+ } |
+ } |
+ |
+ return ''; |
+}; |
+ |
+ |
+/** |
+ * Gets the height and width of an element, even if its display is none. |
+ * |
+ * Specifically, this returns the height and width of the border box, |
+ * irrespective of the box model in effect. |
+ * |
+ * Note that this function does not take CSS transforms into account. |
+ * @param {!Element} element Element to get size of. |
+ * @return {!image.collections.extension.domextractor.Size} Object with |
+ * width/height properties. |
+ */ |
+image.collections.extension.domextractor.DomUtils.getSize = function(element) { |
+ if (DomUtils.getComputedStyle(element, 'display') != 'none') { |
+ return DomUtils.getSizeWithDisplay_(element); |
+ } |
+ |
+ var style = element.style; |
+ var originalDisplay = style.display; |
+ var originalVisibility = style.visibility; |
+ var originalPosition = style.position; |
+ |
+ style.visibility = 'hidden'; |
+ style.position = 'absolute'; |
+ style.display = 'inline'; |
+ |
+ var retVal = DomUtils.getSizeWithDisplay_(element); |
+ |
+ style.display = originalDisplay; |
+ style.position = originalPosition; |
+ style.visibility = originalVisibility; |
+ |
+ return retVal; |
+}; |
+ |
+ |
+/** |
+ * Gets the height and width of an element when the display is not none. |
+ * @param {!Element} element Element to get size of. |
+ * @return {!image.collections.extension.domextractor.Size} Object with |
+ * width/height properties. |
+ * @private |
+ */ |
+image.collections.extension.domextractor.DomUtils.getSizeWithDisplay_ = |
+ function(element) { |
+ var offsetWidth = element.offsetWidth; |
+ var offsetHeight = element.offsetHeight; |
+ var offsetsZero = !offsetWidth && !offsetHeight; |
+ if ((offsetWidth === undefined) || offsetsZero) { |
+ // Fall back to calling getBoundingClientRect when offsetWidth or |
+ // offsetHeight are not defined, or when they are zero. |
+ // This makes sure that we return for the correct size for SVG elements. |
+ var clientRect = element.getBoundingClientRect(); |
+ return new image.collections.extension.domextractor.Size( |
+ clientRect.right - clientRect.left, clientRect.bottom - clientRect.top); |
+ } |
+ return new image.collections.extension.domextractor.Size( |
+ offsetWidth, offsetHeight); |
+}; |
+ |
+ |
+/** |
+ * Returns the owner document for a node. |
+ * @param {!Node|!Window} node The node to get the document for. |
+ * @return {!Document} The document owning the node. |
+ */ |
+image.collections.extension.domextractor.DomUtils.getOwnerDocument = |
+ function(node) { |
+ return /** @type {!Document} */ (node.nodeType == DOCUMENT_NODE_TYPE ? |
+ node : node.ownerDocument || node.document); |
+}; |
+ |
+ |
+/** |
+ * Returns an element's parent, if it's an Element. |
+ * @param {Element} element The DOM element. |
+ * @return {Element} The parent, or null if not an Element. |
+ */ |
+image.collections.extension.domextractor.DomUtils.getParentElement = |
+ function(element) { |
+ if (element.parentElement) { |
+ return element.parentElement; |
+ } |
+ var parent = element.parentNode; |
+ if (typeof parent == 'object' && parent.nodeType == ELEMENT_NODE_TYPE) { |
+ return /** @type {!Element} */ (parent); |
+ } |
+ return null; |
+}; |
+ |
+ |
+/** |
+ * Returns the top coordinate of an element relative to the HTML document |
+ * @param {!Element} el Elements. |
+ * @return {number} The top coordinate. |
+ */ |
+image.collections.extension.domextractor.DomUtils.getPageOffsetTop = |
+ function(el) { |
+ var doc = DomUtils.getOwnerDocument(el); |
+ if (el == doc.documentElement) { |
+ // viewport is always at 0,0 as that defined the coordinate system for this |
+ // function - this avoids special case checks in the code below |
+ return 0; |
+ } |
+ |
+ // Must add the scroll coordinates in to get the absolute page offset |
+ // of element since getBoundingClientRect returns relative coordinates to |
+ // the viewport. |
+ var documentScrollElement = doc.body || doc.documentElement; |
+ var win = doc.defaultView; |
+ var scrollOffset = win.pageYOffset || documentScrollElement.scrollTop; |
+ |
+ return el.getBoundingClientRect().top + scrollOffset; |
+}; |
+ |
+ |
+/** |
+ * Returns the text content of the current node, without markup and invisible |
+ * symbols. New lines are stripped and whitespace is collapsed, |
+ * such that each character would be visible. |
+ * |
+ * @param {Node} node The node from which we are getting content. |
+ * @return {string} The text content. |
+ */ |
+DomUtils.getTextContent = function(node) { |
+ var textContent; |
+ var buf = []; |
+ DomUtils.getTextContent_(node, buf); |
+ textContent = buf.join(''); |
+ |
+ textContent = textContent.replace(/ +/g, ' '); |
+ if (textContent != ' ') { |
+ textContent = textContent.replace(/^\s*/, ''); |
+ } |
+ |
+ return textContent; |
+}; |
+ |
+/** |
+ * Recursive support function for text content retrieval. |
+ * |
+ * @param {Node} node The node from which we are getting content. |
+ * @param {Array<string>} buf string buffer. |
+ * @private |
+ */ |
+image.collections.extension.domextractor.DomUtils.getTextContent_ = |
+ function(node, buf) { |
+ if (node.nodeName in TAGS_TO_IGNORE) { |
+ // ignore certain tags |
+ } else if (node.nodeType == TEXT_NODE_TYPE) { |
+ // Text node |
+ buf.push(String(node.nodeValue).replace(/(\r\n|\r|\n)/g, '')); |
+ } else if (node.nodeName in PREDEFINED_TAG_VALUES) { |
+ buf.push(PREDEFINED_TAG_VALUES[node.nodeName]); |
+ } else { |
+ var child = node.firstChild; |
+ while (child) { |
+ DomUtils.getTextContent_(child, buf); |
+ child = child.nextSibling; |
+ } |
+ } |
+}; |
+ |
+ |
+/** |
+ * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric |
+ * entities. This function is XSS-safe and whitespace-preserving. |
+ * @param {string} str The string to unescape. |
+ * @param {Document=} opt_document An optional document to use for creating |
+ * elements. If this is not specified then the default window.document |
+ * will be used. |
+ * @return {string} The unescaped {@code str} string. |
+ */ |
+image.collections.extension.domextractor.DomUtils.unescapeEntitiesUsingDom = |
+ function(str, opt_document) { |
+ if (str.indexOf('&') == -1) { |
+ return str; |
+ } |
+ /** @type {!Object<string, string>} */ |
+ var seen = {'&': '&', '<': '<', '>': '>', '"': '"'}; |
+ var div; |
+ if (opt_document) { |
+ div = opt_document.createElement('div'); |
+ } else { |
+ div = document.createElement('div'); |
+ } |
+ // Match as many valid entity characters as possible. If the actual entity |
+ // happens to be shorter, it will still work as innerHTML will return the |
+ // trailing characters unchanged. Since the entity characters do not include |
+ // open angle bracket, there is no chance of XSS from the innerHTML use. |
+ // Since no whitespace is passed to innerHTML, whitespace is preserved. |
+ return str.replace(HTML_ENTITY_PATTERN, function(s, entity) { |
+ // Check for cached entity. |
+ var value = seen[s]; |
+ if (value) { |
+ return value; |
+ } |
+ // Check for numeric entity. |
+ if (entity.charAt(0) == '#') { |
+ // Prefix with 0 so that hex entities (e.g. ) parse as hex numbers. |
+ var n = Number('0' + entity.substr(1)); |
+ if (!isNaN(n)) { |
+ value = String.fromCharCode(n); |
+ } |
+ } |
+ // Fall back to innerHTML otherwise. |
+ if (!value) { |
+ // Append a non-entity character to avoid a bug in Webkit that parses |
+ // an invalid entity at the end of innerHTML text as the empty string. |
+ div.innerHTML = s + ' '; |
+ // Then remove the trailing character from the result. |
+ value = div.firstChild.nodeValue.slice(0, -1); |
+ } |
+ // Cache and return. |
+ return seen[s] = value; |
+ }); |
+}; |
+}); // goog.scope |