third_party/document_image_extractor/src/dom_utils.js - Issue 1138123002: Update third_party/document_image_extractor

Unified Diff: third_party/document_image_extractor/src/dom_utils.js

Issue 1138123002: Update third_party/document_image_extractor (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/document_image_extractor/src/dom_event.js ('k') | third_party/document_image_extractor/src/element_filter.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/document_image_extractor/src/dom_utils.js

diff --git a/third_party/document_image_extractor/src/dom_utils.js b/third_party/document_image_extractor/src/dom_utils.js

new file mode 100644

index 0000000000000000000000000000000000000000..e8127b4d9ceaa543824777b24e08a955c53c93c8

--- /dev/null

+++ b/third_party/document_image_extractor/src/dom_utils.js

@@ -0,0 +1,349 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+/**

+ * @fileoverview Provides copied versions of Closure library functions. The

+ * functions in this file are modified to remove non-Chrome compatibility

+ * code.

+ */

+goog.provide('image.collections.extension.domextractor.DomUtils');

+goog.require('image.collections.extension.domextractor.Size');

+goog.scope(function() {

+var DomUtils = image.collections.extension.domextractor.DomUtils;

+/**

+ * Inherit the prototype methods from one constructor into another.

+ *

+ * Usage:

+ * <pre>

+ * function ParentClass(a, b) { }

+ * ParentClass.prototype.foo = function(a) { };

+ *

+ * function ChildClass(a, b, c) {

+ * ChildClass.base(this, 'constructor', a, b);

+ * }

+ * DomUtils.inherits(ChildClass, ParentClass);

+ *

+ * var child = new ChildClass('a', 'b', 'see');

+ * child.foo(); // This works.

+ * </pre>

+ *

+ * @param {Function} childCtor Child class.

+ * @param {Function} parentCtor Parent class.

+ */

+image.collections.extension.domextractor.DomUtils.inherits =

+ function(childCtor, parentCtor) {

+ /** @constructor */

+ function tempCtor() {};

+ tempCtor.prototype = parentCtor.prototype;

+ childCtor.prototype = new tempCtor();

+ /** @override */

+ childCtor.prototype.constructor = childCtor;

+ /**

+ * Calls superclass constructor/method.

+ *

+ * @param {!Object} me Should always be "this".

+ * @param {string} methodName The method name to call. Calling

+ * superclass constructor can be done with the special string

+ * 'constructor'.

+ * @param {...*} var_args The arguments to pass to superclass

+ * method/constructor.

+ * @return {*} The return value of the superclass method/constructor.

+ */

+ childCtor.base = function(me, methodName, var_args) {

+ // Copying using loop to avoid deop due to passing arguments object to

+ // function. This is faster in many JS engines as of late 2014.

+ var args = new Array(arguments.length - 2);

+ for (var i = 2; i < arguments.length; i++) {

+ args[i - 2] = arguments[i];

+ }

+ return parentCtor.prototype[methodName].apply(me, args);

+ };

+};

+/**

+ * Map of tags whose content to ignore when calculating text length.

+ * @const {!Object<string, number>}

+ */

+var TAGS_TO_IGNORE = {

+ 'SCRIPT': 1,

+ 'STYLE': 1,

+ 'HEAD': 1,

+ 'IFRAME': 1,

+ 'OBJECT': 1

+};

+/**

+ * Map of tags which have predefined values with regard to whitespace.

+ * @const {!Object<string, string>}

+ */

+var PREDEFINED_TAG_VALUES = {'IMG': ' ', 'BR': '\n'};

+/** @const {number} */

+var ELEMENT_NODE_TYPE = 1;

+/** @const {number} */

+var TEXT_NODE_TYPE = 3;

+/** @const {number} */

+var DOCUMENT_NODE_TYPE = 9;

+/**

+ * Regular expression that matches an HTML entity.

+ * See also HTML5: Tokenization / Tokenizing character references.

+ * @type {!RegExp}

+ */

+var HTML_ENTITY_PATTERN = /&([^;\s<&]+);?/g;

+/**

+ * Retrieves a computed style value of a node. It returns empty string if the

+ * value cannot be computed (which will be the case in Internet Explorer) or

+ * "none" if the property requested is an SVG one and it has not been

+ * explicitly set (firefox and webkit).

+ *

+ * @param {!Element} element Element to get style of.

+ * @param {string} property Property to get (camel-case).

+ * @return {string} Style value.

+ */

+image.collections.extension.domextractor.DomUtils.getComputedStyle =

+ function(element, property) {

+ var doc = DomUtils.getOwnerDocument(element);

+ if (doc.defaultView && doc.defaultView.getComputedStyle) {

+ var styles = doc.defaultView.getComputedStyle(element, null);

+ if (styles) {

+ // element.style[..] is undefined for browser specific styles

+ // as 'filter'.

+ return styles[property] || styles.getPropertyValue(property) || '';

+ }

+ return '';

+};

+/**

+ * Gets the height and width of an element, even if its display is none.

+ *

+ * Specifically, this returns the height and width of the border box,

+ * irrespective of the box model in effect.

+ *

+ * Note that this function does not take CSS transforms into account.

+ * @param {!Element} element Element to get size of.

+ * @return {!image.collections.extension.domextractor.Size} Object with

+ * width/height properties.

+ */

+image.collections.extension.domextractor.DomUtils.getSize = function(element) {

+ if (DomUtils.getComputedStyle(element, 'display') != 'none') {

+ return DomUtils.getSizeWithDisplay_(element);

+ }

+ var style = element.style;

+ var originalDisplay = style.display;

+ var originalVisibility = style.visibility;

+ var originalPosition = style.position;

+ style.visibility = 'hidden';

+ style.position = 'absolute';

+ style.display = 'inline';

+ var retVal = DomUtils.getSizeWithDisplay_(element);

+ style.display = originalDisplay;

+ style.position = originalPosition;

+ style.visibility = originalVisibility;

+ return retVal;

+};

+/**

+ * Gets the height and width of an element when the display is not none.

+ * @param {!Element} element Element to get size of.

+ * @return {!image.collections.extension.domextractor.Size} Object with

+ * width/height properties.

+ * @private

+ */

+image.collections.extension.domextractor.DomUtils.getSizeWithDisplay_ =

+ function(element) {

+ var offsetWidth = element.offsetWidth;

+ var offsetHeight = element.offsetHeight;

+ var offsetsZero = !offsetWidth && !offsetHeight;

+ if ((offsetWidth === undefined) || offsetsZero) {

+ // Fall back to calling getBoundingClientRect when offsetWidth or

+ // offsetHeight are not defined, or when they are zero.

+ // This makes sure that we return for the correct size for SVG elements.

+ var clientRect = element.getBoundingClientRect();

+ return new image.collections.extension.domextractor.Size(

+ clientRect.right - clientRect.left, clientRect.bottom - clientRect.top);

+ }

+ return new image.collections.extension.domextractor.Size(

+ offsetWidth, offsetHeight);

+};

+/**

+ * Returns the owner document for a node.

+ * @param {!Node|!Window} node The node to get the document for.

+ * @return {!Document} The document owning the node.

+ */

+image.collections.extension.domextractor.DomUtils.getOwnerDocument =

+ function(node) {

+ return /** @type {!Document} */ (node.nodeType == DOCUMENT_NODE_TYPE ?

+ node : node.ownerDocument || node.document);

+};

+/**

+ * Returns an element's parent, if it's an Element.

+ * @param {Element} element The DOM element.

+ * @return {Element} The parent, or null if not an Element.

+ */

+image.collections.extension.domextractor.DomUtils.getParentElement =

+ function(element) {

+ if (element.parentElement) {

+ return element.parentElement;

+ }

+ var parent = element.parentNode;

+ if (typeof parent == 'object' && parent.nodeType == ELEMENT_NODE_TYPE) {

+ return /** @type {!Element} */ (parent);

+ }

+ return null;

+};

+/**

+ * Returns the top coordinate of an element relative to the HTML document

+ * @param {!Element} el Elements.

+ * @return {number} The top coordinate.

+ */

+image.collections.extension.domextractor.DomUtils.getPageOffsetTop =

+ function(el) {

+ var doc = DomUtils.getOwnerDocument(el);

+ if (el == doc.documentElement) {

+ // viewport is always at 0,0 as that defined the coordinate system for this

+ // function - this avoids special case checks in the code below

+ return 0;

+ }

+ // Must add the scroll coordinates in to get the absolute page offset

+ // of element since getBoundingClientRect returns relative coordinates to

+ // the viewport.

+ var documentScrollElement = doc.body || doc.documentElement;

+ var win = doc.defaultView;

+ var scrollOffset = win.pageYOffset || documentScrollElement.scrollTop;

+ return el.getBoundingClientRect().top + scrollOffset;

+};

+/**

+ * Returns the text content of the current node, without markup and invisible

+ * symbols. New lines are stripped and whitespace is collapsed,

+ * such that each character would be visible.

+ *

+ * @param {Node} node The node from which we are getting content.

+ * @return {string} The text content.

+ */

+DomUtils.getTextContent = function(node) {

+ var textContent;

+ var buf = [];

+ DomUtils.getTextContent_(node, buf);

+ textContent = buf.join('');

+ textContent = textContent.replace(/ +/g, ' ');

+ if (textContent != ' ') {

+ textContent = textContent.replace(/^\s*/, '');

+ }

+ return textContent;

+};

+/**

+ * Recursive support function for text content retrieval.

+ *

+ * @param {Node} node The node from which we are getting content.

+ * @param {Array<string>} buf string buffer.

+ * @private

+ */

+image.collections.extension.domextractor.DomUtils.getTextContent_ =

+ function(node, buf) {

+ if (node.nodeName in TAGS_TO_IGNORE) {

+ // ignore certain tags

+ } else if (node.nodeType == TEXT_NODE_TYPE) {

+ // Text node

+ buf.push(String(node.nodeValue).replace(/(\r\n|\r|\n)/g, ''));

+ } else if (node.nodeName in PREDEFINED_TAG_VALUES) {

+ buf.push(PREDEFINED_TAG_VALUES[node.nodeName]);

+ } else {

+ var child = node.firstChild;

+ while (child) {

+ DomUtils.getTextContent_(child, buf);

+ child = child.nextSibling;

+ }

+};

+/**

+ * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric

+ * entities. This function is XSS-safe and whitespace-preserving.

+ * @param {string} str The string to unescape.

+ * @param {Document=} opt_document An optional document to use for creating

+ * elements. If this is not specified then the default window.document

+ * will be used.

+ * @return {string} The unescaped {@code str} string.

+ */

+image.collections.extension.domextractor.DomUtils.unescapeEntitiesUsingDom =

+ function(str, opt_document) {

+ if (str.indexOf('&') == -1) {

+ return str;

+ }

+ /** @type {!Object<string, string>} */

+ var seen = {'&': '&', '<': '<', '>': '>', '"': '"'};

+ var div;

+ if (opt_document) {

+ div = opt_document.createElement('div');

+ } else {

+ div = document.createElement('div');

+ }

+ // Match as many valid entity characters as possible. If the actual entity

+ // happens to be shorter, it will still work as innerHTML will return the

+ // trailing characters unchanged. Since the entity characters do not include

+ // open angle bracket, there is no chance of XSS from the innerHTML use.

+ // Since no whitespace is passed to innerHTML, whitespace is preserved.

+ return str.replace(HTML_ENTITY_PATTERN, function(s, entity) {

+ // Check for cached entity.

+ var value = seen[s];

+ if (value) {

+ return value;

+ }

+ // Check for numeric entity.

+ if (entity.charAt(0) == '#') {

+ // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.

+ var n = Number('0' + entity.substr(1));

+ if (!isNaN(n)) {

+ value = String.fromCharCode(n);

+ }

+ // Fall back to innerHTML otherwise.

+ if (!value) {

+ // Append a non-entity character to avoid a bug in Webkit that parses

+ // an invalid entity at the end of innerHTML text as the empty string.

+ div.innerHTML = s + ' ';

+ // Then remove the trailing character from the result.

+ value = div.firstChild.nodeValue.slice(0, -1);

+ }

+ // Cache and return.

+ return seen[s] = value;

+ });

+};

+}); // goog.scope