Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: third_party/document_image_extractor/src/dom_utils.js

Issue 1138123002: Update third_party/document_image_extractor (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 /**
6 * @fileoverview Provides copied versions of Closure library functions. The
7 * functions in this file are modified to remove non-Chrome compatibility
8 * code.
9 */
10 goog.provide('image.collections.extension.domextractor.DomUtils');
11
12 goog.require('image.collections.extension.domextractor.Size');
13
14
15 goog.scope(function() {
16 var DomUtils = image.collections.extension.domextractor.DomUtils;
17
18
/**
 * Makes one constructor inherit the prototype methods of another, and
 * installs a static {@code base} helper on the child for calling into the
 * parent.
 *
 * Usage:
 * <pre>
 * function ParentClass(a, b) { }
 * ParentClass.prototype.foo = function(a) { };
 *
 * function ChildClass(a, b, c) {
 *   ChildClass.base(this, 'constructor', a, b);
 * }
 * DomUtils.inherits(ChildClass, ParentClass);
 *
 * var child = new ChildClass('a', 'b', 'see');
 * child.foo(); // This works.
 * </pre>
 *
 * @param {Function} childCtor Child class.
 * @param {Function} parentCtor Parent class.
 */
image.collections.extension.domextractor.DomUtils.inherits =
    function(childCtor, parentCtor) {
  // An empty intermediate constructor links the child prototype to the parent
  // prototype without invoking the parent constructor.
  /** @constructor */
  function Intermediate() {}
  Intermediate.prototype = parentCtor.prototype;
  childCtor.prototype = new Intermediate();
  /** @override */
  childCtor.prototype.constructor = childCtor;

  /**
   * Invokes a method (or the constructor) of the parent class.
   *
   * @param {!Object} me Should always be "this".
   * @param {string} methodName Name of the parent method to invoke. Pass the
   *     special string 'constructor' to invoke the parent constructor.
   * @param {...*} var_args Arguments forwarded to the parent
   *     method/constructor.
   * @return {*} Whatever the parent method/constructor returns.
   */
  childCtor.base = function(me, methodName, var_args) {
    // The arguments object is copied element by element rather than being
    // handed to another function, which avoids engine deoptimization.
    var forwardedCount = arguments.length - 2;
    var forwarded = new Array(forwardedCount);
    for (var k = 0; k < forwardedCount; k++) {
      forwarded[k] = arguments[k + 2];
    }
    var parentMethod = parentCtor.prototype[methodName];
    return parentMethod.apply(me, forwarded);
  };
};
69
70
/**
 * Value of Node.nodeType for element nodes.
 * @const {number}
 */
var ELEMENT_NODE_TYPE = 1;

/**
 * Value of Node.nodeType for text nodes.
 * @const {number}
 */
var TEXT_NODE_TYPE = 3;

/**
 * Value of Node.nodeType for document nodes.
 * @const {number}
 */
var DOCUMENT_NODE_TYPE = 9;

/**
 * Tags whose content is skipped when text content is collected. Only the keys
 * matter; lookups are done with the {@code in} operator.
 * @const {!Object<string, number>}
 */
var TAGS_TO_IGNORE = {
  'SCRIPT': 1,
  'STYLE': 1,
  'HEAD': 1,
  'IFRAME': 1,
  'OBJECT': 1
};

/**
 * Tags that contribute a fixed piece of whitespace to the collected text
 * instead of their own content.
 * @const {!Object<string, string>}
 */
var PREDEFINED_TAG_VALUES = {
  'IMG': ' ',
  'BR': '\n'
};

/**
 * Matches an HTML entity: an ampersand, one or more characters that are not
 * ';', whitespace, '<' or '&', and an optional terminating semicolon.
 * See also HTML5: Tokenization / Tokenizing character references.
 * @type {!RegExp}
 */
var HTML_ENTITY_PATTERN = /&([^;\s<&]+);?/g;
104
105
106
/**
 * Looks up a computed style value for an element.
 *
 * The empty string is returned when the owner document has no default view,
 * when the view has no getComputedStyle, when no style declaration is
 * produced, or when the property has no value.
 *
 * @param {!Element} element Element to get style of.
 * @param {string} property Property to get (camel-case).
 * @return {string} Style value.
 */
image.collections.extension.domextractor.DomUtils.getComputedStyle =
    function(element, property) {
  var ownerDoc = DomUtils.getOwnerDocument(element);
  if (!ownerDoc.defaultView || !ownerDoc.defaultView.getComputedStyle) {
    return '';
  }

  var computed = ownerDoc.defaultView.getComputedStyle(element, null);
  if (!computed) {
    return '';
  }

  // Indexed access comes first; getPropertyValue covers properties that are
  // not exposed as an indexed member of the declaration.
  return computed[property] || computed.getPropertyValue(property) || '';
};
131
132
/**
 * Gets the height and width of an element, even if its display is none.
 *
 * Specifically, this returns the height and width of the border box,
 * irrespective of the box model in effect.
 *
 * When the computed display is 'none', the element's inline visibility,
 * position and display are temporarily overridden so that it can be measured.
 * The original inline values are always put back afterwards, including when
 * the measurement throws.
 *
 * Note that this function does not take CSS transforms into account.
 * @param {!Element} element Element to get size of.
 * @return {!image.collections.extension.domextractor.Size} Object with
 *     width/height properties.
 */
image.collections.extension.domextractor.DomUtils.getSize = function(element) {
  if (DomUtils.getComputedStyle(element, 'display') !== 'none') {
    return DomUtils.getSizeWithDisplay_(element);
  }

  var style = element.style;
  var originalDisplay = style.display;
  var originalVisibility = style.visibility;
  var originalPosition = style.position;

  // Lay the element out invisibly and out of flow so it has a size.
  style.visibility = 'hidden';
  style.position = 'absolute';
  style.display = 'inline';

  try {
    return DomUtils.getSizeWithDisplay_(element);
  } finally {
    // Runs on both the normal and the exceptional path so the element is
    // never left with the temporary measuring styles applied.
    style.display = originalDisplay;
    style.position = originalPosition;
    style.visibility = originalVisibility;
  }
};
166
167
/**
 * Measures an element whose display is not none.
 *
 * The offsetWidth/offsetHeight pair is used when offsetWidth is defined and
 * at least one of the two is non-zero. Otherwise the size is derived from the
 * element's bounding client rect.
 *
 * @param {!Element} element Element to get size of.
 * @return {!image.collections.extension.domextractor.Size} Object with
 *     width/height properties.
 * @private
 */
image.collections.extension.domextractor.DomUtils.getSizeWithDisplay_ =
    function(element) {
  var width = element.offsetWidth;
  var height = element.offsetHeight;

  var offsetsUsable = (width !== undefined) && Boolean(width || height);
  if (offsetsUsable) {
    return new image.collections.extension.domextractor.Size(width, height);
  }

  // Offsets are missing or both zero: measure via the bounding rect instead,
  // which makes sure the correct size is returned for SVG elements.
  var rect = element.getBoundingClientRect();
  return new image.collections.extension.domextractor.Size(
      rect.right - rect.left, rect.bottom - rect.top);
};
191
192
/**
 * Finds the document that owns a node.
 *
 * A document node is returned as is. For anything else the ownerDocument
 * property is used, falling back to the document property.
 *
 * @param {!Node|!Window} node The node to get the document for.
 * @return {!Document} The document owning the node.
 */
image.collections.extension.domextractor.DomUtils.getOwnerDocument =
    function(node) {
  if (node.nodeType == DOCUMENT_NODE_TYPE) {
    return /** @type {!Document} */ (node);
  }
  return /** @type {!Document} */ (node.ownerDocument || node.document);
};
203
204
/**
 * Returns an element's parent, if it's an Element.
 *
 * Returns null when the element itself is null, when it has no parent (for
 * example a detached element), or when its parent is not an element node.
 *
 * @param {Element} element The DOM element.
 * @return {Element} The parent, or null if not an Element.
 */
image.collections.extension.domextractor.DomUtils.getParentElement =
    function(element) {
  if (!element) {
    return null;
  }
  if (element.parentElement) {
    return element.parentElement;
  }
  var parent = element.parentNode;
  // typeof null is 'object', so a missing parent has to be ruled out
  // explicitly before nodeType is read.
  if (parent && typeof parent == 'object' &&
      parent.nodeType == ELEMENT_NODE_TYPE) {
    return /** @type {!Element} */ (parent);
  }
  return null;
};
221
222
/**
 * Computes the top coordinate of an element relative to the HTML document.
 *
 * @param {!Element} el Element to locate.
 * @return {number} The top coordinate.
 */
image.collections.extension.domextractor.DomUtils.getPageOffsetTop =
    function(el) {
  var ownerDoc = DomUtils.getOwnerDocument(el);

  // The document element defines the coordinate system used here, so it is
  // at 0 by definition and needs none of the work below.
  if (el == ownerDoc.documentElement) {
    return 0;
  }

  // getBoundingClientRect reports coordinates relative to the viewport, so
  // the vertical scroll position has to be added to obtain a page offset.
  var scrollingElement = ownerDoc.body || ownerDoc.documentElement;
  var view = ownerDoc.defaultView;
  var scrolledBy = view.pageYOffset || scrollingElement.scrollTop;

  return el.getBoundingClientRect().top + scrolledBy;
};
246
247
/**
 * Returns the text content of the current node, without markup.
 *
 * The text is gathered by getTextContent_. Runs of consecutive space
 * characters are then collapsed into a single space, and leading whitespace
 * is removed unless the whole result is exactly one space.
 *
 * @param {Node} node The node from which we are getting content.
 * @return {string} The text content.
 */
image.collections.extension.domextractor.DomUtils.getTextContent =
    function(node) {
  var buf = [];
  DomUtils.getTextContent_(node, buf);

  var textContent = buf.join('').replace(/ +/g, ' ');
  // A result that is a single space is kept as is; anything else loses its
  // leading whitespace.
  if (textContent != ' ') {
    textContent = textContent.replace(/^\s*/, '');
  }

  return textContent;
};
269
/**
 * Recursive helper that appends the text found under a node to a buffer.
 *
 * Nodes named in TAGS_TO_IGNORE contribute nothing. Text nodes contribute
 * their value with line breaks removed. Nodes named in PREDEFINED_TAG_VALUES
 * contribute their fixed replacement. Any other node is descended into.
 *
 * @param {Node} node The node from which we are getting content.
 * @param {Array<string>} buf string buffer.
 * @private
 */
image.collections.extension.domextractor.DomUtils.getTextContent_ =
    function(node, buf) {
  var name = node.nodeName;

  if (name in TAGS_TO_IGNORE) {
    // Content of these tags is never part of the text.
    return;
  }

  if (node.nodeType == TEXT_NODE_TYPE) {
    var text = String(node.nodeValue);
    buf.push(text.replace(/(\r\n|\r|\n)/g, ''));
    return;
  }

  if (name in PREDEFINED_TAG_VALUES) {
    buf.push(PREDEFINED_TAG_VALUES[name]);
    return;
  }

  for (var child = node.firstChild; child; child = child.nextSibling) {
    DomUtils.getTextContent_(child, buf);
  }
};
294
295
/**
 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
 * entities. This function is XSS-safe and whitespace-preserving.
 *
 * Numeric entities are decoded directly. Values above U+FFFF are emitted as a
 * UTF-16 surrogate pair, and values above U+10FFFF (which are not code points)
 * are left to the HTML parser.
 *
 * @param {string} str The string to unescape.
 * @param {Document=} opt_document An optional document to use for creating
 *     elements. If this is not specified then the default window.document
 *     will be used.
 * @return {string} The unescaped {@code str} string.
 */
image.collections.extension.domextractor.DomUtils.unescapeEntitiesUsingDom =
    function(str, opt_document) {
  if (str.indexOf('&') == -1) {
    return str;
  }
  // Cache of already resolved entities, pre-seeded with the XML ones.
  /** @type {!Object<string, string>} */
  var seen = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  var div;
  if (opt_document) {
    div = opt_document.createElement('div');
  } else {
    div = document.createElement('div');
  }
  // Match as many valid entity characters as possible. If the actual entity
  // happens to be shorter, it will still work as innerHTML will return the
  // trailing characters unchanged. Since the entity characters do not include
  // open angle bracket, there is no chance of XSS from the innerHTML use.
  // Since no whitespace is passed to innerHTML, whitespace is preserved.
  return str.replace(HTML_ENTITY_PATTERN, function(s, entity) {
    // Check for cached entity.
    var value = seen[s];
    if (value) {
      return value;
    }
    // Check for numeric entity.
    if (entity.charAt(0) == '#') {
      // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.
      var n = Number('0' + entity.substr(1));
      if (!isNaN(n)) {
        if (n <= 0xFFFF) {
          value = String.fromCharCode(n);
        } else if (n <= 0x10FFFF) {
          // String.fromCharCode keeps only the low 16 bits of each argument,
          // so a supplementary code point must be split into a surrogate
          // pair by hand.
          var offset = n - 0x10000;
          value = String.fromCharCode(
              0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        }
        // Anything larger is not a code point; value stays unset so that the
        // HTML parser below decides what it becomes.
      }
    }
    // Fall back to innerHTML otherwise.
    if (!value) {
      // Append a non-entity character to avoid a bug in Webkit that parses
      // an invalid entity at the end of innerHTML text as the empty string.
      div.innerHTML = s + ' ';
      // Then remove the trailing character from the result.
      value = div.firstChild.nodeValue.slice(0, -1);
    }
    // Cache and return.
    return seen[s] = value;
  });
};
349 }); // goog.scope
OLDNEW
« no previous file with comments | « third_party/document_image_extractor/src/dom_event.js ('k') | third_party/document_image_extractor/src/element_filter.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698