Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: isVisibleByOffset() method improved. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 }-*/; 95 }-*/;
96 96
97 public static boolean isVisible(Element e) { 97 public static boolean isVisible(Element e) {
98 Style style = getComputedStyle(e); 98 Style style = getComputedStyle(e);
99 double opacity = JavaScript.parseFloat(style.getOpacity()); 99 double opacity = JavaScript.parseFloat(style.getOpacity());
100 return !(style.getDisplay().equals("none") || 100 return !(style.getDisplay().equals("none") ||
101 style.getVisibility().equals("hidden") || 101 style.getVisibility().equals("hidden") ||
102 opacity == 0.0F); 102 opacity == 0.0F);
103 } 103 }
104 104
105 /**
106 * Verifies if a given element is visible by checking its offset.
107 */
108 public static boolean isVisibleByOffset(Element e) {
109 // Detect whether any of the ancestors has "display: none".
110 // Using offsetParent alone wouldn't work because it's also null
111 // when position is fixed.
112 // Using offsetHeight/Width alone makes sense in production,
113 // but we have too many zero-sized elements in our tests.
114 return e.getOffsetParent() != null || e.getOffsetHeight() != 0
115 || e.getOffsetWidth() != 0;
116 }
117
118 /**
119 * Get the element of the main article, if any.
120 * @return An element of article (not necessarily the html5 article element).
121 */
122 public static Element getArticleElement(Element root) {
123 NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
124 List<Element> visibleElements = getVisibleElements(allArticles);
125 // Having multiple article elements usually indicates a bad case for this shortcut.
126 // TODO(wychen): some sites exclude things like title and author in article element.
127 if (visibleElements.size() == 1) {
128 return visibleElements.get(0);
129 }
130 // Note that the CSS property matching is case sensitive, and "Article" is the correct
131 // capitalization.
132 String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype* =\"Post\"]";
133 allArticles = DomUtil.querySelectorAll(root, query);
134 visibleElements = getVisibleElements(allArticles);
135 // It is commonly seen that the article is wrapped separately or in multiple layers.
136 if (visibleElements.size() > 0) {
137 return Element.as(DomUtil.getNearestCommonAncestor(visibleElements)) ;
138 }
139 return null;
140 }
141
142 /**
143 * Get a list of visible elements.
144 * @return A list of visible elements.
145 */
146 public static List<Element> getVisibleElements(NodeList<Element> nodeList) {
wychen 2016/06/06 18:11:22 nit: I meant the double space public static List<E
marcelorcorrea 2016/06/06 19:21:59 Done.
147 List<Element> visibleElements = new ArrayList<>();
148 for (int i = 0; i < nodeList.getLength(); i++) {
149 Element element = nodeList.getItem(i);
150 if (DomUtil.isVisible(element) && DomUtil.isVisibleByOffset(element) ) {
151 visibleElements.add(element);
152 }
153 }
154 return visibleElements;
155 }
156
105 /* 157 /*
106 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's 158 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's
107 * implementation of Element::getInnerText(), which is intentionally different to mimic an old 159 * implementation of Element::getInnerText(), which is intentionally different to mimic an old
108 * IE behaviour, which returns text within <script> tags. 160 * IE behaviour, which returns text within <script> tags.
109 */ 161 */
110 public static native String getInnerText(Node node) /*-{ 162 public static native String getInnerText(Node node) /*-{
111 return node.innerText; 163 return node.innerText;
112 }-*/; 164 }-*/;
113 165
114 public static native double getTime() /*-{ 166 public static native double getTime() /*-{
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 216 */
165 public static Node getNearestCommonAncestor(final Node n1, final Node n2) { 217 public static Node getNearestCommonAncestor(final Node n1, final Node n2) {
166 Node parent = n1; 218 Node parent = n1;
167 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode(); 219 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode();
168 return parent; 220 return parent;
169 } 221 }
170 222
171 /** 223 /**
172 * Get the nearest common ancestor of nodes. 224 * Get the nearest common ancestor of nodes.
173 */ 225 */
174 public static Node getNearestCommonAncestor(final NodeList ns) { 226 public static Node getNearestCommonAncestor(final List<Element> ns) {
175 if (ns.getLength() == 0) return null; 227 if (ns.size() == 0) return null;
176 Node parent = ns.getItem(0); 228 Node parent = ns.get(0);
177 for (int i = 1; i < ns.getLength(); i++) { 229 for (int i = 1; i < ns.size(); i++) {
178 parent = getNearestCommonAncestor(parent, ns.getItem(i)); 230 parent = getNearestCommonAncestor(parent, ns.get(i));
179 } 231 }
180 return parent; 232 return parent;
181 } 233 }
182 234
183 /** 235 /**
184 * Get all text from a tree/sub-tree. 236 * Get all text from a tree/sub-tree.
185 * @param node The root of the tree. 237 * @param node The root of the tree.
186 * @return The text contained in this tree. 238 * @return The text contained in this tree.
187 */ 239 */
188 public static String getTextFromTree(Node node) { 240 public static String getTextFromTree(Node node) {
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 }-*/; 517 }-*/;
466 518
467 public static native Document createHTMLDocument(Document doc) /*-{ 519 public static native Document createHTMLDocument(Document doc) /*-{
468 return doc.implementation.createHTMLDocument(); 520 return doc.implementation.createHTMLDocument();
469 }-*/; 521 }-*/;
470 522
471 public static native Element getFirstElementChild(Document document) /*-{ 523 public static native Element getFirstElementChild(Document document) /*-{
472 return document.firstElementChild; 524 return document.firstElementChild;
473 }-*/; 525 }-*/;
474 } 526 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698