Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(361)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: nitpick fixed 2 Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 }-*/; 95 }-*/;
96 96
97 public static boolean isVisible(Element e) { 97 public static boolean isVisible(Element e) {
98 Style style = getComputedStyle(e); 98 Style style = getComputedStyle(e);
99 double opacity = JavaScript.parseFloat(style.getOpacity()); 99 double opacity = JavaScript.parseFloat(style.getOpacity());
100 return !(style.getDisplay().equals("none") || 100 return !(style.getDisplay().equals("none") ||
101 style.getVisibility().equals("hidden") || 101 style.getVisibility().equals("hidden") ||
102 opacity == 0.0F); 102 opacity == 0.0F);
103 } 103 }
104 104
105 /**
106 * Verifies if a given element is visible by checking its offset.
107 */
108 public static boolean isVisibleByOffset(Element e) {
109 // Detect whether any of the ancestors has "display: none".
110 // Using offsetParent alone wouldn't work because it's also null when position is fixed.
wychen 2016/06/07 13:43:11 nit: double space. null when
marcelorcorrea 2016/06/07 15:35:11 Done.
111 // Using offsetHeight/Width alone makes sense in production, but we have too many
112 // zero-sized elements in our tests.
113 return e.getOffsetParent() != null || e.getOffsetHeight() != 0 || e.getOffsetWidth() != 0;
114 }
115
116 /**
117 * Get the element of the main article, if any.
118 * @return An element of article (not necessarily the html5 article element).
119 */
120 public static Element getArticleElement(Element root) {
121 NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
122 List<Element> visibleElements = getVisibleElements(allArticles);
123 // Having multiple article elements usually indicates a bad case for this shortcut.
124 // TODO(wychen): some sites exclude things like title and author in article element.
125 if (visibleElements.size() == 1) {
126 return visibleElements.get(0);
127 }
128 // Note that the CSS property matching is case sensitive, and "Article" is the correct
129 // capitalization.
130 String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype*=\"Post\"]";
131 allArticles = DomUtil.querySelectorAll(root, query);
132 visibleElements = getVisibleElements(allArticles);
133 // It is commonly seen that the article is wrapped separately or in multiple layers.
134 if (visibleElements.size() > 0) {
135 return Element.as(DomUtil.getNearestCommonAncestor(visibleElements));
136 }
137 return null;
138 }
139
140 /**
141 * Get a list of visible elements.
142 * @return A list of visible elements.
143 */
144 public static List<Element> getVisibleElements(NodeList<Element> nodeList) {
145 List<Element> visibleElements = new ArrayList<>();
146 for (int i = 0; i < nodeList.getLength(); i++) {
147 Element element = nodeList.getItem(i);
148 if (DomUtil.isVisible(element) &&
149 DomUtil.isVisibleByOffset(element)) {
150 visibleElements.add(element);
151 }
152 }
153 return visibleElements;
154 }
155
105 /* 156 /*
106 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's 157 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's
107 * implementation of Element::getInnerText(), which is intentionally different to mimic an old 158 * implementation of Element::getInnerText(), which is intentionally different to mimic an old
108 * IE behaviour, which returns text within <script> tags. 159 * IE behaviour, which returns text within <script> tags.
109 */ 160 */
110 public static native String getInnerText(Node node) /*-{ 161 public static native String getInnerText(Node node) /*-{
111 return node.innerText; 162 return node.innerText;
112 }-*/; 163 }-*/;
113 164
114 public static native double getTime() /*-{ 165 public static native double getTime() /*-{
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 215 */
165 public static Node getNearestCommonAncestor(final Node n1, final Node n2) { 216 public static Node getNearestCommonAncestor(final Node n1, final Node n2) {
166 Node parent = n1; 217 Node parent = n1;
167 while (parent != null && !JavaScript.contains(parent, n2)) parent = parent.getParentNode(); 218 while (parent != null && !JavaScript.contains(parent, n2)) parent = parent.getParentNode();
168 return parent; 219 return parent;
169 } 220 }
170 221
171 /** 222 /**
172 * Get the nearest common ancestor of nodes. 223 * Get the nearest common ancestor of nodes.
173 */ 224 */
174 public static Node getNearestCommonAncestor(final NodeList ns) { 225 public static Node getNearestCommonAncestor(final List<Element> ns) {
175 if (ns.getLength() == 0) return null; 226 if (ns.size() == 0) return null;
176 Node parent = ns.getItem(0); 227 Node parent = ns.get(0);
177 for (int i = 1; i < ns.getLength(); i++) { 228 for (int i = 1; i < ns.size(); i++) {
178 parent = getNearestCommonAncestor(parent, ns.getItem(i)); 229 parent = getNearestCommonAncestor(parent, ns.get(i));
179 } 230 }
180 return parent; 231 return parent;
181 } 232 }
182 233
183 /** 234 /**
184 * Get all text from a tree/sub-tree. 235 * Get all text from a tree/sub-tree.
185 * @param node The root of the tree. 236 * @param node The root of the tree.
186 * @return The text contained in this tree. 237 * @return The text contained in this tree.
187 */ 238 */
188 public static String getTextFromTree(Node node) { 239 public static String getTextFromTree(Node node) {
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 }-*/; 516 }-*/;
466 517
467 public static native Document createHTMLDocument(Document doc) /*-{ 518 public static native Document createHTMLDocument(Document doc) /*-{
468 return doc.implementation.createHTMLDocument(); 519 return doc.implementation.createHTMLDocument();
469 }-*/; 520 }-*/;
470 521
471 public static native Element getFirstElementChild(Document document) /*-{ 522 public static native Element getFirstElementChild(Document document) /*-{
472 return document.firstElementChild; 523 return document.firstElementChild;
473 }-*/; 524 }-*/;
474 } 525 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/ContentExtractor.java ('k') | javatests/org/chromium/distiller/ContentExtractorTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698