Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(404)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Method and unit tests names changed Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 }-*/; 95 }-*/;
96 96
97 public static boolean isVisible(Element e) { 97 public static boolean isVisible(Element e) {
98 Style style = getComputedStyle(e); 98 Style style = getComputedStyle(e);
99 double opacity = JavaScript.parseFloat(style.getOpacity()); 99 double opacity = JavaScript.parseFloat(style.getOpacity());
100 return !(style.getDisplay().equals("none") || 100 return !(style.getDisplay().equals("none") ||
101 style.getVisibility().equals("hidden") || 101 style.getVisibility().equals("hidden") ||
102 opacity == 0.0F); 102 opacity == 0.0F);
103 } 103 }
104 104
105 /**
106 * Verifies if a given element is visible by checking its offset.
107 */
108 public static boolean isVisibleByOffset(Element e) {
109 return !(e.getOffsetHeight() <= 0 || e.getOffsetWidth() <= 0);
110 }
111
112 /**
113 * Get the element of the main article, if any.
114 * @return An element of article (not necessarily the html5 article element).
115 */
116 public static Element getArticleElement(Element root) {
117 NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
118 List<Element> visibleElements = getVisibleElements(allArticles);
119 // Having multiple article elements usually indicates a bad case for this shortcut.
120 // TODO(wychen): some sites exclude things like title and author in article element.
121 if (visibleElements.size() == 1) {
122 return visibleElements.get(0);
123 }
124 // Note that the CSS property matching is case sensitive, and "Article" is the correct
125 // capitalization.
126 String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype* =\"Post\"]";
127 allArticles = DomUtil.querySelectorAll(root, query);
128 visibleElements = getVisibleElements(allArticles);
129 // It is commonly seen that the article is wrapped separately or in multiple layers.
130 if (visibleElements.size() > 0) {
131 return Element.as(DomUtil.getNearestCommonAncestor(visibleElements)) ;
132 }
133 return null;
134 }
135
136 /**
137 * Get a list of visible elements.
138 * @return A list of visible elements.
139 */
140 public static List<Element> getVisibleElements(
wychen 2016/06/03 16:49:53 nit: space.
marcelorcorrea 2016/06/06 13:17:57 Done.
141 NodeList<Element> nodeList) {
wychen 2016/06/03 16:49:53 Nitpick: Why wrap here? I think we are using 100ch
marcelorcorrea 2016/06/06 13:17:57 Done.
142 List<Element> visibleElements = new ArrayList<>();
143 for (int i = 0; i < nodeList.getLength(); i++) {
144 Element element = nodeList.getItem(i);
145 if (DomUtil.isVisible(element) &&
146 DomUtil.isVisibleByOffset(element)) {
147 visibleElements.add(element);
148 }
149 }
150 return visibleElements;
151 }
152
105 /* 153 /*
106 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's 154 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's
107 * implementation of Element::getInnerText(), which is intentionally different to mimic an old 155 * implementation of Element::getInnerText(), which is intentionally different to mimic an old
108 * IE behaviour, which returns text within <script> tags. 156 * IE behaviour, which returns text within <script> tags.
109 */ 157 */
110 public static native String getInnerText(Node node) /*-{ 158 public static native String getInnerText(Node node) /*-{
111 return node.innerText; 159 return node.innerText;
112 }-*/; 160 }-*/;
113 161
114 public static native double getTime() /*-{ 162 public static native double getTime() /*-{
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 212 */
165 public static Node getNearestCommonAncestor(final Node n1, final Node n2) { 213 public static Node getNearestCommonAncestor(final Node n1, final Node n2) {
166 Node parent = n1; 214 Node parent = n1;
167 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode(); 215 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode();
168 return parent; 216 return parent;
169 } 217 }
170 218
171 /** 219 /**
172 * Get the nearest common ancestor of nodes. 220 * Get the nearest common ancestor of nodes.
173 */ 221 */
174 public static Node getNearestCommonAncestor(final NodeList ns) { 222 public static Node getNearestCommonAncestor(final List<Element> ns) {
175 if (ns.getLength() == 0) return null; 223 if (ns.size() == 0) return null;
176 Node parent = ns.getItem(0); 224 Node parent = ns.get(0);
177 for (int i = 1; i < ns.getLength(); i++) { 225 for (int i = 1; i < ns.size(); i++) {
178 parent = getNearestCommonAncestor(parent, ns.getItem(i)); 226 parent = getNearestCommonAncestor(parent, ns.get(i));
179 } 227 }
180 return parent; 228 return parent;
181 } 229 }
182 230
183 /** 231 /**
184 * Get all text from a tree/sub-tree. 232 * Get all text from a tree/sub-tree.
185 * @param node The root of the tree. 233 * @param node The root of the tree.
186 * @return The text contained in this tree. 234 * @return The text contained in this tree.
187 */ 235 */
188 public static String getTextFromTree(Node node) { 236 public static String getTextFromTree(Node node) {
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 }-*/; 513 }-*/;
466 514
467 public static native Document createHTMLDocument(Document doc) /*-{ 515 public static native Document createHTMLDocument(Document doc) /*-{
468 return doc.implementation.createHTMLDocument(); 516 return doc.implementation.createHTMLDocument();
469 }-*/; 517 }-*/;
470 518
471 public static native Element getFirstElementChild(Document document) /*-{ 519 public static native Element getFirstElementChild(Document document) /*-{
472 return document.firstElementChild; 520 return document.firstElementChild;
473 }-*/; 521 }-*/;
474 } 522 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698