Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(523)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: wychen's comments addressed Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 }-*/; 95 }-*/;
96 96
97 public static boolean isVisible(Element e) { 97 public static boolean isVisible(Element e) {
98 Style style = getComputedStyle(e); 98 Style style = getComputedStyle(e);
99 double opacity = JavaScript.parseFloat(style.getOpacity()); 99 double opacity = JavaScript.parseFloat(style.getOpacity());
100 return !(style.getDisplay().equals("none") || 100 return !(style.getDisplay().equals("none") ||
101 style.getVisibility().equals("hidden") || 101 style.getVisibility().equals("hidden") ||
102 opacity == 0.0F); 102 opacity == 0.0F);
103 } 103 }
104 104
105 /**
106 * Verifies if a given element is visible by checking its offset.
107 */
108 public static boolean isVisibleByItsOffset(Element e) {
109 return !(e.getOffsetHeight() <= 0 || e.getOffsetWidth() <= 0);
wychen 2015/12/10 23:33:36 I amended the current isVisible() implementation i
wychen 2016/06/01 08:46:30 Let's just use the logic from there without rebasi
110 }
111
112 /**
113 * Get the element of the main article, if any.
114 * @return An element of article (not necessarily the html5 article element).
115 */
116 public static Element getArticleElement(Element root) {
117 NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
118 List<Element> visibleElements = getVisibleElements(allArticles);
119 // Having multiple article elements usually indicates a bad case for this shortcut.
120 // TODO(wychen): some sites exclude things like title and author in article element.
121 if (visibleElements.size() == 1) {
122 return visibleElements.get(0);
123 }
124 // Note that the CSS property matching is case sensitive, and "Article" is the correct
125 // capitalization.
126 String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype* =\"Post\"]";
127 allArticles = DomUtil.querySelectorAll(root, query);
128 visibleElements = getVisibleElements(allArticles);
129 // It is commonly seen that the article is wrapped separately or in multiple layers.
130 if (visibleElements.size() > 0) {
131 return Element.as(DomUtil.getNearestCommonAncestor(visibleElements)) ;
132 }
133 return null;
134 }
135
136 /**
137 * Get a list of visible elements.
138 * @return A list of visible elements.
139 */
140 public static List<Element> getVisibleElements(NodeList<Element> nodeList) {
141 List<Element> visibleElements = new ArrayList<>();
142 for (int i = 0; i < nodeList.getLength(); i++) {
143 Element element = nodeList.getItem(i);
144 if (DomUtil.isVisible(element) && DomUtil.isVisibleByItsOffset(eleme nt)) {
145 visibleElements.add(element);
146 }
147 }
148 return visibleElements;
149 }
150
105 /* 151 /*
106 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's 152 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's
107 * implementation of Element::getInnerText(), which is intentionally different to mimic an old 153 * implementation of Element::getInnerText(), which is intentionally different to mimic an old
108 * IE behaviour, which returns text within <script> tags. 154 * IE behaviour, which returns text within <script> tags.
109 */ 155 */
110 public static native String getInnerText(Node node) /*-{ 156 public static native String getInnerText(Node node) /*-{
111 return node.innerText; 157 return node.innerText;
112 }-*/; 158 }-*/;
113 159
114 public static native double getTime() /*-{ 160 public static native double getTime() /*-{
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 210 */
165 public static Node getNearestCommonAncestor(final Node n1, final Node n2) { 211 public static Node getNearestCommonAncestor(final Node n1, final Node n2) {
166 Node parent = n1; 212 Node parent = n1;
167 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode(); 213 while (parent != null && !JavaScript.contains(parent, n2)) parent = pare nt.getParentNode();
168 return parent; 214 return parent;
169 } 215 }
170 216
171 /** 217 /**
172 * Get the nearest common ancestor of nodes. 218 * Get the nearest common ancestor of nodes.
173 */ 219 */
174 public static Node getNearestCommonAncestor(final NodeList ns) { 220 public static Node getNearestCommonAncestor(final List<Element> ns) {
175 if (ns.getLength() == 0) return null; 221 if (ns.size() == 0) return null;
176 Node parent = ns.getItem(0); 222 Node parent = ns.get(0);
177 for (int i = 1; i < ns.getLength(); i++) { 223 for (int i = 1; i < ns.size(); i++) {
178 parent = getNearestCommonAncestor(parent, ns.getItem(i)); 224 parent = getNearestCommonAncestor(parent, ns.get(i));
179 } 225 }
180 return parent; 226 return parent;
181 } 227 }
182 228
183 /** 229 /**
184 * Get all text from a tree/sub-tree. 230 * Get all text from a tree/sub-tree.
185 * @param node The root of the tree. 231 * @param node The root of the tree.
186 * @return The text contained in this tree. 232 * @return The text contained in this tree.
187 */ 233 */
188 public static String getTextFromTree(Node node) { 234 public static String getTextFromTree(Node node) {
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 }-*/; 440 }-*/;
395 441
396 public static native Document createHTMLDocument(Document doc) /*-{ 442 public static native Document createHTMLDocument(Document doc) /*-{
397 return doc.implementation.createHTMLDocument(); 443 return doc.implementation.createHTMLDocument();
398 }-*/; 444 }-*/;
399 445
400 public static native Element getFirstElementChild(Document document) /*-{ 446 public static native Element getFirstElementChild(Document document) /*-{
401 return document.firstElementChild; 447 return document.firstElementChild;
402 }-*/; 448 }-*/;
403 } 449 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698