Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: java/org/chromium/distiller/DomUtil.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: nit fixed Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.core.client.JsArray; 7 import com.google.gwt.core.client.JsArray;
8 import com.google.gwt.core.client.JsArrayString; 8 import com.google.gwt.core.client.JsArrayString;
9 import com.google.gwt.dom.client.AnchorElement; 9 import com.google.gwt.dom.client.AnchorElement;
10 import com.google.gwt.dom.client.Document; 10 import com.google.gwt.dom.client.Document;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 }-*/; 95 }-*/;
96 96
97 public static boolean isVisible(Element e) { 97 public static boolean isVisible(Element e) {
98 Style style = getComputedStyle(e); 98 Style style = getComputedStyle(e);
99 double opacity = JavaScript.parseFloat(style.getOpacity()); 99 double opacity = JavaScript.parseFloat(style.getOpacity());
100 return !(style.getDisplay().equals("none") || 100 return !(style.getDisplay().equals("none") ||
101 style.getVisibility().equals("hidden") || 101 style.getVisibility().equals("hidden") ||
102 opacity == 0.0F); 102 opacity == 0.0F);
103 } 103 }
104 104
105 /**
106 * Verifies if a given element is visible by checking its offset.
107 */
108 public static boolean isVisibleByOffset(Element e) {
109 // Detect whether any of the ancestors has "display: none".
110 // Using offsetParent alone wouldn't work because it's also null
111 // when position is fixed.
112 // Using offsetHeight/Width alone makes sense in production,
113 // but we have too many zero-sized elements in our tests.
114 return e.getOffsetParent() != null || e.getOffsetHeight() != 0
115 || e.getOffsetWidth() != 0;
wychen 2016/06/06 21:11:27 Nitpick: rewrap this function to 100char limit.
116 }
117
118 /**
119 * Get the element of the main article, if any.
120 * @return An element of article (not necessarily the html5 article element).
121 */
122 public static Element getArticleElement(Element root) {
123 NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
124 List<Element> visibleElements = getVisibleElements(allArticles);
125 // Having multiple article elements usually indicates a bad case for this shortcut.
126 // TODO(wychen): some sites exclude things like title and author in article element.
127 if (visibleElements.size() == 1) {
128 return visibleElements.get(0);
129 }
130 // Note that the CSS property matching is case sensitive, and "Article" is the correct
131 // capitalization.
132 String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype*=\"Post\"]";
133 allArticles = DomUtil.querySelectorAll(root, query);
134 visibleElements = getVisibleElements(allArticles);
135 // It is commonly seen that the article is wrapped separately or in multiple layers.
136 if (visibleElements.size() > 0) {
137 return Element.as(DomUtil.getNearestCommonAncestor(visibleElements));
138 }
139 return null;
140 }
141
142 /**
143 * Get a list of visible elements.
144 * @return A list of visible elements.
145 */
146 public static List<Element> getVisibleElements(NodeList<Element> nodeList) {
147 List<Element> visibleElements = new ArrayList<>();
148 for (int i = 0; i < nodeList.getLength(); i++) {
149 Element element = nodeList.getItem(i);
150 if (DomUtil.isVisible(element) &&
151 DomUtil.isVisibleByOffset(element)) {
152 visibleElements.add(element);
153 }
154 }
155 return visibleElements;
156 }
157
105 /* 158 /*
106 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's 159 * We want to use jsni for direct access to javascript's innerText. This avoids GWT's
107 * implementation of Element::getInnerText(), which is intentionally different to mimic an old 160 * implementation of Element::getInnerText(), which is intentionally different to mimic an old
108 * IE behaviour, which returns text within <script> tags. 161 * IE behaviour, which returns text within <script> tags.
109 */ 162 */
110 public static native String getInnerText(Node node) /*-{ 163 public static native String getInnerText(Node node) /*-{
111 return node.innerText; 164 return node.innerText;
112 }-*/; 165 }-*/;
113 166
114 public static native double getTime() /*-{ 167 public static native double getTime() /*-{
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 217 */
165 public static Node getNearestCommonAncestor(final Node n1, final Node n2) { 218 public static Node getNearestCommonAncestor(final Node n1, final Node n2) {
166 Node parent = n1; 219 Node parent = n1;
167 while (parent != null && !JavaScript.contains(parent, n2)) parent = parent.getParentNode(); 220 while (parent != null && !JavaScript.contains(parent, n2)) parent = parent.getParentNode();
168 return parent; 221 return parent;
169 } 222 }
170 223
171 /** 224 /**
172 * Get the nearest common ancestor of nodes. 225 * Get the nearest common ancestor of nodes.
173 */ 226 */
174 public static Node getNearestCommonAncestor(final NodeList ns) { 227 public static Node getNearestCommonAncestor(final List<Element> ns) {
175 if (ns.getLength() == 0) return null; 228 if (ns.size() == 0) return null;
176 Node parent = ns.getItem(0); 229 Node parent = ns.get(0);
177 for (int i = 1; i < ns.getLength(); i++) { 230 for (int i = 1; i < ns.size(); i++) {
178 parent = getNearestCommonAncestor(parent, ns.getItem(i)); 231 parent = getNearestCommonAncestor(parent, ns.get(i));
179 } 232 }
180 return parent; 233 return parent;
181 } 234 }
182 235
183 /** 236 /**
184 * Get all text from a tree/sub-tree. 237 * Get all text from a tree/sub-tree.
185 * @param node The root of the tree. 238 * @param node The root of the tree.
186 * @return The text contained in this tree. 239 * @return The text contained in this tree.
187 */ 240 */
188 public static String getTextFromTree(Node node) { 241 public static String getTextFromTree(Node node) {
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 }-*/; 518 }-*/;
466 519
467 public static native Document createHTMLDocument(Document doc) /*-{ 520 public static native Document createHTMLDocument(Document doc) /*-{
468 return doc.implementation.createHTMLDocument(); 521 return doc.implementation.createHTMLDocument();
469 }-*/; 522 }-*/;
470 523
471 public static native Element getFirstElementChild(Document document) /*-{ 524 public static native Element getFirstElementChild(Document document) /*-{
472 return document.firstElementChild; 525 return document.firstElementChild;
473 }-*/; 526 }-*/;
474 } 527 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/ContentExtractor.java ('k') | javatests/org/chromium/distiller/ContentExtractorTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698