Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(250)

Side by Side Diff: src/com/dom_distiller/client/IEReadingViewParser.java

Issue 449923002: gwt getInnerText -> javascript innerText or textContent (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package com.dom_distiller.client; 5 package com.dom_distiller.client;
6 6
7 import java.util.ArrayList; 7 import java.util.ArrayList;
8 import java.util.EnumMap; 8 import java.util.EnumMap;
9 import java.util.List; 9 import java.util.List;
10 import java.util.Map; 10 import java.util.Map;
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
126 } 126 }
127 } 127 }
128 } 128 }
129 129
130 private void findDate() { 130 private void findDate() {
131 mDate = ""; 131 mDate = "";
132 132
133 // Get date from any element that includes the "dateline" class. 133 // Get date from any element that includes the "dateline" class.
134 Element elem = DomUtil.getFirstElementWithClassName(mRoot, "dateline"); 134 Element elem = DomUtil.getFirstElementWithClassName(mRoot, "dateline");
135 if (elem != null) { 135 if (elem != null) {
136 mDate = elem.getInnerText(); 136 // Use javascript textContent (instead of javascript innerText) to include invisible
137 // text.
138 mDate = DomUtil.javascriptTextContent(elem);
137 } else { // Otherwise, get date from meta tag with "displaydate" as name. 139 } else { // Otherwise, get date from meta tag with "displaydate" as name.
138 for (int i = 0; i < mAllMeta.getLength(); i++) { 140 for (int i = 0; i < mAllMeta.getLength(); i++) {
139 MetaElement meta = MetaElement.as(mAllMeta.getItem(i)); 141 MetaElement meta = MetaElement.as(mAllMeta.getItem(i));
140 if (meta.getName().equalsIgnoreCase("displaydate")) { 142 if (meta.getName().equalsIgnoreCase("displaydate")) {
141 mDate = meta.getContent(); 143 mDate = meta.getContent();
142 break; 144 break;
143 } 145 }
144 } 146 }
145 } 147 }
146 } 148 }
147 149
148 private void findAuthor() { 150 private void findAuthor() {
149 mAuthor = ""; 151 mAuthor = "";
150 152
151 // Get author from the first element that includes the "byline-name" class. 153 // Get author from the first element that includes the "byline-name" class.
152 // Note that we ignore the order of this element for now. 154 // Note that we ignore the order of this element for now.
153 Element elem = DomUtil.getFirstElementWithClassName(mRoot, "byline-name"); 155 Element elem = DomUtil.getFirstElementWithClassName(mRoot, "byline-name");
154 if (elem != null) mAuthor = elem.getInnerText(); 156 // Use javascript textContent (instead of javascript innerText) to include invisible text.
157 if (elem != null) mAuthor = DomUtil.javascriptTextContent(elem);
155 } 158 }
156 159
157 private void findPublisher() { 160 private void findPublisher() {
158 mPublisher = ""; 161 mPublisher = "";
159 162
160 // Look for "publisher" or "source_organization" attribute in any html tag. 163 // Look for "publisher" or "source_organization" attribute in any html tag.
161 NodeList<Element> allElems = mRoot.getElementsByTagName("*"); 164 NodeList<Element> allElems = mRoot.getElementsByTagName("*");
162 for (int i = 0; i < allElems.getLength() && mPublisher.isEmpty(); i++) { 165 for (int i = 0; i < allElems.getLength() && mPublisher.isEmpty(); i++) {
163 Element e = allElems.getItem(i); 166 Element e = allElems.getItem(i);
164 mPublisher = e.getAttribute("publisher"); 167 mPublisher = e.getAttribute("publisher");
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
221 double aspectRatio = (double) width / (double) image.getHeight(); 224 double aspectRatio = (double) width / (double) image.getHeight();
222 return aspectRatio >= 1.3 && aspectRatio <= 3.0; 225 return aspectRatio >= 1.3 && aspectRatio <= 3.0;
223 } 226 }
224 227
225 private static String getCaption(ImageElement image) { 228 private static String getCaption(ImageElement image) {
226 // If |image| is a child of <figure>, then get the <figcaption> elements. 229 // If |image| is a child of <figure>, then get the <figcaption> elements.
227 Element parent = image.getParentElement(); 230 Element parent = image.getParentElement();
228 if (!parent.hasTagName("FIGURE")) return ""; 231 if (!parent.hasTagName("FIGURE")) return "";
229 NodeList<Element> captions = parent.getElementsByTagName("FIGCAPTION"); 232 NodeList<Element> captions = parent.getElementsByTagName("FIGCAPTION");
230 int numCaptions = captions.getLength(); 233 int numCaptions = captions.getLength();
231 if (numCaptions > 0 && numCaptions <= 2) 234 String caption = "";
232 return captions.getItem(0).getInnerText(); // Just use the first one. 235 if (numCaptions > 0 && numCaptions <= 2) {
233 return ""; 236 // Use javascript innerText (instead of javascript textContent) to get only visible
237 // captions.
238 for (int i = 0; i < numCaptions && caption.isEmpty(); i++) {
239 caption = DomUtil.getInnerText(captions.getItem(i));
240 }
241 }
242 return caption;
234 } 243 }
235 244
236 private static boolean isTextInBody(Element root, String text) { 245 private static boolean isTextInBody(Element root, String text) {
237 String lowerText = text.toLowerCase(); 246 String lowerText = text.toLowerCase();
238 NodeList<Element> bodies = root.getElementsByTagName("BODY"); 247 NodeList<Element> bodies = root.getElementsByTagName("BODY");
239 for (int i = 0; i < bodies.getLength(); i++) { 248 for (int i = 0; i < bodies.getLength(); i++) {
240 if (bodies.getItem(i).getInnerText().toLowerCase().contains(lowerText)) return true; 249 // Use javascript textContent (instead of javascript innerText) to include invisible
250 // text.
251 if (DomUtil.javascriptTextContent(
252 bodies.getItem(i)).toLowerCase().contains(lowerText)) {
253 return true;
254 }
241 } 255 }
242 return false; 256 return false;
243 } 257 }
244 } 258 }
OLDNEW
« no previous file with comments | « src/com/dom_distiller/client/DomUtil.java ('k') | src/com/dom_distiller/client/PagingLinksFinder.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698