Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: src/com/dom_distiller/client/PagingLinksFinder.java

Issue 449923002: gwt getInnerText -> javascript innerText or textContent (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 /* 5 /*
6 * Parts of this file are adapted from Readability. 6 * Parts of this file are adapted from Readability.
7 * 7 *
8 * Readability is Copyright (c) 2010 Src90 Inc 8 * Readability is Copyright (c) 2010 Src90 Inc
9 * and licenced under the Apache License, Version 2.0. 9 * and licenced under the Apache License, Version 2.0.
10 */ 10 */
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
126 } 126 }
127 127
128 // If it's on a different domain, skip it. 128 // If it's on a different domain, skip it.
129 String[] urlSlashes = StringUtil.split(linkHref, "\\/+"); 129 String[] urlSlashes = StringUtil.split(linkHref, "\\/+");
130 if (urlSlashes.length < 3 || // Expect at least the protocol, domai n, and path. 130 if (urlSlashes.length < 3 || // Expect at least the protocol, domai n, and path.
131 !Window.Location.getHost().equalsIgnoreCase(urlSlashes[1])) { 131 !Window.Location.getHost().equalsIgnoreCase(urlSlashes[1])) {
132 appendDbgStrForLink(link, "ignored: different domain"); 132 appendDbgStrForLink(link, "ignored: different domain");
133 continue; 133 continue;
134 } 134 }
135 135
136 String linkText = link.getInnerText(); 136 // Use javascript innerText (instead of javascript textContent) to only get visible
137 // text.
138 String linkText = DomUtil.getInnerText(link);
137 139
138 // If the linkText looks like it's not the next or previous page, skip it. 140 // If the linkText looks like it's not the next or previous page, skip it.
139 if (StringUtil.match(linkText, EXTRANEOUS_REGEX) || linkText.length( ) > 25) { 141 if (StringUtil.match(linkText, EXTRANEOUS_REGEX) || linkText.length( ) > 25) {
140 appendDbgStrForLink(link, "ignored: one of extra"); 142 appendDbgStrForLink(link, "ignored: one of extra");
141 continue; 143 continue;
142 } 144 }
143 145
144 // For next page link, if the initial part of the URL is identical to the base URL, but 146 // For next page link, if the initial part of the URL is identical to the base URL, but
145 // the rest of it doesn't contain any digits, it's certainly not a next page link. 147 // the rest of it doesn't contain any digits, it's certainly not a next page link.
146 // However, this doesn't apply to previous page link, because most sites will just have 148 // However, this doesn't apply to previous page link, because most sites will just have
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 // TODO(kuan): investigate how to get logging when running "ant test.prod" - currently, 411 // TODO(kuan): investigate how to get logging when running "ant test.prod" - currently,
410 // nothing appears. In the meantime, throwing an exception with a log message at suspicious 412 // nothing appears. In the meantime, throwing an exception with a log message at suspicious
411 // codepoints can produce a call stack and help debugging, albeit tediously. 413 // codepoints can produce a call stack and help debugging, albeit tediously.
412 LogUtil.logToConsole("numLinks=" + allLinks.getLength() + ", found " + 414 LogUtil.logToConsole("numLinks=" + allLinks.getLength() + ", found " +
413 (pageLink == PageLink.NEXT ? "next: " : "prev: ") + 415 (pageLink == PageLink.NEXT ? "next: " : "prev: ") +
414 (pagingHref != null ? pagingHref : "null")); 416 (pagingHref != null ? pagingHref : "null"));
415 417
416 for (int i = 0; i < allLinks.getLength(); i++) { 418 for (int i = 0; i < allLinks.getLength(); i++) {
417 AnchorElement link = AnchorElement.as(allLinks.getItem(i)); 419 AnchorElement link = AnchorElement.as(allLinks.getItem(i));
418 420
419 String text = link.getInnerText(); 421 // Use javascript innerText (instead of javascript textContent) to get only visible
422 // text.
423 String text = DomUtil.getInnerText(link);
420 // Trim unnecessary whitespaces from text. 424 // Trim unnecessary whitespaces from text.
421 String[] words = StringUtil.split(text, "\\s+"); 425 String[] words = StringUtil.split(text, "\\s+");
422 text = ""; 426 text = "";
423 for (int w = 0; w < words.length; w++) { 427 for (int w = 0; w < words.length; w++) {
424 text += words[w]; 428 text += words[w];
425 if (w < words.length - 1) text += " "; 429 if (w < words.length - 1) text += " ";
426 } 430 }
427 431
428 LogUtil.logToConsole(i + ")" + link.getHref() + ", txt=[" + text + " ], dbg=[" + 432 LogUtil.logToConsole(i + ")" + link.getHref() + ", txt=[" + text + " ], dbg=[" +
429 mLinkDebugInfo.get(link) + "]"); 433 mLinkDebugInfo.get(link) + "]");
(...skipping 15 matching lines...) Expand all
445 } 449 }
446 450
447 private enum PageLink { 451 private enum PageLink {
448 NEXT, 452 NEXT,
449 PREV, 453 PREV,
450 } 454 }
451 455
452 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme nt, String>(); 456 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme nt, String>();
453 457
454 } 458 }
OLDNEW
« no previous file with comments | « src/com/dom_distiller/client/IEReadingViewParser.java ('k') | src/com/dom_distiller/client/SchemaOrgParser.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698