| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 /* | 5 /* |
| 6 * Parts of this file are adapted from Readability. | 6 * Parts of this file are adapted from Readability. |
| 7 * | 7 * |
| 8 * Readability is Copyright (c) 2010 Src90 Inc | 8 * Readability is Copyright (c) 2010 Src90 Inc |
| 9 * and licenced under the Apache License, Version 2.0. | 9 * and licenced under the Apache License, Version 2.0. |
| 10 */ | 10 */ |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 126 } | 126 } |
| 127 | 127 |
| 128 // If it's on a different domain, skip it. | 128 // If it's on a different domain, skip it. |
| 129 String[] urlSlashes = StringUtil.split(linkHref, "\\/+"); | 129 String[] urlSlashes = StringUtil.split(linkHref, "\\/+"); |
| 130 if (urlSlashes.length < 3 || // Expect at least the protocol, domai
n, and path. | 130 if (urlSlashes.length < 3 || // Expect at least the protocol, domai
n, and path. |
| 131 !Window.Location.getHost().equalsIgnoreCase(urlSlashes[1]))
{ | 131 !Window.Location.getHost().equalsIgnoreCase(urlSlashes[1]))
{ |
| 132 appendDbgStrForLink(link, "ignored: different domain"); | 132 appendDbgStrForLink(link, "ignored: different domain"); |
| 133 continue; | 133 continue; |
| 134 } | 134 } |
| 135 | 135 |
| 136 String linkText = link.getInnerText(); | 136 // Use javascript innerText (instead of javascript textContent) to o
nly get visible |
| 137 // text. |
| 138 String linkText = DomUtil.getInnerText(link); |
| 137 | 139 |
| 138 // If the linkText looks like it's not the next or previous page, sk
ip it. | 140 // If the linkText looks like it's not the next or previous page, sk
ip it. |
| 139 if (StringUtil.match(linkText, EXTRANEOUS_REGEX) || linkText.length(
) > 25) { | 141 if (StringUtil.match(linkText, EXTRANEOUS_REGEX) || linkText.length(
) > 25) { |
| 140 appendDbgStrForLink(link, "ignored: one of extra"); | 142 appendDbgStrForLink(link, "ignored: one of extra"); |
| 141 continue; | 143 continue; |
| 142 } | 144 } |
| 143 | 145 |
| 144 // For next page link, if the initial part of the URL is identical t
o the base URL, but | 146 // For next page link, if the initial part of the URL is identical t
o the base URL, but |
| 145 // the rest of it doesn't contain any digits, it's certainly not a n
ext page link. | 147 // the rest of it doesn't contain any digits, it's certainly not a n
ext page link. |
| 146 // However, this doesn't apply to previous page link, because most s
ites will just have | 148 // However, this doesn't apply to previous page link, because most s
ites will just have |
| (...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 409 // TODO(kuan): investigate how to get logging when running "ant test.pr
od" - currently, | 411 // TODO(kuan): investigate how to get logging when running "ant test.pr
od" - currently, |
| 410 // nothing appears. In the meantime, throwing an exception with a log m
essage at suspicious | 412 // nothing appears. In the meantime, throwing an exception with a log m
essage at suspicious |
| 411 // codepoints can produce a call stack and help debugging, albeit tediou
sly. | 413 // codepoints can produce a call stack and help debugging, albeit tediou
sly. |
| 412 LogUtil.logToConsole("numLinks=" + allLinks.getLength() + ", found " + | 414 LogUtil.logToConsole("numLinks=" + allLinks.getLength() + ", found " + |
| 413 (pageLink == PageLink.NEXT ? "next: " : "prev: ") + | 415 (pageLink == PageLink.NEXT ? "next: " : "prev: ") + |
| 414 (pagingHref != null ? pagingHref : "null")); | 416 (pagingHref != null ? pagingHref : "null")); |
| 415 | 417 |
| 416 for (int i = 0; i < allLinks.getLength(); i++) { | 418 for (int i = 0; i < allLinks.getLength(); i++) { |
| 417 AnchorElement link = AnchorElement.as(allLinks.getItem(i)); | 419 AnchorElement link = AnchorElement.as(allLinks.getItem(i)); |
| 418 | 420 |
| 419 String text = link.getInnerText(); | 421 // Use javascript innerText (instead of javascript textContent) to g
et only visible |
| 422 // text. |
| 423 String text = DomUtil.getInnerText(link); |
| 420 // Collapse each run of whitespace in text to a single space. | 424 // Collapse each run of whitespace in text to a single space. |
| 421 String[] words = StringUtil.split(text, "\\s+"); | 425 String[] words = StringUtil.split(text, "\\s+"); |
| 422 text = ""; | 426 text = ""; |
| 423 for (int w = 0; w < words.length; w++) { | 427 for (int w = 0; w < words.length; w++) { |
| 424 text += words[w]; | 428 text += words[w]; |
| 425 if (w < words.length - 1) text += " "; | 429 if (w < words.length - 1) text += " "; |
| 426 } | 430 } |
| 427 | 431 |
| 428 LogUtil.logToConsole(i + ")" + link.getHref() + ", txt=[" + text + "
], dbg=[" + | 432 LogUtil.logToConsole(i + ")" + link.getHref() + ", txt=[" + text + "
], dbg=[" + |
| 429 mLinkDebugInfo.get(link) + "]"); | 433 mLinkDebugInfo.get(link) + "]"); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 445 } | 449 } |
| 446 | 450 |
| 447 private enum PageLink { | 451 private enum PageLink { |
| 448 NEXT, | 452 NEXT, |
| 449 PREV, | 453 PREV, |
| 450 } | 454 } |
| 451 | 455 |
| 452 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme
nt, String>(); | 456 private static final Map<Element, String> mLinkDebugInfo = new HashMap<Eleme
nt, String>(); |
| 453 | 457 |
| 454 } | 458 } |
| OLD | NEW |