| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 /* | 5 /* |
| 6 * Parts of this file are adapted from Readability. | 6 * Parts of this file are adapted from Readability. |
| 7 * | 7 * |
| 8 * Readability is Copyright (c) 2010 Src90 Inc | 8 * Readability is Copyright (c) 2010 Src90 Inc |
| 9 * and licensed under the Apache License, Version 2.0. | 9 * and licensed under the Apache License, Version 2.0. |
| 10 */ | 10 */ |
| (...skipping 28 matching lines...) Expand all Loading... |
| 39 if (objTitle.getClass() == currTitle.getClass()) { // If objTitle is of
String type. | 39 if (objTitle.getClass() == currTitle.getClass()) { // If objTitle is of
String type. |
| 40 currTitle = origTitle = objTitle.toString(); | 40 currTitle = origTitle = objTitle.toString(); |
| 41 } else if (root != null) { // Otherwise, use text of first TITLE elemen
t. | 41 } else if (root != null) { // Otherwise, use text of first TITLE elemen
t. |
| 42 NodeList<Element> titles = root.getElementsByTagName("TITLE"); | 42 NodeList<Element> titles = root.getElementsByTagName("TITLE"); |
| 43 if (titles.getLength() > 0) { | 43 if (titles.getLength() > 0) { |
| 44 // Use javascript textContent instead of javascript innerText; the
latter only returns | 44 // Use javascript textContent instead of javascript innerText; the
latter only returns |
| 45 // visible text, but <title> tags are invisible. | 45 // visible text, but <title> tags are invisible. |
| 46 currTitle = origTitle = DomUtil.javascriptTextContent(titles.getIt
em(0)); | 46 currTitle = origTitle = DomUtil.javascriptTextContent(titles.getIt
em(0)); |
| 47 } | 47 } |
| 48 } | 48 } |
| 49 if (currTitle == "") return ""; | 49 if (currTitle.isEmpty()) return ""; |
| 50 | 50 |
| 51 if (StringUtil.match(currTitle, " [\\|\\-] ")) { // Title has '|' and/o
r '-'. | 51 if (StringUtil.match(currTitle, " [\\|\\-] ")) { // Title has '|' and/o
r '-'. |
| 52 // Get part before last '|' or '-'. | 52 // Get part before last '|' or '-'. |
| 53 currTitle = StringUtil.findAndReplace(origTitle, "(.*)[\\|\\-] .*",
"$1"); | 53 currTitle = StringUtil.findAndReplace(origTitle, "(.*)[\\|\\-] .*",
"$1"); |
| 54 if (StringUtil.countWords(currTitle) < 3) { // Part has < 3 words. | 54 if (StringUtil.countWords(currTitle) < 3) { // Part has < 3 words. |
| 55 // Get part after first '|' or '-'. | 55 // Get part after first '|' or '-'. |
| 56 currTitle = StringUtil.findAndReplace(origTitle, "[^\\|\\-]*[\\|
\\-](.*)", "$1"); | 56 currTitle = StringUtil.findAndReplace(origTitle, "[^\\|\\-]*[\\|
\\-](.*)", "$1"); |
| 57 } | 57 } |
| 58 } else if (currTitle.indexOf(": ") != -1) { // Title has ':'. | 58 } else if (currTitle.indexOf(": ") != -1) { // Title has ':'. |
| 59 // Get part after last ':'. | 59 // Get part after last ':'. |
| (...skipping 23 matching lines...) Expand all Loading... |
| 83 NodeList<Element> hOnes = root.getElementsByTagName("H1"); | 83 NodeList<Element> hOnes = root.getElementsByTagName("H1"); |
| 84 // Use javascript innerText instead of javascript textContent; the forme
r only returns | 84 // Use javascript innerText instead of javascript textContent; the forme
r only returns |
| 85 // visible text, and we assume visible H1's are more inclined to being p
otential titles. | 85 // visible text, and we assume visible H1's are more inclined to being p
otential titles. |
| 86 String h1 = ""; | 86 String h1 = ""; |
| 87 for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) { | 87 for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) { |
| 88 h1 = DomUtil.getInnerText(hOnes.getItem(i)); | 88 h1 = DomUtil.getInnerText(hOnes.getItem(i)); |
| 89 } | 89 } |
| 90 return h1; | 90 return h1; |
| 91 } | 91 } |
| 92 } | 92 } |
| OLD | NEW |