OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 /* | 5 /* |
6 * Parts of this file are adapted from Readability. | 6 * Parts of this file are adapted from Readability. |
7 * | 7 * |
8 * Readability is Copyright (c) 2010 Src90 Inc | 8 * Readability is Copyright (c) 2010 Src90 Inc |
9 * and licensed under the Apache License, Version 2.0. | 9 * and licensed under the Apache License, Version 2.0. |
10 */ | 10 */ |
(...skipping 28 matching lines...) Expand all Loading... |
39 if (objTitle.getClass() == currTitle.getClass()) { // If objTitle is of
String type. | 39 if (objTitle.getClass() == currTitle.getClass()) { // If objTitle is of
String type. |
40 currTitle = origTitle = objTitle.toString(); | 40 currTitle = origTitle = objTitle.toString(); |
41 } else if (root != null) { // Otherwise, use text of first TITLE elemen
t. | 41 } else if (root != null) { // Otherwise, use text of first TITLE elemen
t. |
42 NodeList<Element> titles = root.getElementsByTagName("TITLE"); | 42 NodeList<Element> titles = root.getElementsByTagName("TITLE"); |
43 if (titles.getLength() > 0) { | 43 if (titles.getLength() > 0) { |
44 // Use javascript textContent instead of javascript innerText; the
latter only returns | 44 // Use javascript textContent instead of javascript innerText; the
latter only returns |
45 // visible text, but <title> tags are invisible. | 45 // visible text, but <title> tags are invisible. |
46 currTitle = origTitle = DomUtil.javascriptTextContent(titles.getIt
em(0)); | 46 currTitle = origTitle = DomUtil.javascriptTextContent(titles.getIt
em(0)); |
47 } | 47 } |
48 } | 48 } |
49 if (currTitle == "") return ""; | 49 if (currTitle.isEmpty()) return ""; |
50 | 50 |
51 if (StringUtil.match(currTitle, " [\\|\\-] ")) { // Title has '|' and/o
r '-'. | 51 if (StringUtil.match(currTitle, " [\\|\\-] ")) { // Title has '|' and/o
r '-'. |
52 // Get part before last '|' or '-'. | 52 // Get part before last '|' or '-'. |
53 currTitle = StringUtil.findAndReplace(origTitle, "(.*)[\\|\\-] .*",
"$1"); | 53 currTitle = StringUtil.findAndReplace(origTitle, "(.*)[\\|\\-] .*",
"$1"); |
54 if (StringUtil.countWords(currTitle) < 3) { // Part has < 3 words. | 54 if (StringUtil.countWords(currTitle) < 3) { // Part has < 3 words. |
55 // Get part after first '|' or '-'. | 55 // Get part after first '|' or '-'. |
56 currTitle = StringUtil.findAndReplace(origTitle, "[^\\|\\-]*[\\|
\\-](.*)", "$1"); | 56 currTitle = StringUtil.findAndReplace(origTitle, "[^\\|\\-]*[\\|
\\-](.*)", "$1"); |
57 } | 57 } |
58 } else if (currTitle.indexOf(": ") != -1) { // Title has ':'. | 58 } else if (currTitle.indexOf(": ") != -1) { // Title has ':'. |
59 // Get part after last ':'. | 59 // Get part after last ':'. |
(...skipping 23 matching lines...) Expand all Loading... |
83 NodeList<Element> hOnes = root.getElementsByTagName("H1"); | 83 NodeList<Element> hOnes = root.getElementsByTagName("H1"); |
84 // Use javascript innerText instead of javascript textContent; the forme
r only returns | 84 // Use javascript innerText instead of javascript textContent; the forme
r only returns |
85 // visible text, and we assume visible H1's are more inclined to being p
otential titles. | 85 // visible text, and we assume visible H1's are more inclined to being p
otential titles. |
86 String h1 = ""; | 86 String h1 = ""; |
87 for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) { | 87 for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) { |
88 h1 = DomUtil.getInnerText(hOnes.getItem(i)); | 88 h1 = DomUtil.getInnerText(hOnes.getItem(i)); |
89 } | 89 } |
90 return h1; | 90 return h1; |
91 } | 91 } |
92 } | 92 } |
OLD | NEW |