Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(727)

Unified Diff: src/com/dom_distiller/client/DocumentTitleGetter.java

Issue 449923002: gwt getInnerText -> javascript innerText or textContent (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/com/dom_distiller/client/ContentExtractor.java ('k') | src/com/dom_distiller/client/DomUtil.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/com/dom_distiller/client/DocumentTitleGetter.java
diff --git a/src/com/dom_distiller/client/DocumentTitleGetter.java b/src/com/dom_distiller/client/DocumentTitleGetter.java
index 2910503027478bee110afdbf452eb70b040dd889..fbbd700017c58cd0280e4fb44f58d9657a2915b2 100644
--- a/src/com/dom_distiller/client/DocumentTitleGetter.java
+++ b/src/com/dom_distiller/client/DocumentTitleGetter.java
@@ -34,14 +34,16 @@ public class DocumentTitleGetter {
* @return The title of the distilled document.
*/
public static String getDocumentTitle(Object objTitle, Element root) {
- String currTitle = "", origTitle = "";
+ String currTitle = "", origTitle = "";
if (objTitle.getClass() == currTitle.getClass()) { // If objTitle is of String type.
currTitle = origTitle = objTitle.toString();
} else if (root != null) { // Otherwise, use text of first TITLE element.
NodeList<Element> titles = root.getElementsByTagName("TITLE");
if (titles.getLength() > 0) {
- currTitle = origTitle = titles.getItem(0).getInnerText();
+ // Use javascript textContent instead of javascript innerText; the latter only returns
+ // visible text, but <title> tags are invisible.
+ currTitle = origTitle = DomUtil.javascriptTextContent(titles.getItem(0));
}
}
if (currTitle == "") return "";
@@ -65,8 +67,8 @@ public class DocumentTitleGetter {
// TODO(kuan): this is what readability does, but this block may make more sense as an
// if rather than else-if, e.g. currently this else-if block is used when original title
// is "foo" but not when it is "foo |" or "foo:".
- currTitle = findTheOnlyH1(root);
- if (currTitle == null) currTitle = origTitle;
+ currTitle = findFirstH1(root);
+ if (currTitle.isEmpty()) currTitle = origTitle;
}
currTitle = StringUtil.trim(currTitle);
@@ -77,8 +79,14 @@ public class DocumentTitleGetter {
}
- private static String findTheOnlyH1(Element root) {
+ private static String findFirstH1(Element root) {
NodeList<Element> hOnes = root.getElementsByTagName("H1");
- return hOnes.getLength() == 1 ? hOnes.getItem(0).getInnerText() : null;
+ // Use javascript innerText instead of javascript textContent; the former only returns
+ // visible text, and we assume visible H1's are more inclined to being potential titles.
+ String h1 = "";
+ for (int i = 0; i < hOnes.getLength() && h1.isEmpty(); i++) {
+ h1 = DomUtil.getInnerText(hOnes.getItem(i));
+ }
+ return h1;
}
}
« no previous file with comments | « src/com/dom_distiller/client/ContentExtractor.java ('k') | src/com/dom_distiller/client/DomUtil.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698