| Index: java/org/chromium/distiller/ContentExtractor.java
|
| diff --git a/java/org/chromium/distiller/ContentExtractor.java b/java/org/chromium/distiller/ContentExtractor.java
|
| index 4a8f8bd8dbee9acff98fbbc9084f7f5d8b4626fe..7d16a774dd4360a4ce2015ed5296b466d386e969 100644
|
| --- a/java/org/chromium/distiller/ContentExtractor.java
|
| +++ b/java/org/chromium/distiller/ContentExtractor.java
|
| @@ -162,35 +162,13 @@ public class ContentExtractor {
|
| }
|
|
|
| /**
|
| - * Get the element of the main article, if any.
|
| - * @return An element of article (not necessarily the html5 article element).
|
| - */
|
| - private Element getArticleElement(Element root) {
|
| - NodeList<Element> allArticles = root.getElementsByTagName("ARTICLE");
|
| - // Having multiple article elements usually indicates a bad case for this shortcut.
|
| - // TODO(wychen): some sites exclude things like title and author in article element.
|
| - if (allArticles.getLength() == 1) {
|
| - return allArticles.getItem(0);
|
| - }
|
| - // Note that the CSS property matching is case sensitive, and "Article" is the correct
|
| - // capitalization.
|
| - String query = "[itemscope][itemtype*=\"Article\"],[itemscope][itemtype*=\"Post\"]";
|
| - allArticles = DomUtil.querySelectorAll(root, query);
|
| - // It is commonly seen that the article is wrapped separately or in multiple layers.
|
| - if (allArticles.getLength() > 0) {
|
| - return Element.as(DomUtil.getNearestCommonAncestor(allArticles));
|
| - }
|
| - return null;
|
| - }
|
| -
|
| - /**
|
| * Converts the original HTML page into a WebDocument for analysis.
|
| */
|
| private WebDocumentInfo createWebDocumentInfoFromPage() {
|
| WebDocumentInfo info = new WebDocumentInfo();
|
| WebDocumentBuilder documentBuilder = new WebDocumentBuilder();
|
| DomConverter converter = new DomConverter(documentBuilder);
|
| - Element walkerRoot = getArticleElement(documentElement);
|
| + Element walkerRoot = DomUtil.getArticleElement(documentElement);
|
| if (walkerRoot == null) {
|
| walkerRoot = documentElement;
|
| }
|
|
|