Chromium Code Reviews| Index: javatests/org/chromium/distiller/ContentExtractorTest.java |
| diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java |
| index 0b03d6a9dd95b2177f5233aaa61e93beb6229ef7..584236f3dc869f4b01d8c59cce1fb483bbe5e54f 100644 |
| --- a/javatests/org/chromium/distiller/ContentExtractorTest.java |
| +++ b/javatests/org/chromium/distiller/ContentExtractorTest.java |
| @@ -512,6 +512,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(article, htmlArticle); |
| } |
| + public void testOnlyProcessArticleElementWithHiddenArticleElement() { /* Input: an <h1>, one visible <article>, and a second <article> with the same body styled display:none. */ |
| + final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<article>" + article + "</article>" + |
| + "<article style=\"display:none\">" + article + "</article>"; /* hidden duplicate of the visible article */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is a single copy of the article body, without the <h1>. NOTE(review): presumably assertExtractor compares its first argument with the extraction result of the second; the helper is not visible here, confirm. */ |
| + } |
| + |
| public void testOnlyProcessArticleElementMultiple() { |
| final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| @@ -525,6 +536,20 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(expected, htmlArticle); |
| } |
| + public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { /* Input: an <h1> and three <article> elements with the same body; the middle one is styled display:none. */ |
| + final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<article>" + article + "</article>" + |
| + "<article style=\"display:none\">" + article + "</article>" + /* hidden article placed between the two visible ones */ |
| + "<article>" + article + "</article>"; |
| + final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; /* heading plus two article bodies; there is no third copy for the hidden element */ |
| + |
| + // The existence of multiple articles disables the fast path. |
| + assertExtractor(expected, htmlArticle); /* NOTE(review): presumably checks that extracting htmlArticle yields expected; helper not visible here, confirm. */ |
| + } |
| + |
| public void testOnlyProcessOGArticle() { |
| final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| @@ -535,6 +560,18 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(article, htmlArticle); |
| } |
| + public void testOnlyProcessOGArticleWithHiddenArticleElement() { /* Input: an <h1>, one visible div marked with the schema.org Article itemtype, and a second such div styled display:none. */ |
| + final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" + |
| + "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + |
| + article + "</div>"; /* end of the hidden Article div, which carries the same body as the visible one */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is a single copy of the article body, without the <h1>. NOTE(review): assertExtractor semantics are assumed from its name and arguments; confirm against the helper. */ |
| + } |
| + |
| public void testOnlyProcessOGArticleNews() { |
| final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| @@ -545,6 +582,18 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(article, htmlArticle); |
| } |
| + public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { /* Same layout as the Article itemtype case in this file, but both divs use the schema.org NewsArticle itemtype; the second div is styled display:none. */ |
| + final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" + |
| + "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + |
| + article + "</div>"; /* end of the hidden NewsArticle div */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is a single copy of the article body, without the <h1>. NOTE(review): assertExtractor is defined outside this view; confirm it compares expected output with the extraction of the input. */ |
| + } |
| + |
| public void testOnlyProcessOGArticleBlog() { |
| final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| @@ -555,6 +604,18 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(article, htmlArticle); |
| } |
| + public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { /* Input: an <h1>, one visible div with the schema.org BlogPosting itemtype, and a second BlogPosting div styled display:none. */ |
| + final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" + |
| + "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + |
| + article + "</div>"; /* end of the hidden BlogPosting div */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is a single copy of the article body, without the <h1>. NOTE(review): assertExtractor is defined outside this view; confirm its contract. */ |
| + } |
| + |
| public void testOnlyProcessOGArticleNested() { |
| final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| final String article = paragraph + paragraph; |
| @@ -569,6 +630,39 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| assertExtractor(article, htmlArticle); |
| } |
| + public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { /* Input: an outer Article div that holds one paragraph, a visible nested Article div, and a nested Article div styled display:none. */ |
| + final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| + final String article = paragraph + paragraph; /* two paragraphs; used both as the expected output and as the hidden div's body */ |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |
| + "<div itemscope itemtype=\"http://schema.org/Article\">" + /* outer Article div opens */ |
| + paragraph + |
| + "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + /* visible nested Article div with one paragraph */ |
| + "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + |
| + article + "</div>" + /* hidden nested Article div with two paragraphs */ |
| + "</div>"; /* outer Article div closes */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is exactly two paragraphs: the outer one plus the visible nested one. NOTE(review): assertExtractor semantics assumed; confirm against the helper. */ |
| + } |
| + |
| + public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { /* Input: an outer Article div holding one paragraph and a visible nested Article div, followed by a sibling Article div styled display:none. */ |
| + final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| + final String article = paragraph + paragraph; /* two paragraphs; used both as the expected output and as the hidden div's body */ |
| + |
| + final String htmlArticle = |
| + "<h1>" + CONTENT_TEXT + "</h1>" + |

wychen
2015/10/21 21:00:46
nit: You could probably match the indentation styl
|
| + "<div itemscope itemtype=\"http://schema.org/Article\">" + /* outer Article div opens */ |
| + paragraph + |
| + "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + /* visible nested Article div with one paragraph */ |
| + "</div>" + |
| + "</div>" + /* outer Article div closes */ |
| + "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + |
| + article + "</div>"; /* hidden Article div, a sibling of the outer one rather than nested inside it */ |
| + |
| + assertExtractor(article, htmlArticle); /* Expected value is exactly two paragraphs: the outer one plus the visible nested one. NOTE(review): assertExtractor semantics assumed; confirm against the helper. */ |
| + } |
| + |
| public void testOnlyProcessOGNonArticleMovie() { |
| final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |