Index: javatests/org/chromium/distiller/ContentExtractorTest.java |
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java |
index 0b03d6a9dd95b2177f5233aaa61e93beb6229ef7..15a15e5b71e5ba0e3aacec14e4a51dea7c30fd97 100644 |
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java |
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java |
@@ -512,6 +512,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(article, htmlArticle); |
} |
+ public void testOnlyProcessArticleElementWithHiddenArticleElement() { |
+     // Page layout: a heading, one visible <article>, then a display:none <article> |
+     // carrying the same paragraphs. The expected extraction is the paragraphs alone. |
+     final String paragraphs = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
+     final String visible = "<article>" + paragraphs + "</article>"; |
+     final String hidden = "<article style=\"display:none\">" + paragraphs + "</article>"; |
+     final String page = "<h1>" + CONTENT_TEXT + "</h1>" + visible + hidden; |
+ |
+     assertExtractor(paragraphs, page); |
+ } |
+ |
public void testOnlyProcessArticleElementMultiple() { |
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
@@ -525,6 +536,20 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(expected, htmlArticle); |
} |
+ public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { |
+     // Page layout: a heading, then two visible <article> elements with a display:none |
+     // <article> between them; all three carry the same paragraphs. |
+     final String paragraphs = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
+     final String heading = "<h1>" + CONTENT_TEXT + "</h1>"; |
+     final String visible = "<article>" + paragraphs + "</article>"; |
+     final String hidden = "<article style=\"display:none\">" + paragraphs + "</article>"; |
+     final String page = heading + visible + hidden + visible; |
+ |
+     // The existence of multiple articles disables the fast path. |
+     final String expected = heading + paragraphs + paragraphs; |
+     assertExtractor(expected, page); |
+ } |
+ |
public void testOnlyProcessOGArticle() { |
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
@@ -535,6 +560,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(article, htmlArticle); |
} |
+ public void testOnlyProcessOGArticleWithHiddenArticleElement() { |
+     // Page layout: a heading, one visible schema.org/Article <div>, then a display:none |
+     // <div> of the same itemtype and body. The expected extraction is the body alone. |
+     final String paragraphs = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
+     final String visible = |
+             "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraphs + "</div>"; |
+     final String hidden = |
+             "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" |
+             + paragraphs + "</div>"; |
+     final String page = "<h1>" + CONTENT_TEXT + "</h1>" + visible + hidden; |
+ |
+     assertExtractor(paragraphs, page); |
+ } |
+ |
public void testOnlyProcessOGArticleNews() { |
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
@@ -545,6 +581,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(article, htmlArticle); |
} |
+ public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { |
+     // Page layout: a heading, one visible schema.org/NewsArticle <div>, then a |
+     // display:none <div> of the same itemtype and body. The expected extraction is the |
+     // body alone. |
+     final String paragraphs = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
+     final String visible = |
+             "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + paragraphs + "</div>"; |
+     final String hidden = |
+             "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" |
+             + paragraphs + "</div>"; |
+     final String page = "<h1>" + CONTENT_TEXT + "</h1>" + visible + hidden; |
+ |
+     assertExtractor(paragraphs, page); |
+ } |
+ |
public void testOnlyProcessOGArticleBlog() { |
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
@@ -555,6 +602,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(article, htmlArticle); |
} |
+ public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { |
+     // Page layout: a heading, one visible schema.org/BlogPosting <div>, then a |
+     // display:none <div> of the same itemtype and body. The expected extraction is the |
+     // body alone. |
+     final String paragraphs = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
+     final String visible = |
+             "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + paragraphs + "</div>"; |
+     final String hidden = |
+             "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" |
+             + paragraphs + "</div>"; |
+     final String page = "<h1>" + CONTENT_TEXT + "</h1>" + visible + hidden; |
+ |
+     assertExtractor(paragraphs, page); |
+ } |
+ |
public void testOnlyProcessOGArticleNested() { |
final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
final String article = paragraph + paragraph; |
@@ -569,6 +627,36 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
assertExtractor(article, htmlArticle); |
} |
+ public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { |
+     // Page layout: a heading, then an outer schema.org/Article <div> holding one |
+     // paragraph, a visible nested Article <div> with one paragraph, and a display:none |
+     // nested Article <div> with two paragraphs. The expected extraction is two paragraphs. |
+     final String para = "<p>" + CONTENT_TEXT + "</p>"; |
+     final String twoParas = para + para; |
+     final String visibleInner = |
+             "<div itemscope itemtype=\"http://schema.org/Article\">" + para + "</div>"; |
+     final String hiddenInner = |
+             "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" |
+             + twoParas + "</div>"; |
+     final String outer = "<div itemscope itemtype=\"http://schema.org/Article\">" |
+             + para + visibleInner + hiddenInner |
+             + "</div>"; |
+     final String page = "<h1>" + CONTENT_TEXT + "</h1>" + outer; |
+ |
+     assertExtractor(twoParas, page); |
+ } |
+ |
+ public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { |
+ final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
+ final String article = paragraph + paragraph; |
+ /* Page layout: heading, an outer Article <div> (one paragraph plus a nested Article <div> with one paragraph), then a display:none Article <div> as a sibling after the outer one. */ |
+ final String htmlArticle = |
+ "<h1>" + CONTENT_TEXT + "</h1>" + |
wychen
2015/10/21 17:21:20
nit: inconsistent indentation around here.
|
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + |
+ paragraph + |
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + |
+ "</div>" + |
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>"; |
+ |
+ assertExtractor(article, htmlArticle); |
+ } |
+ |
public void testOnlyProcessOGNonArticleMovie() { |
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |