Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(484)

Unified Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Fixed inconsistent indentation Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: javatests/org/chromium/distiller/ContentExtractorTest.java
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index 0b03d6a9dd95b2177f5233aaa61e93beb6229ef7..eae624a98af28934d1fb44b723745818e19079e7 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -512,6 +512,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessArticleElementWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<article>" + article + "</article>" +
+ "<article style=\"display:none\">" + article + "</article>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessArticleElementMultiple() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -525,6 +536,20 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(expected, htmlArticle);
}
+ public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<article>" + article + "</article>" +
+ "<article style=\"display:none\">" + article + "</article>" +
+ "<article>" + article + "</article>";
+ final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
+
+ // The existence of multiple articles disables the fast path.
+ assertExtractor(expected, htmlArticle);
+ }
+
public void testOnlyProcessOGArticle() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -535,6 +560,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
mdjones 2015/10/21 18:15:02 Please keep lines under 100 characters.
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleNews() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -545,6 +581,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleBlog() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -555,6 +602,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleNested() {
final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
final String article = paragraph + paragraph;
@@ -569,6 +627,36 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() {
+ final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+ final String article = paragraph + paragraph;
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" +
+ paragraph +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>" +
+ "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
+ public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() {
+ final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+ final String article = paragraph + paragraph;
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" +
+ paragraph +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
+ "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGNonArticleMovie() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

Powered by Google App Engine
This is Rietveld 408576698