Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(759)

Unified Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: javatests/org/chromium/distiller/ContentExtractorTest.java
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index 0b03d6a9dd95b2177f5233aaa61e93beb6229ef7..15a15e5b71e5ba0e3aacec14e4a51dea7c30fd97 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -512,6 +512,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessArticleElementWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<article>" + article + "</article>" +
+ "<article style=\"display:none\">" + article + "</article>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessArticleElementMultiple() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -525,6 +536,20 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(expected, htmlArticle);
}
+ public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<article>" + article + "</article>" +
+ "<article style=\"display:none\">" + article + "</article>" +
+ "<article>" + article + "</article>";
+ final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
+
+ // The existence of multiple articles disables the fast path.
+ assertExtractor(expected, htmlArticle);
+ }
+
public void testOnlyProcessOGArticle() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -535,6 +560,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleNews() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -545,6 +581,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleBlog() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
@@ -555,6 +602,17 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() {
+ final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGArticleNested() {
final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
final String article = paragraph + paragraph;
@@ -569,6 +627,36 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
assertExtractor(article, htmlArticle);
}
+ public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() {
+ final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+ final String article = paragraph + paragraph;
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" +
+ paragraph +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>" +
+ "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
+ public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() {
+ final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+ final String article = paragraph + paragraph;
+
+ final String htmlArticle =
+ "<h1>" + CONTENT_TEXT + "</h1>" +
[Review comment] wychen (2015/10/21 17:21:20): nit: inconsistent indentation around here.
+ "<div itemscope itemtype=\"http://schema.org/Article\">" +
+ paragraph +
+ "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
+ "</div>" +
+ "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
+
+ assertExtractor(article, htmlArticle);
+ }
+
public void testOnlyProcessOGNonArticleMovie() {
final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698