Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: wychen's comments addressed Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 private void assertExtractor(String expected, String html) { 488 private void assertExtractor(String expected, String html) {
489 mBody.setInnerHTML(""); 489 mBody.setInnerHTML("");
490 Element div = TestUtil.createDiv(0); 490 Element div = TestUtil.createDiv(0);
491 mBody.appendChild(div); 491 mBody.appendChild(div);
492 492
493 div.setInnerHTML(html); 493 div.setInnerHTML(html);
494 ContentExtractor extractor = new ContentExtractor(mRoot); 494 ContentExtractor extractor = new ContentExtractor(mRoot);
495 String extractedContent = extractor.extractContent(); 495 String extractedContent = extractor.extractContent();
496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
497 } 497 }
498
499 public void testOnlyProcessArticleElement() {
500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
501
502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>";
503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
504
505 // Make sure everything is there before using the fast path.
506 assertExtractor(expected, html);
507
508 final String htmlArticle =
509 "<h1>" + CONTENT_TEXT + "</h1>" +
510 "<article>" + article + "</article>";
511
512 assertExtractor(article, htmlArticle);
513 }
514
515 public void testOnlyProcessArticleElementMultiple() {
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
517
518 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<article>" + article + "</article>" +
521 "<article>" + article + "</article>";
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
523
524 // The existence of multiple articles disables the fast path.
525 assertExtractor(expected, htmlArticle);
526 }
527
528 public void testOnlyProcessOGArticle() {
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
530
531 final String htmlArticle =
532 "<h1>" + CONTENT_TEXT + "</h1>" +
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";
534
535 assertExtractor(article, htmlArticle);
536 }
537
538 public void testOnlyProcessOGArticleNews() {
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
540
541 final String htmlArticle =
542 "<h1>" + CONTENT_TEXT + "</h1>" +
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";
544
545 assertExtractor(article, htmlArticle);
546 }
547
548 public void testOnlyProcessOGArticleBlog() {
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
550
551 final String htmlArticle =
552 "<h1>" + CONTENT_TEXT + "</h1>" +
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";
554
555 assertExtractor(article, htmlArticle);
556 }
557
558 public void testOnlyProcessOGArticleNested() {
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
560 final String article = paragraph + paragraph;
561
562 final String htmlArticle =
563 "<h1>" + CONTENT_TEXT + "</h1>" +
564 "<div itemscope itemtype=\"http://schema.org/Article\">" +
565 paragraph +
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
567 "</div>";
568
569 assertExtractor(article, htmlArticle);
570 }
571
572 public void testOnlyProcessOGNonArticleMovie() {
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
574
575 final String htmlArticle =
576 "<h1>" + CONTENT_TEXT + "</h1>" +
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
579
580 // Non-article schema.org types should not use the fast path.
581 assertExtractor(expected, htmlArticle);
582 }
583 } 498 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698