Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: nitpick fixed 2 Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after
549 mBody.setInnerHTML(""); 549 mBody.setInnerHTML("");
550 Element div = TestUtil.createDiv(0); 550 Element div = TestUtil.createDiv(0);
551 mBody.appendChild(div); 551 mBody.appendChild(div);
552 552
553 div.setInnerHTML(html); 553 div.setInnerHTML(html);
554 ContentExtractor extractor = new ContentExtractor(mRoot); 554 ContentExtractor extractor = new ContentExtractor(mRoot);
555 String extractedContent = extractor.extractContent(); 555 String extractedContent = extractor.extractContent();
556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); 556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
557 } 557 }
558 558
559 public void testOnlyProcessArticleElement() {
560 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
561
562 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>";
563 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
564
565 // Make sure everything is there before using the fast path.
566 assertExtractor(expected, html);
567
568 final String htmlArticle =
569 "<h1>" + CONTENT_TEXT + "</h1>" +
570 "<article>" + article + "</article>";
571
572 assertExtractor(article, htmlArticle);
573 }
574
575 public void testOnlyProcessArticleElementMultiple() {
576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
577
578 final String htmlArticle =
579 "<h1>" + CONTENT_TEXT + "</h1>" +
580 "<article>" + article + "</article>" +
581 "<article>" + article + "</article>";
582 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
583
584 // The existence of multiple articles disables the fast path.
585 assertExtractor(expected, htmlArticle);
586 }
587
588 public void testOnlyProcessOGArticle() {
589 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
590
591 final String htmlArticle =
592 "<h1>" + CONTENT_TEXT + "</h1>" +
593 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";
594
595 assertExtractor(article, htmlArticle);
596 }
597
598 public void testOnlyProcessOGArticleNews() {
599 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
600
601 final String htmlArticle =
602 "<h1>" + CONTENT_TEXT + "</h1>" +
603 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";
604
605 assertExtractor(article, htmlArticle);
606 }
607
608 public void testOnlyProcessOGArticleBlog() {
609 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
610
611 final String htmlArticle =
612 "<h1>" + CONTENT_TEXT + "</h1>" +
613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";
614
615 assertExtractor(article, htmlArticle);
616 }
617
618 public void testOnlyProcessOGArticleNested() {
619 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
620 final String article = paragraph + paragraph;
621
622 final String htmlArticle =
623 "<h1>" + CONTENT_TEXT + "</h1>" +
624 "<div itemscope itemtype=\"http://schema.org/Article\">" +
625 paragraph +
626 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
627 "</div>";
628
629 assertExtractor(article, htmlArticle);
630 }
631
632 public void testOnlyProcessOGNonArticleMovie() {
633 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
634
635 final String htmlArticle =
636 "<h1>" + CONTENT_TEXT + "</h1>" +
637 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
638 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
639
640 // Non-article schema.org types should not use the fast path.
641 assertExtractor(expected, htmlArticle);
642 }
643
644 public void testDropCap() { 559 public void testDropCap() {
645 String html = 560 String html =
646 "<h1>" + 561 "<h1>" +
647 CONTENT_TEXT + 562 CONTENT_TEXT +
648 "</h1>" + 563 "</h1>" +
649 "<p>" + 564 "<p>" +
650 "<strong><span style=\"float: left\">T</span>est</strong>" + 565 "<strong><span style=\"float: left\">T</span>est</strong>" +
651 CONTENT_TEXT + 566 CONTENT_TEXT +
652 "</p>"; 567 "</p>";
653 568
654 final String expected = 569 final String expected =
655 "<h1>" + 570 "<h1>" +
656 CONTENT_TEXT + 571 CONTENT_TEXT +
657 "</h1>" + 572 "</h1>" +
658 "<p>" + 573 "<p>" +
659 "<strong><span>T</span>est</strong>" + 574 "<strong><span>T</span>est</strong>" +
660 CONTENT_TEXT + 575 CONTENT_TEXT +
661 "</p>"; 576 "</p>";
662 577
663 mBody.setInnerHTML(html); 578 mBody.setInnerHTML(html);
664 579
665 ContentExtractor extractor = new ContentExtractor(mRoot); 580 ContentExtractor extractor = new ContentExtractor(mRoot);
666 String extractedContent = extractor.extractContent(); 581 String extractedContent = extractor.extractContent();
667 assertEquals(expected, 582 assertEquals(expected,
668 TestUtil.removeAllDirAttributes(extractedContent)); 583 TestUtil.removeAllDirAttributes(extractedContent));
669 } 584 }
670 } 585 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698