Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 // Make sure everything is there before using the fast path. 505 // Make sure everything is there before using the fast path.
506 assertExtractor(expected, html); 506 assertExtractor(expected, html);
507 507
508 final String htmlArticle = 508 final String htmlArticle =
509 "<h1>" + CONTENT_TEXT + "</h1>" + 509 "<h1>" + CONTENT_TEXT + "</h1>" +
510 "<article>" + article + "</article>"; 510 "<article>" + article + "</article>";
511 511
512 assertExtractor(article, htmlArticle); 512 assertExtractor(article, htmlArticle);
513 } 513 }
514 514
515 public void testOnlyProcessArticleElementWithHiddenArticleElement() {
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
517
518 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<article>" + article + "</article>" +
521 "<article style=\"display:none\">" + article + "</article>";
522
523 assertExtractor(article, htmlArticle);
524 }
525
515 public void testOnlyProcessArticleElementMultiple() { 526 public void testOnlyProcessArticleElementMultiple() {
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
517 528
518 final String htmlArticle = 529 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" + 530 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<article>" + article + "</article>" + 531 "<article>" + article + "</article>" +
521 "<article>" + article + "</article>"; 532 "<article>" + article + "</article>";
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
523 534
524 // The existence of multiple articles disables the fast path. 535 // The existence of multiple articles disables the fast path.
525 assertExtractor(expected, htmlArticle); 536 assertExtractor(expected, htmlArticle);
526 } 537 }
527 538
539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() {
540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
541
542 final String htmlArticle =
543 "<h1>" + CONTENT_TEXT + "</h1>" +
544 "<article>" + article + "</article>" +
545 "<article style=\"display:none\">" + article + "</article>" +
546 "<article>" + article + "</article>";
547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
548
549 // The existence of multiple articles disables the fast path.
550 assertExtractor(expected, htmlArticle);
551 }
552
528 public void testOnlyProcessOGArticle() { 553 public void testOnlyProcessOGArticle() {
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
530 555
531 final String htmlArticle = 556 final String htmlArticle =
532 "<h1>" + CONTENT_TEXT + "</h1>" + 557 "<h1>" + CONTENT_TEXT + "</h1>" +
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";
534 559
535 assertExtractor(article, htmlArticle); 560 assertExtractor(article, htmlArticle);
536 } 561 }
537 562
563 public void testOnlyProcessOGArticleWithHiddenArticleElement() {
564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
565
566 final String htmlArticle =
567 "<h1>" + CONTENT_TEXT + "</h1>" +
568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" +
569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
570
571 assertExtractor(article, htmlArticle);
572 }
573
538 public void testOnlyProcessOGArticleNews() { 574 public void testOnlyProcessOGArticleNews() {
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 575 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
540 576
541 final String htmlArticle = 577 final String htmlArticle =
542 "<h1>" + CONTENT_TEXT + "</h1>" + 578 "<h1>" + CONTENT_TEXT + "</h1>" +
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; 579 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";
544 580
545 assertExtractor(article, htmlArticle); 581 assertExtractor(article, htmlArticle);
546 } 582 }
547 583
584 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() {
585 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
586
587 final String htmlArticle =
588 "<h1>" + CONTENT_TEXT + "</h1>" +
589 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" +
590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + article + "</div>";
591
592 assertExtractor(article, htmlArticle);
593 }
594
548 public void testOnlyProcessOGArticleBlog() { 595 public void testOnlyProcessOGArticleBlog() {
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 596 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
550 597
551 final String htmlArticle = 598 final String htmlArticle =
552 "<h1>" + CONTENT_TEXT + "</h1>" + 599 "<h1>" + CONTENT_TEXT + "</h1>" +
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; 600 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";
554 601
555 assertExtractor(article, htmlArticle); 602 assertExtractor(article, htmlArticle);
556 } 603 }
557 604
605 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() {
606 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
607
608 final String htmlArticle =
609 "<h1>" + CONTENT_TEXT + "</h1>" +
610 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" +
611 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + article + "</div>";
612
613 assertExtractor(article, htmlArticle);
614 }
615
558 public void testOnlyProcessOGArticleNested() { 616 public void testOnlyProcessOGArticleNested() {
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; 617 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
560 final String article = paragraph + paragraph; 618 final String article = paragraph + paragraph;
561 619
562 final String htmlArticle = 620 final String htmlArticle =
563 "<h1>" + CONTENT_TEXT + "</h1>" + 621 "<h1>" + CONTENT_TEXT + "</h1>" +
564 "<div itemscope itemtype=\"http://schema.org/Article\">" + 622 "<div itemscope itemtype=\"http://schema.org/Article\">" +
565 paragraph + 623 paragraph +
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + 624 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
567 "</div>"; 625 "</div>";
568 626
569 assertExtractor(article, htmlArticle); 627 assertExtractor(article, htmlArticle);
570 } 628 }
571 629
630 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() {
631 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
632 final String article = paragraph + paragraph;
633
634 final String htmlArticle =
635 "<h1>" + CONTENT_TEXT + "</h1>" +
636 "<div itemscope itemtype=\"http://schema.org/Article\">" +
637 paragraph +
638 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
639 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>" +
640 "</div>";
641
642 assertExtractor(article, htmlArticle);
643 }
644
645 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() {
646 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
647 final String article = paragraph + paragraph;
648
649 final String htmlArticle =
650 "<h1>" + CONTENT_TEXT + "</h1>" +
wychen 2015/10/21 17:21:20 nit: inconsistent indentation around here.
651 "<div itemscope itemtype=\"http://schema.org/Article\">" +
652 paragraph +
653 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
654 "</div>" +
655 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>";
656
657 assertExtractor(article, htmlArticle);
658 }
659
572 public void testOnlyProcessOGNonArticleMovie() { 660 public void testOnlyProcessOGNonArticleMovie() {
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 661 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
574 662
575 final String htmlArticle = 663 final String htmlArticle =
576 "<h1>" + CONTENT_TEXT + "</h1>" + 664 "<h1>" + CONTENT_TEXT + "</h1>" +
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; 665 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 666 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
579 667
580 // Non-article schema.org types should not use the fast path. 668 // Non-article schema.org types should not use the fast path.
581 assertExtractor(expected, htmlArticle); 669 assertExtractor(expected, htmlArticle);
582 } 670 }
583 } 671 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698