Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(437)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Comments addressed Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 // Make sure everything is there before using the fast path. 505 // Make sure everything is there before using the fast path.
506 assertExtractor(expected, html); 506 assertExtractor(expected, html);
507 507
508 final String htmlArticle = 508 final String htmlArticle =
509 "<h1>" + CONTENT_TEXT + "</h1>" + 509 "<h1>" + CONTENT_TEXT + "</h1>" +
510 "<article>" + article + "</article>"; 510 "<article>" + article + "</article>";
511 511
512 assertExtractor(article, htmlArticle); 512 assertExtractor(article, htmlArticle);
513 } 513 }
514 514
515 public void testOnlyProcessArticleElementWithHiddenArticleElement() {
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
517
518 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<article>" + article + "</article>" +
521 "<article style=\"display:none\">" + article + "</article>";
522
523 assertExtractor(article, htmlArticle);
524 }
525
515 public void testOnlyProcessArticleElementMultiple() { 526 public void testOnlyProcessArticleElementMultiple() {
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
517 528
518 final String htmlArticle = 529 final String htmlArticle =
519 "<h1>" + CONTENT_TEXT + "</h1>" + 530 "<h1>" + CONTENT_TEXT + "</h1>" +
520 "<article>" + article + "</article>" + 531 "<article>" + article + "</article>" +
521 "<article>" + article + "</article>"; 532 "<article>" + article + "</article>";
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
523 534
524 // The existence of multiple articles disables the fast path. 535 // The existence of multiple articles disables the fast path.
525 assertExtractor(expected, htmlArticle); 536 assertExtractor(expected, htmlArticle);
526 } 537 }
527 538
539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() {
540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
541
542 final String htmlArticle =
543 "<h1>" + CONTENT_TEXT + "</h1>" +
544 "<article>" + article + "</article>" +
545 "<article style=\"display:none\">" + article + "</article>" +
546 "<article>" + article + "</article>";
547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;
548
549 // The existence of multiple articles disables the fast path.
550 assertExtractor(expected, htmlArticle);
551 }
552
528 public void testOnlyProcessOGArticle() { 553 public void testOnlyProcessOGArticle() {
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
530 555
531 final String htmlArticle = 556 final String htmlArticle =
532 "<h1>" + CONTENT_TEXT + "</h1>" + 557 "<h1>" + CONTENT_TEXT + "</h1>" +
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";
534 559
535 assertExtractor(article, htmlArticle); 560 assertExtractor(article, htmlArticle);
536 } 561 }
537 562
563 public void testOnlyProcessOGArticleWithHiddenArticleElement() {
564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
565
566 final String htmlArticle =
567 "<h1>" + CONTENT_TEXT + "</h1>" +
568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" +
569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" +
570 article + "</div>";
571
572 assertExtractor(article, htmlArticle);
573 }
574
538 public void testOnlyProcessOGArticleNews() { 575 public void testOnlyProcessOGArticleNews() {
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
540 577
541 final String htmlArticle = 578 final String htmlArticle =
542 "<h1>" + CONTENT_TEXT + "</h1>" + 579 "<h1>" + CONTENT_TEXT + "</h1>" +
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; 580 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";
544 581
545 assertExtractor(article, htmlArticle); 582 assertExtractor(article, htmlArticle);
546 } 583 }
547 584
585 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() {
586 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
587
588 final String htmlArticle =
589 "<h1>" + CONTENT_TEXT + "</h1>" +
590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" +
591 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" +
592 article + "</div>";
593
594 assertExtractor(article, htmlArticle);
595 }
596
548 public void testOnlyProcessOGArticleBlog() { 597 public void testOnlyProcessOGArticleBlog() {
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 598 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
550 599
551 final String htmlArticle = 600 final String htmlArticle =
552 "<h1>" + CONTENT_TEXT + "</h1>" + 601 "<h1>" + CONTENT_TEXT + "</h1>" +
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; 602 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";
554 603
555 assertExtractor(article, htmlArticle); 604 assertExtractor(article, htmlArticle);
556 } 605 }
557 606
607 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() {
608 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
609
610 final String htmlArticle =
611 "<h1>" + CONTENT_TEXT + "</h1>" +
612 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" +
613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" +
614 article + "</div>";
615
616 assertExtractor(article, htmlArticle);
617 }
618
558 public void testOnlyProcessOGArticleNested() { 619 public void testOnlyProcessOGArticleNested() {
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; 620 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
560 final String article = paragraph + paragraph; 621 final String article = paragraph + paragraph;
561 622
562 final String htmlArticle = 623 final String htmlArticle =
563 "<h1>" + CONTENT_TEXT + "</h1>" + 624 "<h1>" + CONTENT_TEXT + "</h1>" +
564 "<div itemscope itemtype=\"http://schema.org/Article\">" + 625 "<div itemscope itemtype=\"http://schema.org/Article\">" +
565 paragraph + 626 paragraph +
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + 627 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
567 "</div>"; 628 "</div>";
568 629
569 assertExtractor(article, htmlArticle); 630 assertExtractor(article, htmlArticle);
570 } 631 }
571 632
633 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() {
634 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
635 final String article = paragraph + paragraph;
636
637 final String htmlArticle =
638 "<h1>" + CONTENT_TEXT + "</h1>" +
639 "<div itemscope itemtype=\"http://schema.org/Article\">" +
640 paragraph +
641 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +
642 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" +
643 article + "</div>" +
644 "</div>";
645
646 assertExtractor(article, htmlArticle);
647 }
648
649 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() {
650 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";
651 final String article = paragraph + paragraph;
652
653 final String htmlArticle =
654 "<h1>" + CONTENT_TEXT + "</h1>" +
wychen 2015/10/21 21:00:46 nit: You could probably match the indentation styl
655 "<div itemscope itemtype=\"http://schema.org/Article\">" +
656 paragraph +
657 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph +
658 "</div>" +
659 "</div>" +
660 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" +
661 article + "</div>";
662
663 assertExtractor(article, htmlArticle);
664 }
665
572 public void testOnlyProcessOGNonArticleMovie() { 666 public void testOnlyProcessOGNonArticleMovie() {
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; 667 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";
574 668
575 final String htmlArticle = 669 final String htmlArticle =
576 "<h1>" + CONTENT_TEXT + "</h1>" + 670 "<h1>" + CONTENT_TEXT + "</h1>" +
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; 671 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 672 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
579 673
580 // Non-article schema.org types should not use the fast path. 674 // Non-article schema.org types should not use the fast path.
581 assertExtractor(expected, htmlArticle); 675 assertExtractor(expected, htmlArticle);
582 } 676 }
583 } 677 }
OLDNEW
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698