OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 494 matching lines...) | |
505 // Make sure everything is there before using the fast path. | 505 // Make sure everything is there before using the fast path. |
506 assertExtractor(expected, html); | 506 assertExtractor(expected, html); |
507 | 507 |
508 final String htmlArticle = | 508 final String htmlArticle = |
509 "<h1>" + CONTENT_TEXT + "</h1>" + | 509 "<h1>" + CONTENT_TEXT + "</h1>" + |
510 "<article>" + article + "</article>"; | 510 "<article>" + article + "</article>"; |
511 | 511 |
512 assertExtractor(article, htmlArticle); | 512 assertExtractor(article, htmlArticle); |
513 } | 513 } |
514 | 514 |
515 public void testOnlyProcessArticleElementWithHiddenArticleElement() { | |
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
517 | |
518 final String htmlArticle = | |
519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
520 "<article>" + article + "</article>" + | |
521 "<article style=\"display:none\">" + article + "</article>"; | |
522 | |
523 assertExtractor(article, htmlArticle); | |
524 } | |
525 | |
515 public void testOnlyProcessArticleElementMultiple() { | 526 public void testOnlyProcessArticleElementMultiple() { |
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
517 | 528 |
518 final String htmlArticle = | 529 final String htmlArticle = |
519 "<h1>" + CONTENT_TEXT + "</h1>" + | 530 "<h1>" + CONTENT_TEXT + "</h1>" + |
520 "<article>" + article + "</article>" + | 531 "<article>" + article + "</article>" + |
521 "<article>" + article + "</article>"; | 532 "<article>" + article + "</article>"; |
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; |
523 | 534 |
524 // The existence of multiple articles disables the fast path. | 535 // The existence of multiple articles disables the fast path. |
525 assertExtractor(expected, htmlArticle); | 536 assertExtractor(expected, htmlArticle); |
526 } | 537 } |
527 | 538 |
539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { | |
540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
541 | |
542 final String htmlArticle = | |
543 "<h1>" + CONTENT_TEXT + "</h1>" + | |
544 "<article>" + article + "</article>" + | |
545 "<article style=\"display:none\">" + article + "</article>" + | |
546 "<article>" + article + "</article>"; | |
547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | |
548 | |
549 // The existence of multiple articles disables the fast path. | |
550 assertExtractor(expected, htmlArticle); | |
551 } | |
552 | |
528 public void testOnlyProcessOGArticle() { | 553 public void testOnlyProcessOGArticle() { |
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
530 | 555 |
531 final String htmlArticle = | 556 final String htmlArticle = |
532 "<h1>" + CONTENT_TEXT + "</h1>" + | 557 "<h1>" + CONTENT_TEXT + "</h1>" + |
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; | 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; |
534 | 559 |
535 assertExtractor(article, htmlArticle); | 560 assertExtractor(article, htmlArticle); |
536 } | 561 } |
537 | 562 |
563 public void testOnlyProcessOGArticleWithHiddenArticleElement() { | |
564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
565 | |
566 final String htmlArticle = | |
567 "<h1>" + CONTENT_TEXT + "</h1>" + | |
568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" + | |
569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>"; | |
570 | |
571 assertExtractor(article, htmlArticle); | |
572 } | |
573 | |
538 public void testOnlyProcessOGArticleNews() { | 574 public void testOnlyProcessOGArticleNews() { |
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 575 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
540 | 576 |
541 final String htmlArticle = | 577 final String htmlArticle = |
542 "<h1>" + CONTENT_TEXT + "</h1>" + | 578 "<h1>" + CONTENT_TEXT + "</h1>" + |
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; | 579 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; |
544 | 580 |
545 assertExtractor(article, htmlArticle); | 581 assertExtractor(article, htmlArticle); |
546 } | 582 } |
547 | 583 |
584 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { | |
585 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
586 | |
587 final String htmlArticle = | |
588 "<h1>" + CONTENT_TEXT + "</h1>" + | |
589 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" + | |
590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + article + "</div>"; | |
591 | |
592 assertExtractor(article, htmlArticle); | |
593 } | |
594 | |
548 public void testOnlyProcessOGArticleBlog() { | 595 public void testOnlyProcessOGArticleBlog() { |
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 596 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
550 | 597 |
551 final String htmlArticle = | 598 final String htmlArticle = |
552 "<h1>" + CONTENT_TEXT + "</h1>" + | 599 "<h1>" + CONTENT_TEXT + "</h1>" + |
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; | 600 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; |
554 | 601 |
555 assertExtractor(article, htmlArticle); | 602 assertExtractor(article, htmlArticle); |
556 } | 603 } |
557 | 604 |
605 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { | |
606 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
607 | |
608 final String htmlArticle = | |
609 "<h1>" + CONTENT_TEXT + "</h1>" + | |
610 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" + | |
611 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + article + "</div>"; | |
612 | |
613 assertExtractor(article, htmlArticle); | |
614 } | |
615 | |
558 public void testOnlyProcessOGArticleNested() { | 616 public void testOnlyProcessOGArticleNested() { |
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | 617 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
560 final String article = paragraph + paragraph; | 618 final String article = paragraph + paragraph; |
561 | 619 |
562 final String htmlArticle = | 620 final String htmlArticle = |
563 "<h1>" + CONTENT_TEXT + "</h1>" + | 621 "<h1>" + CONTENT_TEXT + "</h1>" + |
564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | 622 "<div itemscope itemtype=\"http://schema.org/Article\">" + |
565 paragraph + | 623 paragraph + |
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | 624 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + |
567 "</div>"; | 625 "</div>"; |
568 | 626 |
569 assertExtractor(article, htmlArticle); | 627 assertExtractor(article, htmlArticle); |
570 } | 628 } |
571 | 629 |
630 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { | |
631 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
632 final String article = paragraph + paragraph; | |
633 | |
634 final String htmlArticle = | |
635 "<h1>" + CONTENT_TEXT + "</h1>" + | |
636 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
637 paragraph + | |
638 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
639 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>" + | |
640 "</div>"; | |
641 | |
642 assertExtractor(article, htmlArticle); | |
643 } | |
644 | |
645 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { | |
646 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
647 final String article = paragraph + paragraph; | |
648 | |
649 final String htmlArticle = | |
650 "<h1>" + CONTENT_TEXT + "</h1>" + | |
wychen
2015/10/21 17:21:20
nit: inconsistent indentation around here.
| |
651 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
652 paragraph + | |
653 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
654 "</div>" + | |
655 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>"; | |
656 | |
657 assertExtractor(article, htmlArticle); | |
658 } | |
659 | |
572 public void testOnlyProcessOGNonArticleMovie() { | 660 public void testOnlyProcessOGNonArticleMovie() { |
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 661 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
574 | 662 |
575 final String htmlArticle = | 663 final String htmlArticle = |
576 "<h1>" + CONTENT_TEXT + "</h1>" + | 664 "<h1>" + CONTENT_TEXT + "</h1>" + |
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 665 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 666 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
579 | 667 |
580 // Non-article schema.org types should not use the fast path. | 668 // Non-article schema.org types should not use the fast path. |
581 assertExtractor(expected, htmlArticle); | 669 assertExtractor(expected, htmlArticle); |
582 } | 670 } |
583 } | 671 } |
OLD | NEW |