Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 505 // Make sure everything is there before using the fast path. | 505 // Make sure everything is there before using the fast path. |
| 506 assertExtractor(expected, html); | 506 assertExtractor(expected, html); |
| 507 | 507 |
| 508 final String htmlArticle = | 508 final String htmlArticle = |
| 509 "<h1>" + CONTENT_TEXT + "</h1>" + | 509 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 510 "<article>" + article + "</article>"; | 510 "<article>" + article + "</article>"; |
| 511 | 511 |
| 512 assertExtractor(article, htmlArticle); | 512 assertExtractor(article, htmlArticle); |
| 513 } | 513 } |
| 514 | 514 |
| 515 public void testOnlyProcessArticleElementWithHiddenArticleElement() { | |
| 516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 517 | |
| 518 final String htmlArticle = | |
| 519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 520 "<article>" + article + "</article>" + | |
| 521 "<article style=\"display:none\">" + article + "</article>"; | |
| 522 | |
| 523 assertExtractor(article, htmlArticle); | |
| 524 } | |
| 525 | |
| 515 public void testOnlyProcessArticleElementMultiple() { | 526 public void testOnlyProcessArticleElementMultiple() { |
| 516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 517 | 528 |
| 518 final String htmlArticle = | 529 final String htmlArticle = |
| 519 "<h1>" + CONTENT_TEXT + "</h1>" + | 530 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 520 "<article>" + article + "</article>" + | 531 "<article>" + article + "</article>" + |
| 521 "<article>" + article + "</article>"; | 532 "<article>" + article + "</article>"; |
| 522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; |
| 523 | 534 |
| 524 // The existence of multiple articles disables the fast path. | 535 // The existence of multiple articles disables the fast path. |
| 525 assertExtractor(expected, htmlArticle); | 536 assertExtractor(expected, htmlArticle); |
| 526 } | 537 } |
| 527 | 538 |
| 539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { | |
| 540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 541 | |
| 542 final String htmlArticle = | |
| 543 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 544 "<article>" + article + "</article>" + | |
| 545 "<article style=\"display:none\">" + article + "</article>" + | |
| 546 "<article>" + article + "</article>"; | |
| 547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | |
| 548 | |
| 549 // The existence of multiple articles disables the fast path. | |
| 550 assertExtractor(expected, htmlArticle); | |
| 551 } | |
| 552 | |
| 528 public void testOnlyProcessOGArticle() { | 553 public void testOnlyProcessOGArticle() { |
| 529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 530 | 555 |
| 531 final String htmlArticle = | 556 final String htmlArticle = |
| 532 "<h1>" + CONTENT_TEXT + "</h1>" + | 557 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; | 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; |
| 534 | 559 |
| 535 assertExtractor(article, htmlArticle); | 560 assertExtractor(article, htmlArticle); |
| 536 } | 561 } |
| 537 | 562 |
| 563 public void testOnlyProcessOGArticleWithHiddenArticleElement() { | |
| 564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 565 | |
| 566 final String htmlArticle = | |
| 567 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" + | |
| 569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>"; | |
| 570 | |
| 571 assertExtractor(article, htmlArticle); | |
| 572 } | |
| 573 | |
| 538 public void testOnlyProcessOGArticleNews() { | 574 public void testOnlyProcessOGArticleNews() { |
| 539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 575 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 540 | 576 |
| 541 final String htmlArticle = | 577 final String htmlArticle = |
| 542 "<h1>" + CONTENT_TEXT + "</h1>" + | 578 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; | 579 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; |
| 544 | 580 |
| 545 assertExtractor(article, htmlArticle); | 581 assertExtractor(article, htmlArticle); |
| 546 } | 582 } |
| 547 | 583 |
| 584 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { | |
| 585 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 586 | |
| 587 final String htmlArticle = | |
| 588 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 589 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" + | |
| 590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + article + "</div>"; | |
| 591 | |
| 592 assertExtractor(article, htmlArticle); | |
| 593 } | |
| 594 | |
| 548 public void testOnlyProcessOGArticleBlog() { | 595 public void testOnlyProcessOGArticleBlog() { |
| 549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 596 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 550 | 597 |
| 551 final String htmlArticle = | 598 final String htmlArticle = |
| 552 "<h1>" + CONTENT_TEXT + "</h1>" + | 599 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; | 601 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; |
| 554 | 601 |
| 555 assertExtractor(article, htmlArticle); | 602 assertExtractor(article, htmlArticle); |
| 556 } | 603 } |
| 557 | 604 |
| 605 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { | |
| 606 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 607 | |
| 608 final String htmlArticle = | |
| 609 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 610 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" + | |
| 611 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + article + "</div>"; | |
| 612 | |
| 613 assertExtractor(article, htmlArticle); | |
| 614 } | |
| 615 | |
| 558 public void testOnlyProcessOGArticleNested() { | 616 public void testOnlyProcessOGArticleNested() { |
| 559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | 617 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| 560 final String article = paragraph + paragraph; | 618 final String article = paragraph + paragraph; |
| 561 | 619 |
| 562 final String htmlArticle = | 620 final String htmlArticle = |
| 563 "<h1>" + CONTENT_TEXT + "</h1>" + | 621 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | 622 "<div itemscope itemtype=\"http://schema.org/Article\">" + |
| 565 paragraph + | 623 paragraph + |
| 566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | 624 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + |
| 567 "</div>"; | 625 "</div>"; |
| 568 | 626 |
| 569 assertExtractor(article, htmlArticle); | 627 assertExtractor(article, htmlArticle); |
| 570 } | 628 } |
| 571 | 629 |
| 630 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { | |
| 631 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 632 final String article = paragraph + paragraph; | |
| 633 | |
| 634 final String htmlArticle = | |
| 635 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 636 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 637 paragraph + | |
| 638 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
| 639 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>" + | |
| 640 "</div>"; | |
| 641 | |
| 642 assertExtractor(article, htmlArticle); | |
| 643 } | |
| 644 | |
| 645 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { | |
| 646 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 647 final String article = paragraph + paragraph; | |
| 648 | |
| 649 final String htmlArticle = | |
| 650 "<h1>" + CONTENT_TEXT + "</h1>" + | |
|
wychen
2015/10/21 17:21:20
nit: inconsistent indentation around here.
| |
| 651 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 652 paragraph + | |
| 653 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
| 654 "</div>" + | |
| 655 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + article + "</div>"; | |
| 656 | |
| 657 assertExtractor(article, htmlArticle); | |
| 658 } | |
| 659 | |
| 572 public void testOnlyProcessOGNonArticleMovie() { | 660 public void testOnlyProcessOGNonArticleMovie() { |
| 573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 661 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 574 | 662 |
| 575 final String htmlArticle = | 663 final String htmlArticle = |
| 576 "<h1>" + CONTENT_TEXT + "</h1>" + | 664 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 665 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 666 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 579 | 667 |
| 580 // Non-article schema.org types should not use the fast path. | 668 // Non-article schema.org types should not use the fast path. |
| 581 assertExtractor(expected, htmlArticle); | 669 assertExtractor(expected, htmlArticle); |
| 582 } | 670 } |
| 583 } | 671 } |
| OLD | NEW |