Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 489 mBody.setInnerHTML(""); | 489 mBody.setInnerHTML(""); |
| 490 Element div = TestUtil.createDiv(0); | 490 Element div = TestUtil.createDiv(0); |
| 491 mBody.appendChild(div); | 491 mBody.appendChild(div); |
| 492 | 492 |
| 493 div.setInnerHTML(html); | 493 div.setInnerHTML(html); |
| 494 ContentExtractor extractor = new ContentExtractor(mRoot); | 494 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 495 String extractedContent = extractor.extractContent(); | 495 String extractedContent = extractor.extractContent(); |
| 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); | 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| 497 } | 497 } |
| 498 | 498 |
| 499 public void testOnlyProcessArticleElement() { | 499 public void testOnlyProcessArticleElement() { |
|
wychen
2015/11/13 07:10:57
It might be better to directly test getArticleElem
| |
| 500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 501 | 501 |
| 502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>"; | 502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>"; |
| 503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 504 | 504 |
| 505 // Make sure everything is there before using the fast path. | 505 // Make sure everything is there before using the fast path. |
| 506 assertExtractor(expected, html); | 506 assertExtractor(expected, html); |
| 507 | 507 |
| 508 final String htmlArticle = | 508 final String htmlArticle = |
| 509 "<h1>" + CONTENT_TEXT + "</h1>" + | 509 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 510 "<article>" + article + "</article>"; | 510 "<article>" + article + "</article>"; |
| 511 | 511 |
| 512 assertExtractor(article, htmlArticle); | 512 assertExtractor(article, htmlArticle); |
| 513 } | 513 } |
| 514 | 514 |
| 515 public void testOnlyProcessArticleElementWithHiddenArticleElement() { | |
| 516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 517 | |
| 518 final String htmlArticle = | |
| 519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 520 "<article>" + article + "</article>" + | |
| 521 "<article style=\"display:none\">" + article + "</article>"; | |
| 522 | |
| 523 assertExtractor(article, htmlArticle); | |
| 524 } | |
| 525 | |
| 515 public void testOnlyProcessArticleElementMultiple() { | 526 public void testOnlyProcessArticleElementMultiple() { |
| 516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 517 | 528 |
| 518 final String htmlArticle = | 529 final String htmlArticle = |
| 519 "<h1>" + CONTENT_TEXT + "</h1>" + | 530 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 520 "<article>" + article + "</article>" + | 531 "<article>" + article + "</article>" + |
| 521 "<article>" + article + "</article>"; | 532 "<article>" + article + "</article>"; |
| 522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; |
| 523 | 534 |
| 524 // The existence of multiple articles disables the fast path. | 535 // The existence of multiple articles disables the fast path. |
| 525 assertExtractor(expected, htmlArticle); | 536 assertExtractor(expected, htmlArticle); |
| 526 } | 537 } |
| 527 | 538 |
| 539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { | |
| 540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 541 | |
| 542 final String htmlArticle = | |
| 543 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 544 "<article>" + article + "</article>" + | |
| 545 "<article style=\"display:none\">" + article + "</article>" + | |
| 546 "<article>" + article + "</article>"; | |
| 547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | |
| 548 | |
| 549 // The existence of multiple articles disables the fast path. | |
| 550 assertExtractor(expected, htmlArticle); | |
| 551 } | |
| 552 | |
| 528 public void testOnlyProcessOGArticle() { | 553 public void testOnlyProcessOGArticle() { |
| 529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 530 | 555 |
| 531 final String htmlArticle = | 556 final String htmlArticle = |
| 532 "<h1>" + CONTENT_TEXT + "</h1>" + | 557 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; | 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; |
| 534 | 559 |
| 535 assertExtractor(article, htmlArticle); | 560 assertExtractor(article, htmlArticle); |
| 536 } | 561 } |
| 537 | 562 |
| 563 public void testOnlyProcessOGArticleWithHiddenArticleElement() { | |
| 564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 565 | |
| 566 final String htmlArticle = | |
| 567 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" + | |
| 569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
| 570 article + "</div>"; | |
| 571 | |
| 572 assertExtractor(article, htmlArticle); | |
| 573 } | |
| 574 | |
| 538 public void testOnlyProcessOGArticleNews() { | 575 public void testOnlyProcessOGArticleNews() { |
| 539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 540 | 577 |
| 541 final String htmlArticle = | 578 final String htmlArticle = |
| 542 "<h1>" + CONTENT_TEXT + "</h1>" + | 579 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; | 580 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; |
| 544 | 581 |
| 545 assertExtractor(article, htmlArticle); | 582 assertExtractor(article, htmlArticle); |
| 546 } | 583 } |
| 547 | 584 |
| 585 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { | |
| 586 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 587 | |
| 588 final String htmlArticle = | |
| 589 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" + | |
| 591 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + | |
| 592 article + "</div>"; | |
| 593 | |
| 594 assertExtractor(article, htmlArticle); | |
| 595 } | |
| 596 | |
| 548 public void testOnlyProcessOGArticleBlog() { | 597 public void testOnlyProcessOGArticleBlog() { |
| 549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 598 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 550 | 599 |
| 551 final String htmlArticle = | 600 final String htmlArticle = |
| 552 "<h1>" + CONTENT_TEXT + "</h1>" + | 601 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; | 602 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; |
| 554 | 603 |
| 555 assertExtractor(article, htmlArticle); | 604 assertExtractor(article, htmlArticle); |
| 556 } | 605 } |
| 557 | 606 |
| 607 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { | |
| 608 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
| 609 | |
| 610 final String htmlArticle = | |
| 611 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 612 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" + | |
| 613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + | |
| 614 article + "</div>"; | |
| 615 | |
| 616 assertExtractor(article, htmlArticle); | |
| 617 } | |
| 618 | |
| 558 public void testOnlyProcessOGArticleNested() { | 619 public void testOnlyProcessOGArticleNested() { |
| 559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | 620 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| 560 final String article = paragraph + paragraph; | 621 final String article = paragraph + paragraph; |
| 561 | 622 |
| 562 final String htmlArticle = | 623 final String htmlArticle = |
| 563 "<h1>" + CONTENT_TEXT + "</h1>" + | 624 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | 625 "<div itemscope itemtype=\"http://schema.org/Article\">" + |
| 565 paragraph + | 626 paragraph + |
| 566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | 627 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + |
| 567 "</div>"; | 628 "</div>"; |
| 568 | 629 |
| 569 assertExtractor(article, htmlArticle); | 630 assertExtractor(article, htmlArticle); |
| 570 } | 631 } |
| 571 | 632 |
| 633 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { | |
| 634 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 635 final String article = paragraph + paragraph; | |
| 636 | |
| 637 final String htmlArticle = | |
| 638 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 639 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 640 paragraph + | |
| 641 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
| 642 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
| 643 article + "</div>" + | |
| 644 "</div>"; | |
| 645 | |
| 646 assertExtractor(article, htmlArticle); | |
| 647 } | |
| 648 | |
| 649 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { | |
| 650 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 651 final String article = paragraph + paragraph; | |
| 652 | |
| 653 final String htmlArticle = | |
| 654 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 655 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 656 paragraph + | |
| 657 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
| 658 "</div>" + | |
| 659 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
| 660 article + "</div>"; | |
| 661 | |
| 662 assertExtractor(article, htmlArticle); | |
| 663 } | |
| 664 | |
| 572 public void testOnlyProcessOGNonArticleMovie() { | 665 public void testOnlyProcessOGNonArticleMovie() { |
| 573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 666 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
| 574 | 667 |
| 575 final String htmlArticle = | 668 final String htmlArticle = |
| 576 "<h1>" + CONTENT_TEXT + "</h1>" + | 669 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 670 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 671 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 579 | 672 |
| 580 // Non-article schema.org types should not use the fast path. | 673 // Non-article schema.org types should not use the fast path. |
| 581 assertExtractor(expected, htmlArticle); | 674 assertExtractor(expected, htmlArticle); |
| 582 } | 675 } |
| 583 } | 676 } |
| OLD | NEW |