| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 549 mBody.setInnerHTML(""); | 549 mBody.setInnerHTML(""); |
| 550 Element div = TestUtil.createDiv(0); | 550 Element div = TestUtil.createDiv(0); |
| 551 mBody.appendChild(div); | 551 mBody.appendChild(div); |
| 552 | 552 |
| 553 div.setInnerHTML(html); | 553 div.setInnerHTML(html); |
| 554 ContentExtractor extractor = new ContentExtractor(mRoot); | 554 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 555 String extractedContent = extractor.extractContent(); | 555 String extractedContent = extractor.extractContent(); |
| 556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); | 556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); |
| 557 } | 557 } |
| 558 | 558 |
| 559 public void testOnlyProcessArticleElement() { | |
| 560 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 561 | |
| 562 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</
div>"; | |
| 563 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
| 564 | |
| 565 // Make sure everything is there before using the fast path. | |
| 566 assertExtractor(expected, html); | |
| 567 | |
| 568 final String htmlArticle = | |
| 569 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 570 "<article>" + article + "</article>"; | |
| 571 | |
| 572 assertExtractor(article, htmlArticle); | |
| 573 } | |
| 574 | |
| 575 public void testOnlyProcessArticleElementMultiple() { | |
| 576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 577 | |
| 578 final String htmlArticle = | |
| 579 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 580 "<article>" + article + "</article>" + | |
| 581 "<article>" + article + "</article>"; | |
| 582 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + arti
cle; | |
| 583 | |
| 584 // The existence of multiple articles disables the fast path. | |
| 585 assertExtractor(expected, htmlArticle); | |
| 586 } | |
| 587 | |
| 588 public void testOnlyProcessOGArticle() { | |
| 589 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 590 | |
| 591 final String htmlArticle = | |
| 592 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 593 "<div itemscope itemtype=\"http://schema.org/Article\">" + article +
"</div>"; | |
| 594 | |
| 595 assertExtractor(article, htmlArticle); | |
| 596 } | |
| 597 | |
| 598 public void testOnlyProcessOGArticleNews() { | |
| 599 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 600 | |
| 601 final String htmlArticle = | |
| 602 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 603 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + artic
le + "</div>"; | |
| 604 | |
| 605 assertExtractor(article, htmlArticle); | |
| 606 } | |
| 607 | |
| 608 public void testOnlyProcessOGArticleBlog() { | |
| 609 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 610 | |
| 611 final String htmlArticle = | |
| 612 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + artic
le + "</div>"; | |
| 614 | |
| 615 assertExtractor(article, htmlArticle); | |
| 616 } | |
| 617 | |
| 618 public void testOnlyProcessOGArticleNested() { | |
| 619 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 620 final String article = paragraph + paragraph; | |
| 621 | |
| 622 final String htmlArticle = | |
| 623 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 624 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 625 paragraph + | |
| 626 "<div itemscope itemtype=\"http://schema.org/Article\">" + parag
raph + "</div>" + | |
| 627 "</div>"; | |
| 628 | |
| 629 assertExtractor(article, htmlArticle); | |
| 630 } | |
| 631 | |
| 632 public void testOnlyProcessOGNonArticleMovie() { | |
| 633 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 634 | |
| 635 final String htmlArticle = | |
| 636 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 637 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; | |
| 638 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
| 639 | |
| 640 // Non-article schema.org types should not use the fast path. | |
| 641 assertExtractor(expected, htmlArticle); | |
| 642 } | |
| 643 | |
| 644 public void testDropCap() { | 559 public void testDropCap() { |
| 645 String html = | 560 String html = |
| 646 "<h1>" + | 561 "<h1>" + |
| 647 CONTENT_TEXT + | 562 CONTENT_TEXT + |
| 648 "</h1>" + | 563 "</h1>" + |
| 649 "<p>" + | 564 "<p>" + |
| 650 "<strong><span style=\"float: left\">T</span>est</strong>" + | 565 "<strong><span style=\"float: left\">T</span>est</strong>" + |
| 651 CONTENT_TEXT + | 566 CONTENT_TEXT + |
| 652 "</p>"; | 567 "</p>"; |
| 653 | 568 |
| 654 final String expected = | 569 final String expected = |
| 655 "<h1>" + | 570 "<h1>" + |
| 656 CONTENT_TEXT + | 571 CONTENT_TEXT + |
| 657 "</h1>" + | 572 "</h1>" + |
| 658 "<p>" + | 573 "<p>" + |
| 659 "<strong><span>T</span>est</strong>" + | 574 "<strong><span>T</span>est</strong>" + |
| 660 CONTENT_TEXT + | 575 CONTENT_TEXT + |
| 661 "</p>"; | 576 "</p>"; |
| 662 | 577 |
| 663 mBody.setInnerHTML(html); | 578 mBody.setInnerHTML(html); |
| 664 | 579 |
| 665 ContentExtractor extractor = new ContentExtractor(mRoot); | 580 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 666 String extractedContent = extractor.extractContent(); | 581 String extractedContent = extractor.extractContent(); |
| 667 assertEquals(expected, | 582 assertEquals(expected, |
| 668 TestUtil.removeAllDirAttributes(extractedContent)); | 583 TestUtil.removeAllDirAttributes(extractedContent)); |
| 669 } | 584 } |
| 670 } | 585 } |
| OLD | NEW |