OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
549 mBody.setInnerHTML(""); | 549 mBody.setInnerHTML(""); |
550 Element div = TestUtil.createDiv(0); | 550 Element div = TestUtil.createDiv(0); |
551 mBody.appendChild(div); | 551 mBody.appendChild(div); |
552 | 552 |
553 div.setInnerHTML(html); | 553 div.setInnerHTML(html); |
554 ContentExtractor extractor = new ContentExtractor(mRoot); | 554 ContentExtractor extractor = new ContentExtractor(mRoot); |
555 String extractedContent = extractor.extractContent(); | 555 String extractedContent = extractor.extractContent(); |
556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); | 556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); |
557 } | 557 } |
558 | 558 |
559 public void testOnlyProcessArticleElement() { | |
560 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
561 | |
562 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</
div>"; | |
563 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
564 | |
565 // Make sure everything is there before using the fast path. | |
566 assertExtractor(expected, html); | |
567 | |
568 final String htmlArticle = | |
569 "<h1>" + CONTENT_TEXT + "</h1>" + | |
570 "<article>" + article + "</article>"; | |
571 | |
572 assertExtractor(article, htmlArticle); | |
573 } | |
574 | |
575 public void testOnlyProcessArticleElementMultiple() { | |
576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
577 | |
578 final String htmlArticle = | |
579 "<h1>" + CONTENT_TEXT + "</h1>" + | |
580 "<article>" + article + "</article>" + | |
581 "<article>" + article + "</article>"; | |
582 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + arti
cle; | |
583 | |
584 // The existence of multiple articles disables the fast path. | |
585 assertExtractor(expected, htmlArticle); | |
586 } | |
587 | |
588 public void testOnlyProcessOGArticle() { | |
589 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
590 | |
591 final String htmlArticle = | |
592 "<h1>" + CONTENT_TEXT + "</h1>" + | |
593 "<div itemscope itemtype=\"http://schema.org/Article\">" + article +
"</div>"; | |
594 | |
595 assertExtractor(article, htmlArticle); | |
596 } | |
597 | |
598 public void testOnlyProcessOGArticleNews() { | |
599 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
600 | |
601 final String htmlArticle = | |
602 "<h1>" + CONTENT_TEXT + "</h1>" + | |
603 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + artic
le + "</div>"; | |
604 | |
605 assertExtractor(article, htmlArticle); | |
606 } | |
607 | |
608 public void testOnlyProcessOGArticleBlog() { | |
609 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
610 | |
611 final String htmlArticle = | |
612 "<h1>" + CONTENT_TEXT + "</h1>" + | |
613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + artic
le + "</div>"; | |
614 | |
615 assertExtractor(article, htmlArticle); | |
616 } | |
617 | |
618 public void testOnlyProcessOGArticleNested() { | |
619 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
620 final String article = paragraph + paragraph; | |
621 | |
622 final String htmlArticle = | |
623 "<h1>" + CONTENT_TEXT + "</h1>" + | |
624 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
625 paragraph + | |
626 "<div itemscope itemtype=\"http://schema.org/Article\">" + parag
raph + "</div>" + | |
627 "</div>"; | |
628 | |
629 assertExtractor(article, htmlArticle); | |
630 } | |
631 | |
632 public void testOnlyProcessOGNonArticleMovie() { | |
633 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
634 | |
635 final String htmlArticle = | |
636 "<h1>" + CONTENT_TEXT + "</h1>" + | |
637 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; | |
638 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
639 | |
640 // Non-article schema.org types should not use the fast path. | |
641 assertExtractor(expected, htmlArticle); | |
642 } | |
643 | |
644 public void testDropCap() { | 559 public void testDropCap() { |
645 String html = | 560 String html = |
646 "<h1>" + | 561 "<h1>" + |
647 CONTENT_TEXT + | 562 CONTENT_TEXT + |
648 "</h1>" + | 563 "</h1>" + |
649 "<p>" + | 564 "<p>" + |
650 "<strong><span style=\"float: left\">T</span>est</strong>" + | 565 "<strong><span style=\"float: left\">T</span>est</strong>" + |
651 CONTENT_TEXT + | 566 CONTENT_TEXT + |
652 "</p>"; | 567 "</p>"; |
653 | 568 |
654 final String expected = | 569 final String expected = |
655 "<h1>" + | 570 "<h1>" + |
656 CONTENT_TEXT + | 571 CONTENT_TEXT + |
657 "</h1>" + | 572 "</h1>" + |
658 "<p>" + | 573 "<p>" + |
659 "<strong><span>T</span>est</strong>" + | 574 "<strong><span>T</span>est</strong>" + |
660 CONTENT_TEXT + | 575 CONTENT_TEXT + |
661 "</p>"; | 576 "</p>"; |
662 | 577 |
663 mBody.setInnerHTML(html); | 578 mBody.setInnerHTML(html); |
664 | 579 |
665 ContentExtractor extractor = new ContentExtractor(mRoot); | 580 ContentExtractor extractor = new ContentExtractor(mRoot); |
666 String extractedContent = extractor.extractContent(); | 581 String extractedContent = extractor.extractContent(); |
667 assertEquals(expected, | 582 assertEquals(expected, |
668 TestUtil.removeAllDirAttributes(extractedContent)); | 583 TestUtil.removeAllDirAttributes(extractedContent)); |
669 } | 584 } |
670 } | 585 } |
OLD | NEW |