OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
489 mBody.setInnerHTML(""); | 489 mBody.setInnerHTML(""); |
490 Element div = TestUtil.createDiv(0); | 490 Element div = TestUtil.createDiv(0); |
491 mBody.appendChild(div); | 491 mBody.appendChild(div); |
492 | 492 |
493 div.setInnerHTML(html); | 493 div.setInnerHTML(html); |
494 ContentExtractor extractor = new ContentExtractor(mRoot); | 494 ContentExtractor extractor = new ContentExtractor(mRoot); |
495 String extractedContent = extractor.extractContent(); | 495 String extractedContent = extractor.extractContent(); |
496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); | 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
497 } | 497 } |
498 | 498 |
499 public void testOnlyProcessArticleElement() { | 499 public void testOnlyProcessArticleElement() { |
wychen
2015/11/13 07:10:57
It might be better to directly test getArticleElem
| |
500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
501 | 501 |
502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>"; | 502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>"; |
503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
504 | 504 |
505 // Make sure everything is there before using the fast path. | 505 // Make sure everything is there before using the fast path. |
506 assertExtractor(expected, html); | 506 assertExtractor(expected, html); |
507 | 507 |
508 final String htmlArticle = | 508 final String htmlArticle = |
509 "<h1>" + CONTENT_TEXT + "</h1>" + | 509 "<h1>" + CONTENT_TEXT + "</h1>" + |
510 "<article>" + article + "</article>"; | 510 "<article>" + article + "</article>"; |
511 | 511 |
512 assertExtractor(article, htmlArticle); | 512 assertExtractor(article, htmlArticle); |
513 } | 513 } |
514 | 514 |
515 public void testOnlyProcessArticleElementWithHiddenArticleElement() { | |
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
517 | |
518 final String htmlArticle = | |
519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
520 "<article>" + article + "</article>" + | |
521 "<article style=\"display:none\">" + article + "</article>"; | |
522 | |
523 assertExtractor(article, htmlArticle); | |
524 } | |
525 | |
515 public void testOnlyProcessArticleElementMultiple() { | 526 public void testOnlyProcessArticleElementMultiple() { |
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 527 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
517 | 528 |
518 final String htmlArticle = | 529 final String htmlArticle = |
519 "<h1>" + CONTENT_TEXT + "</h1>" + | 530 "<h1>" + CONTENT_TEXT + "</h1>" + |
520 "<article>" + article + "</article>" + | 531 "<article>" + article + "</article>" + |
521 "<article>" + article + "</article>"; | 532 "<article>" + article + "</article>"; |
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | 533 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; |
523 | 534 |
524 // The existence of multiple articles disables the fast path. | 535 // The existence of multiple articles disables the fast path. |
525 assertExtractor(expected, htmlArticle); | 536 assertExtractor(expected, htmlArticle); |
526 } | 537 } |
527 | 538 |
539 public void testOnlyProcessArticleElementMultipleWithHiddenArticleElement() { | |
540 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
541 | |
542 final String htmlArticle = | |
543 "<h1>" + CONTENT_TEXT + "</h1>" + | |
544 "<article>" + article + "</article>" + | |
545 "<article style=\"display:none\">" + article + "</article>" + | |
546 "<article>" + article + "</article>"; | |
547 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article; | |
548 | |
549 // The existence of multiple articles disables the fast path. | |
550 assertExtractor(expected, htmlArticle); | |
551 } | |
552 | |
528 public void testOnlyProcessOGArticle() { | 553 public void testOnlyProcessOGArticle() { |
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 554 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
530 | 555 |
531 final String htmlArticle = | 556 final String htmlArticle = |
532 "<h1>" + CONTENT_TEXT + "</h1>" + | 557 "<h1>" + CONTENT_TEXT + "</h1>" + |
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; | 558 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>"; |
534 | 559 |
535 assertExtractor(article, htmlArticle); | 560 assertExtractor(article, htmlArticle); |
536 } | 561 } |
537 | 562 |
563 public void testOnlyProcessOGArticleWithHiddenArticleElement() { | |
564 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
565 | |
566 final String htmlArticle = | |
567 "<h1>" + CONTENT_TEXT + "</h1>" + | |
568 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>" + | |
569 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
570 article + "</div>"; | |
571 | |
572 assertExtractor(article, htmlArticle); | |
573 } | |
574 | |
538 public void testOnlyProcessOGArticleNews() { | 575 public void testOnlyProcessOGArticleNews() { |
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
540 | 577 |
541 final String htmlArticle = | 578 final String htmlArticle = |
542 "<h1>" + CONTENT_TEXT + "</h1>" + | 579 "<h1>" + CONTENT_TEXT + "</h1>" + |
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; | 580 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>"; |
544 | 581 |
545 assertExtractor(article, htmlArticle); | 582 assertExtractor(article, htmlArticle); |
546 } | 583 } |
547 | 584 |
585 public void testOnlyProcessOGArticleNewsWithHiddenArticleElement() { | |
586 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
587 | |
588 final String htmlArticle = | |
589 "<h1>" + CONTENT_TEXT + "</h1>" + | |
590 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>" + | |
591 "<div itemscope itemtype=\"http://schema.org/NewsArticle\" style=\"display:none\">" + | |
592 article + "</div>"; | |
593 | |
594 assertExtractor(article, htmlArticle); | |
595 } | |
596 | |
548 public void testOnlyProcessOGArticleBlog() { | 597 public void testOnlyProcessOGArticleBlog() { |
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 598 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
550 | 599 |
551 final String htmlArticle = | 600 final String htmlArticle = |
552 "<h1>" + CONTENT_TEXT + "</h1>" + | 601 "<h1>" + CONTENT_TEXT + "</h1>" + |
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; | 602 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>"; |
554 | 603 |
555 assertExtractor(article, htmlArticle); | 604 assertExtractor(article, htmlArticle); |
556 } | 605 } |
557 | 606 |
607 public void testOnlyProcessOGArticleBlogWithHiddenArticleElement() { | |
608 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | |
609 | |
610 final String htmlArticle = | |
611 "<h1>" + CONTENT_TEXT + "</h1>" + | |
612 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>" + | |
613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\" style=\"display:none\">" + | |
614 article + "</div>"; | |
615 | |
616 assertExtractor(article, htmlArticle); | |
617 } | |
618 | |
558 public void testOnlyProcessOGArticleNested() { | 619 public void testOnlyProcessOGArticleNested() { |
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | 620 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
560 final String article = paragraph + paragraph; | 621 final String article = paragraph + paragraph; |
561 | 622 |
562 final String htmlArticle = | 623 final String htmlArticle = |
563 "<h1>" + CONTENT_TEXT + "</h1>" + | 624 "<h1>" + CONTENT_TEXT + "</h1>" + |
564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | 625 "<div itemscope itemtype=\"http://schema.org/Article\">" + |
565 paragraph + | 626 paragraph + |
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | 627 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + |
567 "</div>"; | 628 "</div>"; |
568 | 629 |
569 assertExtractor(article, htmlArticle); | 630 assertExtractor(article, htmlArticle); |
570 } | 631 } |
571 | 632 |
633 public void testOnlyProcessOGArticleNestedWithNestedHiddenArticleElement() { | |
634 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
635 final String article = paragraph + paragraph; | |
636 | |
637 final String htmlArticle = | |
638 "<h1>" + CONTENT_TEXT + "</h1>" + | |
639 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
640 paragraph + | |
641 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
642 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
643 article + "</div>" + | |
644 "</div>"; | |
645 | |
646 assertExtractor(article, htmlArticle); | |
647 } | |
648 | |
649 public void testOnlyProcessOGArticleNestedWithHiddenArticleElement() { | |
650 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
651 final String article = paragraph + paragraph; | |
652 | |
653 final String htmlArticle = | |
654 "<h1>" + CONTENT_TEXT + "</h1>" + | |
655 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
656 paragraph + | |
657 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" + | |
658 "</div>" + | |
659 "<div itemscope itemtype=\"http://schema.org/Article\" style=\"display:none\">" + | |
660 article + "</div>"; | |
661 | |
662 assertExtractor(article, htmlArticle); | |
663 } | |
664 | |
572 public void testOnlyProcessOGNonArticleMovie() { | 665 public void testOnlyProcessOGNonArticleMovie() { |
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; | 666 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>"; |
574 | 667 |
575 final String htmlArticle = | 668 final String htmlArticle = |
576 "<h1>" + CONTENT_TEXT + "</h1>" + | 669 "<h1>" + CONTENT_TEXT + "</h1>" + |
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 670 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 671 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
579 | 672 |
580 // Non-article schema.org types should not use the fast path. | 673 // Non-article schema.org types should not use the fast path. |
581 assertExtractor(expected, htmlArticle); | 674 assertExtractor(expected, htmlArticle); |
582 } | 675 } |
583 } | 676 } |
OLD | NEW |