javatests/org/chromium/distiller/ContentExtractorTest.java - Issue 1411603004: Discard hidden articles when using fast path

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master

Patch Set: Comments addressed & master rebased Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« java/org/chromium/distiller/DomUtil.java ('K') | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | javatests/org/chromium/distiller/DomUtilTest.java » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 package org.chromium.distiller;	5 package org.chromium.distiller;

6	6

7 import com.google.gwt.dom.client.Document;	7 import com.google.gwt.dom.client.Document;

8 import com.google.gwt.dom.client.Element;	8 import com.google.gwt.dom.client.Element;

9	9

10 public class ContentExtractorTest extends DomDistillerJsTestCase {	10 public class ContentExtractorTest extends DomDistillerJsTestCase {

(...skipping 538 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
549 mBody.setInnerHTML("");	549 mBody.setInnerHTML("");

550 Element div = TestUtil.createDiv(0);	550 Element div = TestUtil.createDiv(0);

551 mBody.appendChild(div);	551 mBody.appendChild(div);

552	552

553 div.setInnerHTML(html);	553 div.setInnerHTML(html);

554 ContentExtractor extractor = new ContentExtractor(mRoot);	554 ContentExtractor extractor = new ContentExtractor(mRoot);

555 String extractedContent = extractor.extractContent();	555 String extractedContent = extractor.extractContent();

556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));	556 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));

557 }	557 }

558	558

559 public void testOnlyProcessArticleElement() {

560 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

561

562 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>";

563 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;

564

565 // Make sure everything is there before using the fast path.

566 assertExtractor(expected, html);

567

568 final String htmlArticle =

569 "<h1>" + CONTENT_TEXT + "</h1>" +

570 "<article>" + article + "</article>";

571

572 assertExtractor(article, htmlArticle);

573 }

574

575 public void testOnlyProcessArticleElementMultiple() {

576 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

577

578 final String htmlArticle =

579 "<h1>" + CONTENT_TEXT + "</h1>" +

580 "<article>" + article + "</article>" +

581 "<article>" + article + "</article>";

582 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;

583

584 // The existence of multiple articles disables the fast path.

585 assertExtractor(expected, htmlArticle);

586 }

587

588 public void testOnlyProcessOGArticle() {

589 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

590

591 final String htmlArticle =

592 "<h1>" + CONTENT_TEXT + "</h1>" +

593 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";

594

595 assertExtractor(article, htmlArticle);

596 }

597

598 public void testOnlyProcessOGArticleNews() {

599 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

600

601 final String htmlArticle =

602 "<h1>" + CONTENT_TEXT + "</h1>" +

603 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";

604

605 assertExtractor(article, htmlArticle);

606 }

607

608 public void testOnlyProcessOGArticleBlog() {

609 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

610

611 final String htmlArticle =

612 "<h1>" + CONTENT_TEXT + "</h1>" +

613 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";

614

615 assertExtractor(article, htmlArticle);

616 }

617

618 public void testOnlyProcessOGArticleNested() {

619 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";

620 final String article = paragraph + paragraph;

621

622 final String htmlArticle =

623 "<h1>" + CONTENT_TEXT + "</h1>" +

624 "<div itemscope itemtype=\"http://schema.org/Article\">" +

625 paragraph +

626 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +

627 "</div>";

628

629 assertExtractor(article, htmlArticle);

630 }

631

632 public void testOnlyProcessOGNonArticleMovie() {

633 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

634

635 final String htmlArticle =

636 "<h1>" + CONTENT_TEXT + "</h1>" +

637 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";

638 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;

639

640 // Non-article schema.org types should not use the fast path.

641 assertExtractor(expected, htmlArticle);

642 }

643

644 public void testDropCap() {	559 public void testDropCap() {

645 String html =	560 String html =

646 "<h1>" +	561 "<h1>" +

647 CONTENT_TEXT +	562 CONTENT_TEXT +

648 "</h1>" +	563 "</h1>" +

649 "<p>" +	564 "<p>" +

650 "<strong><span style=\"float: left\">T</span>est</strong>" +	565 "<strong><span style=\"float: left\">T</span>est</strong>" +

651 CONTENT_TEXT +	566 CONTENT_TEXT +

652 "</p>";	567 "</p>";

653	568

654 final String expected =	569 final String expected =

655 "<h1>" +	570 "<h1>" +

656 CONTENT_TEXT +	571 CONTENT_TEXT +

657 "</h1>" +	572 "</h1>" +

658 "<p>" +	573 "<p>" +

659 "<strong><span>T</span>est</strong>" +	574 "<strong><span>T</span>est</strong>" +

660 CONTENT_TEXT +	575 CONTENT_TEXT +

661 "</p>";	576 "</p>";

662	577

663 mBody.setInnerHTML(html);	578 mBody.setInnerHTML(html);

664	579

665 ContentExtractor extractor = new ContentExtractor(mRoot);	580 ContentExtractor extractor = new ContentExtractor(mRoot);

666 String extractedContent = extractor.extractContent();	581 String extractedContent = extractor.extractContent();

667 assertEquals(expected,	582 assertEquals(expected,

668 TestUtil.removeAllDirAttributes(extractedContent));	583 TestUtil.removeAllDirAttributes(extractedContent));

669 }	584 }

670 }	585 }

OLD	NEW