javatests/org/chromium/distiller/ContentExtractorTest.java - Issue 1411603004: Discard hidden articles when using fast path

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1411603004: Discard hidden articles when using fast path (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master

Patch Set: wychen's comments addressed Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« java/org/chromium/distiller/DomUtil.java ('K') | « java/org/chromium/distiller/DomUtil.java ('k') | javatests/org/chromium/distiller/DomUtilTest.java » ('j') | javatests/org/chromium/distiller/DomUtilTest.java » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 package org.chromium.distiller;	5 package org.chromium.distiller;

6	6

7 import com.google.gwt.dom.client.Document;	7 import com.google.gwt.dom.client.Document;

8 import com.google.gwt.dom.client.Element;	8 import com.google.gwt.dom.client.Element;

9	9

10 public class ContentExtractorTest extends DomDistillerJsTestCase {	10 public class ContentExtractorTest extends DomDistillerJsTestCase {

(...skipping 477 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
488 private void assertExtractor(String expected, String html) {	488 private void assertExtractor(String expected, String html) {

489 mBody.setInnerHTML("");	489 mBody.setInnerHTML("");

490 Element div = TestUtil.createDiv(0);	490 Element div = TestUtil.createDiv(0);

491 mBody.appendChild(div);	491 mBody.appendChild(div);

492	492

493 div.setInnerHTML(html);	493 div.setInnerHTML(html);

494 ContentExtractor extractor = new ContentExtractor(mRoot);	494 ContentExtractor extractor = new ContentExtractor(mRoot);

495 String extractedContent = extractor.extractContent();	495 String extractedContent = extractor.extractContent();

496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));	496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));

497 }	497 }

498

499 public void testOnlyProcessArticleElement() {

500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

501

502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</div>";

503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;

504

505 // Make sure everything is there before using the fast path.

506 assertExtractor(expected, html);

507

508 final String htmlArticle =

509 "<h1>" + CONTENT_TEXT + "</h1>" +

510 "<article>" + article + "</article>";

511

512 assertExtractor(article, htmlArticle);

513 }

514

515 public void testOnlyProcessArticleElementMultiple() {

516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

517

518 final String htmlArticle =

519 "<h1>" + CONTENT_TEXT + "</h1>" +

520 "<article>" + article + "</article>" +

521 "<article>" + article + "</article>";

522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + article;

523

524 // The existence of multiple articles disables the fast path.

525 assertExtractor(expected, htmlArticle);

526 }

527

528 public void testOnlyProcessOGArticle() {

529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

530

531 final String htmlArticle =

532 "<h1>" + CONTENT_TEXT + "</h1>" +

533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article + "</div>";

534

535 assertExtractor(article, htmlArticle);

536 }

537

538 public void testOnlyProcessOGArticleNews() {

539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

540

541 final String htmlArticle =

542 "<h1>" + CONTENT_TEXT + "</h1>" +

543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + article + "</div>";

544

545 assertExtractor(article, htmlArticle);

546 }

547

548 public void testOnlyProcessOGArticleBlog() {

549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

550

551 final String htmlArticle =

552 "<h1>" + CONTENT_TEXT + "</h1>" +

553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + article + "</div>";

554

555 assertExtractor(article, htmlArticle);

556 }

557

558 public void testOnlyProcessOGArticleNested() {

559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>";

560 final String article = paragraph + paragraph;

561

562 final String htmlArticle =

563 "<h1>" + CONTENT_TEXT + "</h1>" +

564 "<div itemscope itemtype=\"http://schema.org/Article\">" +

565 paragraph +

566 "<div itemscope itemtype=\"http://schema.org/Article\">" + paragraph + "</div>" +

567 "</div>";

568

569 assertExtractor(article, htmlArticle);

570 }

571

572 public void testOnlyProcessOGNonArticleMovie() {

573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT + "</p>";

574

575 final String htmlArticle =

576 "<h1>" + CONTENT_TEXT + "</h1>" +

577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";

578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;

579

580 // Non-article schema.org types should not use the fast path.

581 assertExtractor(expected, htmlArticle);

582 }

583 }	498 }

OLD	NEW