| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 488 private void assertExtractor(String expected, String html) { | 488 private void assertExtractor(String expected, String html) { |
| 489 mBody.setInnerHTML(""); | 489 mBody.setInnerHTML(""); |
| 490 Element div = TestUtil.createDiv(0); | 490 Element div = TestUtil.createDiv(0); |
| 491 mBody.appendChild(div); | 491 mBody.appendChild(div); |
| 492 | 492 |
| 493 div.setInnerHTML(html); | 493 div.setInnerHTML(html); |
| 494 ContentExtractor extractor = new ContentExtractor(mRoot); | 494 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 495 String extractedContent = extractor.extractContent(); | 495 String extractedContent = extractor.extractContent(); |
| 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); | 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); |
| 497 } | 497 } |
| 498 | |
| 499 public void testOnlyProcessArticleElement() { | |
| 500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 501 | |
| 502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</
div>"; | |
| 503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
| 504 | |
| 505 // Make sure everything is there before using the fast path. | |
| 506 assertExtractor(expected, html); | |
| 507 | |
| 508 final String htmlArticle = | |
| 509 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 510 "<article>" + article + "</article>"; | |
| 511 | |
| 512 assertExtractor(article, htmlArticle); | |
| 513 } | |
| 514 | |
| 515 public void testOnlyProcessArticleElementMultiple() { | |
| 516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 517 | |
| 518 final String htmlArticle = | |
| 519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 520 "<article>" + article + "</article>" + | |
| 521 "<article>" + article + "</article>"; | |
| 522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + arti
cle; | |
| 523 | |
| 524 // The existence of multiple articles disables the fast path. | |
| 525 assertExtractor(expected, htmlArticle); | |
| 526 } | |
| 527 | |
| 528 public void testOnlyProcessOGArticle() { | |
| 529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 530 | |
| 531 final String htmlArticle = | |
| 532 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article +
"</div>"; | |
| 534 | |
| 535 assertExtractor(article, htmlArticle); | |
| 536 } | |
| 537 | |
| 538 public void testOnlyProcessOGArticleNews() { | |
| 539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 540 | |
| 541 final String htmlArticle = | |
| 542 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + artic
le + "</div>"; | |
| 544 | |
| 545 assertExtractor(article, htmlArticle); | |
| 546 } | |
| 547 | |
| 548 public void testOnlyProcessOGArticleBlog() { | |
| 549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 550 | |
| 551 final String htmlArticle = | |
| 552 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + artic
le + "</div>"; | |
| 554 | |
| 555 assertExtractor(article, htmlArticle); | |
| 556 } | |
| 557 | |
| 558 public void testOnlyProcessOGArticleNested() { | |
| 559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
| 560 final String article = paragraph + paragraph; | |
| 561 | |
| 562 final String htmlArticle = | |
| 563 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
| 565 paragraph + | |
| 566 "<div itemscope itemtype=\"http://schema.org/Article\">" + parag
raph + "</div>" + | |
| 567 "</div>"; | |
| 568 | |
| 569 assertExtractor(article, htmlArticle); | |
| 570 } | |
| 571 | |
| 572 public void testOnlyProcessOGNonArticleMovie() { | |
| 573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
| 574 | |
| 575 final String htmlArticle = | |
| 576 "<h1>" + CONTENT_TEXT + "</h1>" + | |
| 577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; | |
| 578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
| 579 | |
| 580 // Non-article schema.org types should not use the fast path. | |
| 581 assertExtractor(expected, htmlArticle); | |
| 582 } | |
| 583 } | 498 } |
| OLD | NEW |