OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
488 private void assertExtractor(String expected, String html) { | 488 private void assertExtractor(String expected, String html) { |
489 mBody.setInnerHTML(""); | 489 mBody.setInnerHTML(""); |
490 Element div = TestUtil.createDiv(0); | 490 Element div = TestUtil.createDiv(0); |
491 mBody.appendChild(div); | 491 mBody.appendChild(div); |
492 | 492 |
493 div.setInnerHTML(html); | 493 div.setInnerHTML(html); |
494 ContentExtractor extractor = new ContentExtractor(mRoot); | 494 ContentExtractor extractor = new ContentExtractor(mRoot); |
495 String extractedContent = extractor.extractContent(); | 495 String extractedContent = extractor.extractContent(); |
496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); | 496 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); |
497 } | 497 } |
498 | |
499 public void testOnlyProcessArticleElement() { | |
500 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
501 | |
502 final String html = "<h1>" + CONTENT_TEXT + "</h1><div>" + article + "</
div>"; | |
503 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
504 | |
505 // Make sure everything is there before using the fast path. | |
506 assertExtractor(expected, html); | |
507 | |
508 final String htmlArticle = | |
509 "<h1>" + CONTENT_TEXT + "</h1>" + | |
510 "<article>" + article + "</article>"; | |
511 | |
512 assertExtractor(article, htmlArticle); | |
513 } | |
514 | |
515 public void testOnlyProcessArticleElementMultiple() { | |
516 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
517 | |
518 final String htmlArticle = | |
519 "<h1>" + CONTENT_TEXT + "</h1>" + | |
520 "<article>" + article + "</article>" + | |
521 "<article>" + article + "</article>"; | |
522 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article + arti
cle; | |
523 | |
524 // The existence of multiple articles disables the fast path. | |
525 assertExtractor(expected, htmlArticle); | |
526 } | |
527 | |
528 public void testOnlyProcessOGArticle() { | |
529 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
530 | |
531 final String htmlArticle = | |
532 "<h1>" + CONTENT_TEXT + "</h1>" + | |
533 "<div itemscope itemtype=\"http://schema.org/Article\">" + article +
"</div>"; | |
534 | |
535 assertExtractor(article, htmlArticle); | |
536 } | |
537 | |
538 public void testOnlyProcessOGArticleNews() { | |
539 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
540 | |
541 final String htmlArticle = | |
542 "<h1>" + CONTENT_TEXT + "</h1>" + | |
543 "<div itemscope itemtype=\"http://schema.org/NewsArticle\">" + artic
le + "</div>"; | |
544 | |
545 assertExtractor(article, htmlArticle); | |
546 } | |
547 | |
548 public void testOnlyProcessOGArticleBlog() { | |
549 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
550 | |
551 final String htmlArticle = | |
552 "<h1>" + CONTENT_TEXT + "</h1>" + | |
553 "<div itemscope itemtype=\"http://schema.org/BlogPosting\">" + artic
le + "</div>"; | |
554 | |
555 assertExtractor(article, htmlArticle); | |
556 } | |
557 | |
558 public void testOnlyProcessOGArticleNested() { | |
559 final String paragraph = "<p>" + CONTENT_TEXT + "</p>"; | |
560 final String article = paragraph + paragraph; | |
561 | |
562 final String htmlArticle = | |
563 "<h1>" + CONTENT_TEXT + "</h1>" + | |
564 "<div itemscope itemtype=\"http://schema.org/Article\">" + | |
565 paragraph + | |
566 "<div itemscope itemtype=\"http://schema.org/Article\">" + parag
raph + "</div>" + | |
567 "</div>"; | |
568 | |
569 assertExtractor(article, htmlArticle); | |
570 } | |
571 | |
572 public void testOnlyProcessOGNonArticleMovie() { | |
573 final String article = "<p>" + CONTENT_TEXT + "</p><p>" + CONTENT_TEXT +
"</p>"; | |
574 | |
575 final String htmlArticle = | |
576 "<h1>" + CONTENT_TEXT + "</h1>" + | |
577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; | |
578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | |
579 | |
580 // Non-article schema.org types should not use the fast path. | |
581 assertExtractor(expected, htmlArticle); | |
582 } | |
583 } | 498 } |
OLD | NEW |