| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 68 matching lines...) |
| 79 assertTrue(parser != null); | 79 assertTrue(parser != null); |
| 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); | 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); |
| 81 | 81 |
| 82 Document.get().setTitle(TITLE_TEXT); | 82 Document.get().setTitle(TITLE_TEXT); |
| 83 | 83 |
| 84 ContentExtractor extractor = new ContentExtractor(mRoot); | 84 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 85 assertEquals("OpenGraph title should be picked over document.title", | 85 assertEquals("OpenGraph title should be picked over document.title", |
| 86 MARKUP_PARSER_TITLE, extractor.extractTitle()); | 86 MARKUP_PARSER_TITLE, extractor.extractTitle()); |
| 87 } | 87 } |
| 88 | 88 |
| 89 public void testImageWithSrcset() { | 89 public void testImage() { |
| 90 // Test the absolute and different kinds of relative URLs for image sources, | 90 // Test the absolute and different kinds of relative URLs for image sources, |
| 91 // and also add an extra comma (,) as malformed srcset syntax for robustness. | 91 // and also add an extra comma (,) as malformed srcset syntax for robustness. |
| 92 // Also test images in WebImage and WebTable. |
| 93 // TODO(wychen): add images in WebText when it is supported. |
| 92 final String html = | 94 final String html = |
| 93 "<h1>" + CONTENT_TEXT + "</h1>" + | 95 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 94 "<img src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + | 96 "<img id=\"a\" style=\"typo\" align=\"left\" src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + |
| 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + |
| 95 "<table role=\"grid\"><tbody><tr><td>" + | 98 "<table role=\"grid\"><tbody><tr><td>" + |
| 96 "<img src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + | 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + |
| 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + |
| 97 "</td></tr></tbody></table>" + | 101 "</td></tr></tbody></table>" + |
| 98 "<p>" + CONTENT_TEXT + "</p>"; | 102 "<p>" + CONTENT_TEXT + "</p>"; |
| 99 | 103 |
| 100 final String expected = | 104 final String expected = |
| 101 "<h1>" + CONTENT_TEXT + "</h1>" + | 105 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 102 "<img src=\"http://example.com/path/image\" " + | 106 "<img src=\"http://example.com/path/image\" " + |
| 103 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + | 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + |
| 108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" + |
| 104 "<table role=\"grid\"><tbody><tr><td>" + | 109 "<table role=\"grid\"><tbody><tr><td>" + |
| 105 "<img src=\"http://example.com/image\" " + | 110 "<img alt=\"b\" src=\"http://example.com/image\" " + |
| 106 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + | 111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + |
| 112 "<img src=\"http://example.com/image2\">" + |
| 107 "</td></tr></tbody></table>" + | 113 "</td></tr></tbody></table>" + |
| 108 "<p>" + CONTENT_TEXT + "</p>"; | 114 "<p>" + CONTENT_TEXT + "</p>"; |
| 109 | 115 |
| 110 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); | 116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); |
| 111 mBody.setInnerHTML(html); | 117 mBody.setInnerHTML(html); |
| 112 | 118 |
| 113 ContentExtractor extractor = new ContentExtractor(mRoot); | 119 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 114 String extractedContent = extractor.extractContent(); | 120 String extractedContent = extractor.extractContent(); |
| 115 | 121 |
| 116 assertEquals(expected, | 122 assertEquals(expected, |
| (...skipping 27 matching lines...) |
| 144 } | 150 } |
| 145 | 151 |
| 146 public void testRemoveStyleAttributes() { | 152 public void testRemoveStyleAttributes() { |
| 147 String html = | 153 String html = |
| 148 "<h1 style=\"font-weight: folder\">" + | 154 "<h1 style=\"font-weight: folder\">" + |
| 149 CONTENT_TEXT + | 155 CONTENT_TEXT + |
| 150 "</h1>" + | 156 "</h1>" + |
| 151 "<p style=\"\">" + | 157 "<p style=\"\">" + |
| 152 CONTENT_TEXT + | 158 CONTENT_TEXT + |
| 153 "</p>" + | 159 "</p>" + |
| 160 "<img style=\"align: left\" src=\"/test.png\">" + |
| 154 "<table style=\"position: absolute\">" + | 161 "<table style=\"position: absolute\">" + |
| 155 "<tbody style=\"font-size: 2\">" + | 162 "<tbody style=\"font-size: 2\">" + |
| 156 "<tr style=\"z-index: 0\">" + | 163 "<tr style=\"z-index: 0\">" + |
| 157 "<th style=\"top: 0px\">" + CONTENT_TEXT + "</th>" + | 164 "<th style=\"top: 0px\">" + CONTENT_TEXT + |
| 165 "<img style=\"align: left\" src=\"/test.png\">" + |
| 166 "</th>" + |
| 158 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + | 167 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + |
| 159 "</tr><tr style=\"left: 0\">" + | 168 "</tr><tr style=\"left: 0\">" + |
| 160 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + | 169 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + |
| 161 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + | 170 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + |
| 162 "</tr>" + | 171 "</tr>" + |
| 163 "</tbody>" + | 172 "</tbody>" + |
| 164 "</table>"; | 173 "</table>"; |
| 165 | 174 |
| 166 final String expected = | 175 final String expected = |
| 167 "<h1>" + | 176 "<h1>" + |
| 168 CONTENT_TEXT + | 177 CONTENT_TEXT + |
| 169 "</h1>" + | 178 "</h1>" + |
| 170 "<p>" + | 179 "<p>" + |
| 171 CONTENT_TEXT + | 180 CONTENT_TEXT + |
| 172 "</p>" + | 181 "</p>" + |
| 182 "<img src=\"http://example.com/test.png\">" + |
| 173 "<table>" + | 183 "<table>" + |
| 174 "<tbody>" + | 184 "<tbody>" + |
| 175 "<tr>" + | 185 "<tr>" + |
| 176 "<th>" + CONTENT_TEXT + "</th>" + | 186 "<th>" + CONTENT_TEXT + |
| 187 "<img src=\"http://example.com/test.png\">" + |
| 188 "</th>" + |
| 177 "<th>" + CONTENT_TEXT + "</th>" + | 189 "<th>" + CONTENT_TEXT + "</th>" + |
| 178 "</tr><tr>" + | 190 "</tr><tr>" + |
| 179 "<td>" + CONTENT_TEXT + "</td>" + | 191 "<td>" + CONTENT_TEXT + "</td>" + |
| 180 "<td>" + CONTENT_TEXT + "</td>" + | 192 "<td>" + CONTENT_TEXT + "</td>" + |
| 181 "</tr>" + | 193 "</tr>" + |
| 182 "</tbody>" + | 194 "</tbody>" + |
| 183 "</table>"; | 195 "</table>"; |
| 184 | 196 |
| 197 mHead.setInnerHTML("<base href=\"http://example.com/\">"); |
| 185 mBody.setInnerHTML(html); | 198 mBody.setInnerHTML(html); |
| 186 | 199 |
| 187 ContentExtractor extractor = new ContentExtractor(mRoot); | 200 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 188 String extractedContent = extractor.extractContent(); | 201 String extractedContent = extractor.extractContent(); |
| 189 assertEquals(expected, | 202 assertEquals(expected, |
| 190 TestUtil.removeAllDirAttributes(extractedContent)); | 203 TestUtil.removeAllDirAttributes(extractedContent)); |
| 191 } | 204 } |
| 192 | 205 |
| 193 public void testPreserveOrderedList() { | 206 public void testPreserveOrderedList() { |
| 194 Element outerListTag = Document.get().createElement("OL"); | 207 Element outerListTag = Document.get().createElement("OL"); |
| (...skipping 379 matching lines...) |
| 574 | 587 |
| 575 final String htmlArticle = | 588 final String htmlArticle = |
| 576 "<h1>" + CONTENT_TEXT + "</h1>" + | 589 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 590 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 591 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 579 | 592 |
| 580 // Non-article schema.org types should not use the fast path. | 593 // Non-article schema.org types should not use the fast path. |
| 581 assertExtractor(expected, htmlArticle); | 594 assertExtractor(expected, htmlArticle); |
| 582 } | 595 } |
| 583 } | 596 } |
| OLD | NEW |