Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 79 assertTrue(parser != null); | 79 assertTrue(parser != null); |
| 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); | 80 assertEquals(MARKUP_PARSER_TITLE, parser.getTitle()); |
| 81 | 81 |
| 82 Document.get().setTitle(TITLE_TEXT); | 82 Document.get().setTitle(TITLE_TEXT); |
| 83 | 83 |
| 84 ContentExtractor extractor = new ContentExtractor(mRoot); | 84 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 85 assertEquals("OpenGraph title should be picked over document.title", | 85 assertEquals("OpenGraph title should be picked over document.title", |
| 86 MARKUP_PARSER_TITLE, extractor.extractTitle()); | 86 MARKUP_PARSER_TITLE, extractor.extractTitle()); |
| 87 } | 87 } |
| 88 | 88 |
| 89 public void testImageWithSrcset() { | 89 public void testImage() { |
| 90 // Test the absolute and different kinds of relative URLs for image sources, | 90 // Test the absolute and different kinds of relative URLs for image sources, |
| 91 // and also add an extra comma (,) as malformed srcset syntax for robustness. | 91 // and also add an extra comma (,) as malformed srcset syntax for robustness. |
| 92 // Also test images in WebImage, WebText, and WebTable. | |
|
wychen
2015/12/09 05:17:40
TODO(wychen): how do I get images inside a WebText
mdjones
2015/12/09 17:40:16
This is an unsolved problem. Since tables are curr
| |
| 92 final String html = | 93 final String html = |
| 93 "<h1>" + CONTENT_TEXT + "</h1>" + | 94 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 94 "<img src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + | 95 "<img id=\"a\" style=\"a\" align=\"left\" src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" + |
| 96 "<img id=\"a\" style=\"a\" align=\"left\" src=\"image2\">" + | |
| 95 "<table role=\"grid\"><tbody><tr><td>" + | 97 "<table role=\"grid\"><tbody><tr><td>" + |
| 96 "<img src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + | 98 "<img id=\"a\" style=\"a\" align=\"left\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + |
| 99 "<img id=\"a\" style=\"a\" align=\"left\" src=\"/image2\">" + | |
| 97 "</td></tr></tbody></table>" + | 100 "</td></tr></tbody></table>" + |
| 98 "<p>" + CONTENT_TEXT + "</p>"; | 101 "<p>" + CONTENT_TEXT + "</p>"; |
| 99 | 102 |
| 100 final String expected = | 103 final String expected = |
| 101 "<h1>" + CONTENT_TEXT + "</h1>" + | 104 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 102 "<img src=\"http://example.com/path/image\" " + | 105 "<img src=\"http://example.com/path/image\" " + |
| 103 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + | 106 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + |
| 107 "<img src=\"http://example.com/path/image2\">" + | |
| 104 "<table role=\"grid\"><tbody><tr><td>" + | 108 "<table role=\"grid\"><tbody><tr><td>" + |
| 105 "<img src=\"http://example.com/image\" " + | 109 "<img src=\"http://example.com/image\" " + |
| 106 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + | 110 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + |
| 111 "<img src=\"http://example.com/image2\">" + | |
| 107 "</td></tr></tbody></table>" + | 112 "</td></tr></tbody></table>" + |
| 108 "<p>" + CONTENT_TEXT + "</p>"; | 113 "<p>" + CONTENT_TEXT + "</p>"; |
| 109 | 114 |
| 110 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); | 115 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); |
| 111 mBody.setInnerHTML(html); | 116 mBody.setInnerHTML(html); |
| 112 | 117 |
| 113 ContentExtractor extractor = new ContentExtractor(mRoot); | 118 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 114 String extractedContent = extractor.extractContent(); | 119 String extractedContent = extractor.extractContent(); |
| 115 | 120 |
| 116 assertEquals(expected, | 121 assertEquals(expected, |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 144 } | 149 } |
| 145 | 150 |
| 146 public void testRemoveStyleAttributes() { | 151 public void testRemoveStyleAttributes() { |
| 147 String html = | 152 String html = |
| 148 "<h1 style=\"font-weight: folder\">" + | 153 "<h1 style=\"font-weight: folder\">" + |
| 149 CONTENT_TEXT + | 154 CONTENT_TEXT + |
| 150 "</h1>" + | 155 "</h1>" + |
| 151 "<p style=\"\">" + | 156 "<p style=\"\">" + |
| 152 CONTENT_TEXT + | 157 CONTENT_TEXT + |
| 153 "</p>" + | 158 "</p>" + |
| 159 "<img style=\"align: left\" src=\"/test.png\">" + | |
| 154 "<table style=\"position: absolute\">" + | 160 "<table style=\"position: absolute\">" + |
| 155 "<tbody style=\"font-size: 2\">" + | 161 "<tbody style=\"font-size: 2\">" + |
| 156 "<tr style=\"z-index: 0\">" + | 162 "<tr style=\"z-index: 0\">" + |
| 157 "<th style=\"top: 0px\">" + CONTENT_TEXT + "</th>" + | 163 "<th style=\"top: 0px\">" + CONTENT_TEXT + |
| 164 "<img style=\"align: left\" src=\"/test.png\">" + | |
| 165 "</th>" + | |
| 158 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + | 166 "<th style=\"width: 20px\">" + CONTENT_TEXT + "</th>" + |
| 159 "</tr><tr style=\"left: 0\">" + | 167 "</tr><tr style=\"left: 0\">" + |
| 160 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + | 168 "<td style=\"display: block\">" + CONTENT_TEXT + "</td>" + |
| 161 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + | 169 "<td style=\"color: #123\">" + CONTENT_TEXT + "</td>" + |
| 162 "</tr>" + | 170 "</tr>" + |
| 163 "</tbody>" + | 171 "</tbody>" + |
| 164 "</table>"; | 172 "</table>"; |
| 165 | 173 |
| 166 final String expected = | 174 final String expected = |
| 167 "<h1>" + | 175 "<h1>" + |
| 168 CONTENT_TEXT + | 176 CONTENT_TEXT + |
| 169 "</h1>" + | 177 "</h1>" + |
| 170 "<p>" + | 178 "<p>" + |
| 171 CONTENT_TEXT + | 179 CONTENT_TEXT + |
| 172 "</p>" + | 180 "</p>" + |
| 181 "<img src=\"http://example.com/test.png\">" + | |
| 173 "<table>" + | 182 "<table>" + |
| 174 "<tbody>" + | 183 "<tbody>" + |
| 175 "<tr>" + | 184 "<tr>" + |
| 176 "<th>" + CONTENT_TEXT + "</th>" + | 185 "<th>" + CONTENT_TEXT + |
| 186 "<img src=\"http://example.com/test.png\">" + | |
| 187 "</th>" + | |
| 177 "<th>" + CONTENT_TEXT + "</th>" + | 188 "<th>" + CONTENT_TEXT + "</th>" + |
| 178 "</tr><tr>" + | 189 "</tr><tr>" + |
| 179 "<td>" + CONTENT_TEXT + "</td>" + | 190 "<td>" + CONTENT_TEXT + "</td>" + |
| 180 "<td>" + CONTENT_TEXT + "</td>" + | 191 "<td>" + CONTENT_TEXT + "</td>" + |
| 181 "</tr>" + | 192 "</tr>" + |
| 182 "</tbody>" + | 193 "</tbody>" + |
| 183 "</table>"; | 194 "</table>"; |
| 184 | 195 |
| 196 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | |
| 185 mBody.setInnerHTML(html); | 197 mBody.setInnerHTML(html); |
| 186 | 198 |
| 187 ContentExtractor extractor = new ContentExtractor(mRoot); | 199 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 188 String extractedContent = extractor.extractContent(); | 200 String extractedContent = extractor.extractContent(); |
| 189 assertEquals(expected, | 201 assertEquals(expected, |
| 190 TestUtil.removeAllDirAttributes(extractedContent)); | 202 TestUtil.removeAllDirAttributes(extractedContent)); |
| 191 } | 203 } |
| 192 | 204 |
| 193 public void testPreserveOrderedList() { | 205 public void testPreserveOrderedList() { |
| 194 Element outerListTag = Document.get().createElement("OL"); | 206 Element outerListTag = Document.get().createElement("OL"); |
| (...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 574 | 586 |
| 575 final String htmlArticle = | 587 final String htmlArticle = |
| 576 "<h1>" + CONTENT_TEXT + "</h1>" + | 588 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 577 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 589 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 578 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 590 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 579 | 591 |
| 580 // Non-article schema.org types should not use the fast path. | 592 // Non-article schema.org types should not use the fast path. |
| 581 assertExtractor(expected, htmlArticle); | 593 assertExtractor(expected, htmlArticle); |
| 582 } | 594 } |
| 583 } | 595 } |
| OLD | NEW |