Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 86 matching lines...) | |
| 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + | 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + |
| 98 "<table role=\"grid\"><tbody><tr><td>" + | 98 "<table role=\"grid\"><tbody><tr><td>" + |
| 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + | 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + |
| 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + | 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + |
| 101 "</td></tr></tbody></table>" + | 101 "</td></tr></tbody></table>" + |
| 102 "<p>" + CONTENT_TEXT + "</p>"; | 102 "<p>" + CONTENT_TEXT + "</p>"; |
| 103 | 103 |
| 104 final String expected = | 104 final String expected = |
| 105 "<h1>" + CONTENT_TEXT + "</h1>" + | 105 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 106 "<img src=\"http://example.com/path/image\" " + | 106 "<img src=\"http://example.com/path/image\" " + |
| 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + | 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\" " + |
| 108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" + | 108 "width=\"0\" height=\"0\">" + |
|
wychen
2016/03/11 08:25:35
If the dimension is not specified in the source, w…
| |
| 109 "<img alt=\"b\" src=\"http://example.com/path/image2\" " + | |
| 110 "width=\"0\" height=\"0\">" + | |
| 109 "<table role=\"grid\"><tbody><tr><td>" + | 111 "<table role=\"grid\"><tbody><tr><td>" + |
| 110 "<img alt=\"b\" src=\"http://example.com/image\" " + | 112 "<img alt=\"b\" src=\"http://example.com/image\" " + |
| 111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + | 113 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + |
| 112 "<img src=\"http://example.com/image2\">" + | 114 "<img src=\"http://example.com/image2\">" + |
| 113 "</td></tr></tbody></table>" + | 115 "</td></tr></tbody></table>" + |
| 114 "<p>" + CONTENT_TEXT + "</p>"; | 116 "<p>" + CONTENT_TEXT + "</p>"; |
| 115 | 117 |
| 116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); | 118 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); |
| 117 mBody.setInnerHTML(html); | 119 mBody.setInnerHTML(html); |
| 118 | 120 |
| (...skipping 53 matching lines...) | |
| 172 "</tbody>" + | 174 "</tbody>" + |
| 173 "</table>"; | 175 "</table>"; |
| 174 | 176 |
| 175 final String expected = | 177 final String expected = |
| 176 "<h1>" + | 178 "<h1>" + |
| 177 CONTENT_TEXT + | 179 CONTENT_TEXT + |
| 178 "</h1>" + | 180 "</h1>" + |
| 179 "<p>" + | 181 "<p>" + |
| 180 CONTENT_TEXT + | 182 CONTENT_TEXT + |
| 181 "</p>" + | 183 "</p>" + |
| 182 "<img src=\"http://example.com/test.png\">" + | 184 "<img src=\"http://example.com/test.png\" " + |
| 185 "width=\"0\" height=\"0\">" + | |
| 183 "<table>" + | 186 "<table>" + |
| 184 "<tbody>" + | 187 "<tbody>" + |
| 185 "<tr>" + | 188 "<tr>" + |
| 186 "<th>" + CONTENT_TEXT + | 189 "<th>" + CONTENT_TEXT + |
| 187 "<img src=\"http://example.com/test.png\">" + | 190 "<img src=\"http://example.com/test.png\">" + |
| 188 "</th>" + | 191 "</th>" + |
| 189 "<th>" + CONTENT_TEXT + "</th>" + | 192 "<th>" + CONTENT_TEXT + "</th>" + |
| 190 "</tr><tr>" + | 193 "</tr><tr>" + |
| 191 "<td>" + CONTENT_TEXT + "</td>" + | 194 "<td>" + CONTENT_TEXT + "</td>" + |
| 192 "<td>" + CONTENT_TEXT + "</td>" + | 195 "<td>" + CONTENT_TEXT + "</td>" + |
| 193 "</tr>" + | 196 "</tr>" + |
| 194 "</tbody>" + | 197 "</tbody>" + |
| 195 "</table>"; | 198 "</table>"; |
| 196 | 199 |
| 197 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | 200 mHead.setInnerHTML("<base href=\"http://example.com/\">"); |
| 198 mBody.setInnerHTML(html); | 201 mBody.setInnerHTML(html); |
| 199 | 202 |
| 200 ContentExtractor extractor = new ContentExtractor(mRoot); | 203 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 201 String extractedContent = extractor.extractContent(); | 204 String extractedContent = extractor.extractContent(); |
| 202 assertEquals(expected, | 205 assertEquals(expected, |
| 203 TestUtil.removeAllDirAttributes(extractedContent)); | 206 TestUtil.removeAllDirAttributes(extractedContent)); |
| 204 } | 207 } |
| 205 | 208 |
| 209 public void testKeepingWidthAndHeightAttributes() { | |
| 210 String html = | |
| 211 "<h1 style=\"font-weight: folder\">" + | |
|
wychen
2016/03/11 08:25:35
The padding text can be simplified. We don't need…
dalmirsilva
2016/03/14 18:28:06
Done.
| |
| 212 CONTENT_TEXT + | |
| 213 "</h1>" + | |
| 214 "<p style=\"\">" + | |
| 215 CONTENT_TEXT + | |
| 216 "</p>" + | |
| 217 "<img style=\"align: left\" src=\"/test.png\" " + | |
| 218 "width=\"200\" height=\"300\">"; | |
| 219 | |
| 220 final String expected = | |
| 221 "<h1>" + | |
| 222 CONTENT_TEXT + | |
| 223 "</h1>" + | |
| 224 "<p>" + | |
| 225 CONTENT_TEXT + | |
| 226 "</p>" + | |
| 227 "<img src=\"http://example.com/test.png\" " + | |
| 228 "width=\"200\" height=\"300\">"; | |
|
wychen
2016/03/11 08:25:35
Maybe another img with only width, and one with ne…
dalmirsilva
2016/03/14 18:28:06
Done.
| |
| 229 | |
| 230 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | |
| 231 mBody.setInnerHTML(html); | |
| 232 | |
| 233 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 234 String extractedContent = extractor.extractContent(); | |
| 235 assertEquals(expected, | |
| 236 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 237 } | |
| 238 | |
| 206 public void testPreserveOrderedList() { | 239 public void testPreserveOrderedList() { |
| 207 Element outerListTag = Document.get().createElement("OL"); | 240 Element outerListTag = Document.get().createElement("OL"); |
| 208 mBody.appendChild(outerListTag); | 241 mBody.appendChild(outerListTag); |
| 209 | 242 |
| 210 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 243 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 211 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 244 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 245 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 213 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 246 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 214 | 247 |
| 215 ContentExtractor extractor = new ContentExtractor(mRoot); | 248 ContentExtractor extractor = new ContentExtractor(mRoot); |
| (...skipping 382 matching lines...) | |
| 598 | 631 |
| 599 final String htmlArticle = | 632 final String htmlArticle = |
| 600 "<h1>" + CONTENT_TEXT + "</h1>" + | 633 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 601 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 634 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 602 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 635 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 603 | 636 |
| 604 // Non-article schema.org types should not use the fast path. | 637 // Non-article schema.org types should not use the fast path. |
| 605 assertExtractor(expected, htmlArticle); | 638 assertExtractor(expected, htmlArticle); |
| 606 } | 639 } |
| 607 } | 640 } |
| OLD | NEW |