Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 | 9 |
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + | 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + |
| 98 "<table role=\"grid\"><tbody><tr><td>" + | 98 "<table role=\"grid\"><tbody><tr><td>" + |
| 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + | 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + |
| 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + | 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + |
| 101 "</td></tr></tbody></table>" + | 101 "</td></tr></tbody></table>" + |
| 102 "<p>" + CONTENT_TEXT + "</p>"; | 102 "<p>" + CONTENT_TEXT + "</p>"; |
| 103 | 103 |
| 104 final String expected = | 104 final String expected = |
| 105 "<h1>" + CONTENT_TEXT + "</h1>" + | 105 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 106 "<img src=\"http://example.com/path/image\" " + | 106 "<img src=\"http://example.com/path/image\" " + |
| 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + | 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\" " + |
| 108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" + | 108 "width=\"0\" height=\"0\">" + |
|
wychen
2016/03/18 08:20:25
I still don't feel 0 is right. If we know nothing…
dalmirsilva
2016/03/18 17:20:37
Done.
| |
| 109 "<img alt=\"b\" src=\"http://example.com/path/image2\" " + | |
| 110 "width=\"0\" height=\"0\">" + | |
| 109 "<table role=\"grid\"><tbody><tr><td>" + | 111 "<table role=\"grid\"><tbody><tr><td>" + |
| 110 "<img alt=\"b\" src=\"http://example.com/image\" " + | 112 "<img alt=\"b\" src=\"http://example.com/image\" " + |
| 111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + | 113 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + |
| 112 "<img src=\"http://example.com/image2\">" + | 114 "<img src=\"http://example.com/image2\">" + |
| 113 "</td></tr></tbody></table>" + | 115 "</td></tr></tbody></table>" + |
| 114 "<p>" + CONTENT_TEXT + "</p>"; | 116 "<p>" + CONTENT_TEXT + "</p>"; |
| 115 | 117 |
| 116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); | 118 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); |
| 117 mBody.setInnerHTML(html); | 119 mBody.setInnerHTML(html); |
| 118 | 120 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 172 "</tbody>" + | 174 "</tbody>" + |
| 173 "</table>"; | 175 "</table>"; |
| 174 | 176 |
| 175 final String expected = | 177 final String expected = |
| 176 "<h1>" + | 178 "<h1>" + |
| 177 CONTENT_TEXT + | 179 CONTENT_TEXT + |
| 178 "</h1>" + | 180 "</h1>" + |
| 179 "<p>" + | 181 "<p>" + |
| 180 CONTENT_TEXT + | 182 CONTENT_TEXT + |
| 181 "</p>" + | 183 "</p>" + |
| 182 "<img src=\"http://example.com/test.png\">" + | 184 "<img src=\"http://example.com/test.png\" " + |
| 185 "width=\"0\" height=\"0\">" + | |
| 183 "<table>" + | 186 "<table>" + |
| 184 "<tbody>" + | 187 "<tbody>" + |
| 185 "<tr>" + | 188 "<tr>" + |
| 186 "<th>" + CONTENT_TEXT + | 189 "<th>" + CONTENT_TEXT + |
| 187 "<img src=\"http://example.com/test.png\">" + | 190 "<img src=\"http://example.com/test.png\">" + |
| 188 "</th>" + | 191 "</th>" + |
| 189 "<th>" + CONTENT_TEXT + "</th>" + | 192 "<th>" + CONTENT_TEXT + "</th>" + |
| 190 "</tr><tr>" + | 193 "</tr><tr>" + |
| 191 "<td>" + CONTENT_TEXT + "</td>" + | 194 "<td>" + CONTENT_TEXT + "</td>" + |
| 192 "<td>" + CONTENT_TEXT + "</td>" + | 195 "<td>" + CONTENT_TEXT + "</td>" + |
| 193 "</tr>" + | 196 "</tr>" + |
| 194 "</tbody>" + | 197 "</tbody>" + |
| 195 "</table>"; | 198 "</table>"; |
| 196 | 199 |
| 197 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | 200 mHead.setInnerHTML("<base href=\"http://example.com/\">"); |
| 198 mBody.setInnerHTML(html); | 201 mBody.setInnerHTML(html); |
| 199 | 202 |
| 200 ContentExtractor extractor = new ContentExtractor(mRoot); | 203 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 201 String extractedContent = extractor.extractContent(); | 204 String extractedContent = extractor.extractContent(); |
| 202 assertEquals(expected, | 205 assertEquals(expected, |
| 203 TestUtil.removeAllDirAttributes(extractedContent)); | 206 TestUtil.removeAllDirAttributes(extractedContent)); |
| 204 } | 207 } |
| 205 | 208 |
| 209 public void testKeepingWidthAndHeightAttributes() { | |
| 210 String html = | |
| 211 "<h1>" + | |
| 212 CONTENT_TEXT + | |
| 213 "</h1>" + | |
| 214 "<p style=\"\">" + | |
|
wychen
2016/03/18 08:20:25
Why specifying style?
dalmirsilva
2016/03/18 17:20:37
Done.
| |
| 215 CONTENT_TEXT + | |
| 216 "</p>" + | |
| 217 "<img style=\"align: left\" src=\"/test.png\" " + | |
| 218 "width=\"200\" height=\"300\">" + | |
| 219 "<img style=\"align: left\" src=\"/test.png\" " + | |
| 220 "width=\"200\">" + | |
| 221 "<img style=\"align: left\" src=\"/test.png\">"; | |
| 222 | |
| 223 final String expected = | |
| 224 "<h1>" + | |
| 225 CONTENT_TEXT + | |
| 226 "</h1>" + | |
| 227 "<p>" + | |
| 228 CONTENT_TEXT + | |
| 229 "</p>" + | |
| 230 "<img src=\"http://example.com/test.png\" " + | |
| 231 "width=\"200\" height=\"300\">" + | |
| 232 "<img src=\"http://example.com/test.png\" " + | |
| 233 "width=\"200\" height=\"0\">" + | |
|
wychen
2016/03/18 08:20:25
0 handling.
dalmirsilva
2016/03/18 17:20:37
Done.
| |
| 234 "<img src=\"http://example.com/test.png\" width=\"0\" " + | |
| 235 "height=\"0\">"; | |
| 236 | |
| 237 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | |
| 238 mBody.setInnerHTML(html); | |
| 239 | |
| 240 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 241 String extractedContent = extractor.extractContent(); | |
| 242 assertEquals(expected, | |
| 243 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 244 } | |
| 245 | |
| 206 public void testPreserveOrderedList() { | 246 public void testPreserveOrderedList() { |
| 207 Element outerListTag = Document.get().createElement("OL"); | 247 Element outerListTag = Document.get().createElement("OL"); |
| 208 mBody.appendChild(outerListTag); | 248 mBody.appendChild(outerListTag); |
| 209 | 249 |
| 210 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 250 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 211 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 251 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 252 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 213 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 253 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 214 | 254 |
| 215 ContentExtractor extractor = new ContentExtractor(mRoot); | 255 ContentExtractor extractor = new ContentExtractor(mRoot); |
| (...skipping 382 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 598 | 638 |
| 599 final String htmlArticle = | 639 final String htmlArticle = |
| 600 "<h1>" + CONTENT_TEXT + "</h1>" + | 640 "<h1>" + CONTENT_TEXT + "</h1>" + |
| 601 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 641 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
| 602 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 642 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
| 603 | 643 |
| 604 // Non-article schema.org types should not use the fast path. | 644 // Non-article schema.org types should not use the fast path. |
| 605 assertExtractor(expected, htmlArticle); | 645 assertExtractor(expected, htmlArticle); |
| 606 } | 646 } |
| 607 } | 647 } |
| OLD | NEW |