OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + | 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + |
98 "<table role=\"grid\"><tbody><tr><td>" + | 98 "<table role=\"grid\"><tbody><tr><td>" + |
99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + | 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + |
100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + | 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + |
101 "</td></tr></tbody></table>" + | 101 "</td></tr></tbody></table>" + |
102 "<p>" + CONTENT_TEXT + "</p>"; | 102 "<p>" + CONTENT_TEXT + "</p>"; |
103 | 103 |
104 final String expected = | 104 final String expected = |
105 "<h1>" + CONTENT_TEXT + "</h1>" + | 105 "<h1>" + CONTENT_TEXT + "</h1>" + |
106 "<img src=\"http://example.com/path/image\" " + | 106 "<img src=\"http://example.com/path/image\" " + |
107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + | 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\" " + |
108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" + | 108 "width=\"0\" height=\"0\">" + |
wychen
2016/03/18 08:20:25
I still don't feel 0 is right. If we know nothing...
dalmirsilva
2016/03/18 17:20:37
Done.
| |
109 "<img alt=\"b\" src=\"http://example.com/path/image2\" " + | |
110 "width=\"0\" height=\"0\">" + | |
109 "<table role=\"grid\"><tbody><tr><td>" + | 111 "<table role=\"grid\"><tbody><tr><td>" + |
110 "<img alt=\"b\" src=\"http://example.com/image\" " + | 112 "<img alt=\"b\" src=\"http://example.com/image\" " + |
111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + | 113 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + |
112 "<img src=\"http://example.com/image2\">" + | 114 "<img src=\"http://example.com/image2\">" + |
113 "</td></tr></tbody></table>" + | 115 "</td></tr></tbody></table>" + |
114 "<p>" + CONTENT_TEXT + "</p>"; | 116 "<p>" + CONTENT_TEXT + "</p>"; |
115 | 117 |
116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); | 118 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); |
117 mBody.setInnerHTML(html); | 119 mBody.setInnerHTML(html); |
118 | 120 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
172 "</tbody>" + | 174 "</tbody>" + |
173 "</table>"; | 175 "</table>"; |
174 | 176 |
175 final String expected = | 177 final String expected = |
176 "<h1>" + | 178 "<h1>" + |
177 CONTENT_TEXT + | 179 CONTENT_TEXT + |
178 "</h1>" + | 180 "</h1>" + |
179 "<p>" + | 181 "<p>" + |
180 CONTENT_TEXT + | 182 CONTENT_TEXT + |
181 "</p>" + | 183 "</p>" + |
182 "<img src=\"http://example.com/test.png\">" + | 184 "<img src=\"http://example.com/test.png\" " + |
185 "width=\"0\" height=\"0\">" + | |
183 "<table>" + | 186 "<table>" + |
184 "<tbody>" + | 187 "<tbody>" + |
185 "<tr>" + | 188 "<tr>" + |
186 "<th>" + CONTENT_TEXT + | 189 "<th>" + CONTENT_TEXT + |
187 "<img src=\"http://example.com/test.png\">" + | 190 "<img src=\"http://example.com/test.png\">" + |
188 "</th>" + | 191 "</th>" + |
189 "<th>" + CONTENT_TEXT + "</th>" + | 192 "<th>" + CONTENT_TEXT + "</th>" + |
190 "</tr><tr>" + | 193 "</tr><tr>" + |
191 "<td>" + CONTENT_TEXT + "</td>" + | 194 "<td>" + CONTENT_TEXT + "</td>" + |
192 "<td>" + CONTENT_TEXT + "</td>" + | 195 "<td>" + CONTENT_TEXT + "</td>" + |
193 "</tr>" + | 196 "</tr>" + |
194 "</tbody>" + | 197 "</tbody>" + |
195 "</table>"; | 198 "</table>"; |
196 | 199 |
197 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | 200 mHead.setInnerHTML("<base href=\"http://example.com/\">"); |
198 mBody.setInnerHTML(html); | 201 mBody.setInnerHTML(html); |
199 | 202 |
200 ContentExtractor extractor = new ContentExtractor(mRoot); | 203 ContentExtractor extractor = new ContentExtractor(mRoot); |
201 String extractedContent = extractor.extractContent(); | 204 String extractedContent = extractor.extractContent(); |
202 assertEquals(expected, | 205 assertEquals(expected, |
203 TestUtil.removeAllDirAttributes(extractedContent)); | 206 TestUtil.removeAllDirAttributes(extractedContent)); |
204 } | 207 } |
205 | 208 |
209 public void testKeepingWidthAndHeightAttributes() { | |
210 String html = | |
211 "<h1>" + | |
212 CONTENT_TEXT + | |
213 "</h1>" + | |
214 "<p style=\"\">" + | |
wychen
2016/03/18 08:20:25
Why specify a style attribute here?
dalmirsilva
2016/03/18 17:20:37
Done.
| |
215 CONTENT_TEXT + | |
216 "</p>" + | |
217 "<img style=\"align: left\" src=\"/test.png\" " + | |
218 "width=\"200\" height=\"300\">" + | |
219 "<img style=\"align: left\" src=\"/test.png\" " + | |
220 "width=\"200\">" + | |
221 "<img style=\"align: left\" src=\"/test.png\">"; | |
222 | |
223 final String expected = | |
224 "<h1>" + | |
225 CONTENT_TEXT + | |
226 "</h1>" + | |
227 "<p>" + | |
228 CONTENT_TEXT + | |
229 "</p>" + | |
230 "<img src=\"http://example.com/test.png\" " + | |
231 "width=\"200\" height=\"300\">" + | |
232 "<img src=\"http://example.com/test.png\" " + | |
233 "width=\"200\" height=\"0\">" + | |
wychen
2016/03/18 08:20:25
0 handling.
dalmirsilva
2016/03/18 17:20:37
Done.
| |
234 "<img src=\"http://example.com/test.png\" width=\"0\" " + | |
235 "height=\"0\">"; | |
236 | |
237 mHead.setInnerHTML("<base href=\"http://example.com/\">"); | |
238 mBody.setInnerHTML(html); | |
239 | |
240 ContentExtractor extractor = new ContentExtractor(mRoot); | |
241 String extractedContent = extractor.extractContent(); | |
242 assertEquals(expected, | |
243 TestUtil.removeAllDirAttributes(extractedContent)); | |
244 } | |
245 | |
206 public void testPreserveOrderedList() { | 246 public void testPreserveOrderedList() { |
207 Element outerListTag = Document.get().createElement("OL"); | 247 Element outerListTag = Document.get().createElement("OL"); |
208 mBody.appendChild(outerListTag); | 248 mBody.appendChild(outerListTag); |
209 | 249 |
210 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 250 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
211 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 251 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 252 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
213 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | 253 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
214 | 254 |
215 ContentExtractor extractor = new ContentExtractor(mRoot); | 255 ContentExtractor extractor = new ContentExtractor(mRoot); |
(...skipping 382 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
598 | 638 |
599 final String htmlArticle = | 639 final String htmlArticle = |
600 "<h1>" + CONTENT_TEXT + "</h1>" + | 640 "<h1>" + CONTENT_TEXT + "</h1>" + |
601 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; | 641 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; |
602 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 642 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
603 | 643 |
604 // Non-article schema.org types should not use the fast path. | 644 // Non-article schema.org types should not use the fast path. |
605 assertExtractor(expected, htmlArticle); | 645 assertExtractor(expected, htmlArticle); |
606 } | 646 } |
607 } | 647 } |
OLD | NEW |