Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1754213004: Retain image sizes (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Moving preserved attributes to the whitelist. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" + 97 "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" +
98 "<table role=\"grid\"><tbody><tr><td>" + 98 "<table role=\"grid\"><tbody><tr><td>" +
99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" + 99 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" +
100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" + 100 "<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" +
101 "</td></tr></tbody></table>" + 101 "</td></tr></tbody></table>" +
102 "<p>" + CONTENT_TEXT + "</p>"; 102 "<p>" + CONTENT_TEXT + "</p>";
103 103
104 final String expected = 104 final String expected =
105 "<h1>" + CONTENT_TEXT + "</h1>" + 105 "<h1>" + CONTENT_TEXT + "</h1>" +
106 "<img src=\"http://example.com/path/image\" " + 106 "<img src=\"http://example.com/path/image\" " +
107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\">" + 107 "srcset=\"http://example.com/path/image200 200w, http://example.org/image400 400w\" " +
108 "<img alt=\"b\" src=\"http://example.com/path/image2\">" + 108 "width=\"0\" height=\"0\">" +
wychen 2016/03/11 08:25:35 If the dimension is not specified in the source, w
109 "<img alt=\"b\" src=\"http://example.com/path/image2\" " +
110 "width=\"0\" height=\"0\">" +
109 "<table role=\"grid\"><tbody><tr><td>" + 111 "<table role=\"grid\"><tbody><tr><td>" +
110 "<img alt=\"b\" src=\"http://example.com/image\" " + 112 "<img alt=\"b\" src=\"http://example.com/image\" " +
111 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" + 113 "srcset=\"https://example.com/image2x 2x, http://example.com/image4x 4x, \">" +
112 "<img src=\"http://example.com/image2\">" + 114 "<img src=\"http://example.com/image2\">" +
113 "</td></tr></tbody></table>" + 115 "</td></tr></tbody></table>" +
114 "<p>" + CONTENT_TEXT + "</p>"; 116 "<p>" + CONTENT_TEXT + "</p>";
115 117
116 mHead.setInnerHTML("<base href=\"http://example.com/path/\">"); 118 mHead.setInnerHTML("<base href=\"http://example.com/path/\">");
117 mBody.setInnerHTML(html); 119 mBody.setInnerHTML(html);
118 120
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
172 "</tbody>" + 174 "</tbody>" +
173 "</table>"; 175 "</table>";
174 176
175 final String expected = 177 final String expected =
176 "<h1>" + 178 "<h1>" +
177 CONTENT_TEXT + 179 CONTENT_TEXT +
178 "</h1>" + 180 "</h1>" +
179 "<p>" + 181 "<p>" +
180 CONTENT_TEXT + 182 CONTENT_TEXT +
181 "</p>" + 183 "</p>" +
182 "<img src=\"http://example.com/test.png\">" + 184 "<img src=\"http://example.com/test.png\" " +
185 "width=\"0\" height=\"0\">" +
183 "<table>" + 186 "<table>" +
184 "<tbody>" + 187 "<tbody>" +
185 "<tr>" + 188 "<tr>" +
186 "<th>" + CONTENT_TEXT + 189 "<th>" + CONTENT_TEXT +
187 "<img src=\"http://example.com/test.png\">" + 190 "<img src=\"http://example.com/test.png\">" +
188 "</th>" + 191 "</th>" +
189 "<th>" + CONTENT_TEXT + "</th>" + 192 "<th>" + CONTENT_TEXT + "</th>" +
190 "</tr><tr>" + 193 "</tr><tr>" +
191 "<td>" + CONTENT_TEXT + "</td>" + 194 "<td>" + CONTENT_TEXT + "</td>" +
192 "<td>" + CONTENT_TEXT + "</td>" + 195 "<td>" + CONTENT_TEXT + "</td>" +
193 "</tr>" + 196 "</tr>" +
194 "</tbody>" + 197 "</tbody>" +
195 "</table>"; 198 "</table>";
196 199
197 mHead.setInnerHTML("<base href=\"http://example.com/\">"); 200 mHead.setInnerHTML("<base href=\"http://example.com/\">");
198 mBody.setInnerHTML(html); 201 mBody.setInnerHTML(html);
199 202
200 ContentExtractor extractor = new ContentExtractor(mRoot); 203 ContentExtractor extractor = new ContentExtractor(mRoot);
201 String extractedContent = extractor.extractContent(); 204 String extractedContent = extractor.extractContent();
202 assertEquals(expected, 205 assertEquals(expected,
203 TestUtil.removeAllDirAttributes(extractedContent)); 206 TestUtil.removeAllDirAttributes(extractedContent));
204 } 207 }
205 208
209 public void testKeepingWidthAndHeightAttributes() {
210 String html =
211 "<h1 style=\"font-weight: folder\">" +
wychen 2016/03/11 08:25:35 The padding text can be simplified. We don't need
dalmirsilva 2016/03/14 18:28:06 Done.
212 CONTENT_TEXT +
213 "</h1>" +
214 "<p style=\"\">" +
215 CONTENT_TEXT +
216 "</p>" +
217 "<img style=\"align: left\" src=\"/test.png\" " +
218 "width=\"200\" height=\"300\">";
219
220 final String expected =
221 "<h1>" +
222 CONTENT_TEXT +
223 "</h1>" +
224 "<p>" +
225 CONTENT_TEXT +
226 "</p>" +
227 "<img src=\"http://example.com/test.png\" " +
228 "width=\"200\" height=\"300\">";
wychen 2016/03/11 08:25:35 Maybe another img with only width, and one with ne
dalmirsilva 2016/03/14 18:28:06 Done.
229
230 mHead.setInnerHTML("<base href=\"http://example.com/\">");
231 mBody.setInnerHTML(html);
232
233 ContentExtractor extractor = new ContentExtractor(mRoot);
234 String extractedContent = extractor.extractContent();
235 assertEquals(expected,
236 TestUtil.removeAllDirAttributes(extractedContent));
237 }
238
206 public void testPreserveOrderedList() { 239 public void testPreserveOrderedList() {
207 Element outerListTag = Document.get().createElement("OL"); 240 Element outerListTag = Document.get().createElement("OL");
208 mBody.appendChild(outerListTag); 241 mBody.appendChild(outerListTag);
209 242
210 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); 243 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
211 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); 244 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); 245 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
213 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); 246 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
214 247
215 ContentExtractor extractor = new ContentExtractor(mRoot); 248 ContentExtractor extractor = new ContentExtractor(mRoot);
(...skipping 382 matching lines...) Expand 10 before | Expand all | Expand 10 after
598 631
599 final String htmlArticle = 632 final String htmlArticle =
600 "<h1>" + CONTENT_TEXT + "</h1>" + 633 "<h1>" + CONTENT_TEXT + "</h1>" +
601 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>"; 634 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "</div>";
602 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 635 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
603 636
604 // Non-article schema.org types should not use the fast path. 637 // Non-article schema.org types should not use the fast path.
605 assertExtractor(expected, htmlArticle); 638 assertExtractor(expected, htmlArticle);
606 } 639 }
607 } 640 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698