Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(317)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Fixed imports order Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 outerFontTag.appendChild(TestUtil.createText(" ")); 136 outerFontTag.appendChild(TestUtil.createText(" "));
137 137
138 ContentExtractor extractor = new ContentExtractor(mRoot); 138 ContentExtractor extractor = new ContentExtractor(mRoot);
139 String extractedContent = extractor.extractContent(); 139 String extractedContent = extractor.extractContent();
140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " +
141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" +
142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>",
143 TestUtil.removeAllDirAttributes(extractedContent)); 143 TestUtil.removeAllDirAttributes(extractedContent));
144 } 144 }
145 145
146 public void testPreserveOrderedList() {
147 Element outerListTag = Document.get().createElement("OL");
148 mBody.appendChild(outerListTag);
149
150 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
151 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
152 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
153 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
154
155 ContentExtractor extractor = new ContentExtractor(mRoot);
156 String extractedContent = extractor.extractContent();
157 assertEquals("<OL>" +
158 "<LI>" + CONTENT_TEXT + "</LI>" +
159 "<LI>" + CONTENT_TEXT + "</LI>" +
160 "<LI>" + CONTENT_TEXT + "</LI>" +
161 "<LI>" + CONTENT_TEXT + "</LI>" +
162 "</OL>",
163 TestUtil.removeAllDirAttributes(extractedContent));
164 }
165
166 public void testPreserveNestedOrderedList() {
167 Element outerListTag = Document.get().createElement("OL");
168 Element outerListItem = Document.get().createElement("LI");
169
170 Element innerListTag = Document.get().createElement("OL");
171 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
172 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
173 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
174 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
175
176 outerListItem.appendChild(innerListTag);
177 outerListTag.appendChild(outerListItem);
178 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
179
180 mBody.appendChild(outerListTag);
181 ContentExtractor extractor = new ContentExtractor(mRoot);
182 String extractedContent = extractor.extractContent();
183 assertEquals("<OL>" +
184 "<LI>" +
185 "<OL>" +
186 "<LI>" + CONTENT_TEXT + "</LI>" +
187 "<LI>" + CONTENT_TEXT + "</LI>" +
188 "<LI>" + CONTENT_TEXT + "</LI>" +
189 "<LI>" + CONTENT_TEXT + "</LI>" +
190 "</OL>" +
191 "</LI>" +
192 "<LI>" + CONTENT_TEXT + "</LI>" +
193 "</OL>",
194 TestUtil.removeAllDirAttributes(extractedContent));
195 }
196
197 public void testPreserveNestedOrderedListWithOtherElementsInside() {
198 Element outerListTag = Document.get().createElement("OL");
199 Element outerListItem = Document.get().createElement("LI");
200 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT));
201 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
202
203 Element innerListTag = Document.get().createElement("OL");
204 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
205 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
206 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
207 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
208 innerListTag.appendChild(TestUtil.createParagraph(""));
209
210 outerListItem.appendChild(innerListTag);
211 outerListTag.appendChild(outerListItem);
212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
213 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
214
215 mBody.appendChild(outerListTag);
216 ContentExtractor extractor = new ContentExtractor(mRoot);
217 String extractedContent = extractor.extractContent();
218 assertEquals("<OL>" +
219 "<LI>" + CONTENT_TEXT +
220 "<p>" + CONTENT_TEXT + "</p>" +
221 "<OL>" +
222 "<LI>" + CONTENT_TEXT + "</LI>" +
223 "<LI>" + CONTENT_TEXT + "</LI>" +
224 "<LI>" + CONTENT_TEXT + "</LI>" +
225 "<LI>" + CONTENT_TEXT + "</LI>" +
226 "</OL>" +
227 "</LI>" +
228 "<LI>" + CONTENT_TEXT + "</LI>" +
229 "<p>" + CONTENT_TEXT + "</p>" +
230 "</OL>",
231 TestUtil.removeAllDirAttributes(extractedContent));
232 }
233
234 public void testPreserveUnorderedList() {
235 Element outerListTag = Document.get().createElement("UL");
236 mBody.appendChild(outerListTag);
237
238 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
239 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
240 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
241 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
242
243 ContentExtractor extractor = new ContentExtractor(mRoot);
244 String extractedContent = extractor.extractContent();
245 assertEquals("<UL>" +
246 "<LI>" + CONTENT_TEXT + "</LI>" +
247 "<LI>" + CONTENT_TEXT + "</LI>" +
248 "<LI>" + CONTENT_TEXT + "</LI>" +
249 "<LI>" + CONTENT_TEXT + "</LI>" +
250 "</UL>",
251 TestUtil.removeAllDirAttributes(extractedContent));
252 }
253
254 public void testPreserveNestedUnorderedList() {
255 Element outerListTag = Document.get().createElement("UL");
256 Element outerListItem = Document.get().createElement("LI");
257
258 Element innerListTag = Document.get().createElement("UL");
259 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
260 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
261 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
262 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
263
264 outerListItem.appendChild(innerListTag);
265 outerListTag.appendChild(outerListItem);
266 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
267
268 mBody.appendChild(outerListTag);
269 ContentExtractor extractor = new ContentExtractor(mRoot);
270 String extractedContent = extractor.extractContent();
271 assertEquals("<UL>" +
272 "<LI>" +
273 "<UL>" +
274 "<LI>" + CONTENT_TEXT + "</LI>" +
275 "<LI>" + CONTENT_TEXT + "</LI>" +
276 "<LI>" + CONTENT_TEXT + "</LI>" +
277 "<LI>" + CONTENT_TEXT + "</LI>" +
278 "</UL>" +
279 "</LI>" +
280 "<LI>" + CONTENT_TEXT + "</LI>" +
281 "</UL>",
282 TestUtil.removeAllDirAttributes(extractedContent));
283 }
284
285 public void testPreserveNestedUnorderedListWithOtherElementsInside() {
286 Element outerListTag = Document.get().createElement("UL");
287 Element outerListItem = Document.get().createElement("LI");
288 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT));
289 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
290
291 Element innerListTag = Document.get().createElement("UL");
292 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
293 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
294 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
295 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
296 innerListTag.appendChild(TestUtil.createParagraph(""));
297
298 outerListItem.appendChild(innerListTag);
299 outerListTag.appendChild(outerListItem);
300 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
301 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
302
303 mBody.appendChild(outerListTag);
304 ContentExtractor extractor = new ContentExtractor(mRoot);
305 String extractedContent = extractor.extractContent();
306 assertEquals("<UL>" +
307 "<LI>" + CONTENT_TEXT +
308 "<p>" + CONTENT_TEXT + "</p>" +
309 "<UL>" +
310 "<LI>" + CONTENT_TEXT + "</LI>" +
311 "<LI>" + CONTENT_TEXT + "</LI>" +
312 "<LI>" + CONTENT_TEXT + "</LI>" +
313 "<LI>" + CONTENT_TEXT + "</LI>" +
314 "</UL>" +
315 "</LI>" +
316 "<LI>" + CONTENT_TEXT + "</LI>" +
317 "<p>" + CONTENT_TEXT + "</p>" +
318 "</UL>",
319 TestUtil.removeAllDirAttributes(extractedContent));
320 }
321
322 public void testPreserveUnorderedListWithNestedOrderedList() {
323 Element unorderedListTag = Document.get().createElement("UL");
324 Element li = Document.get().createElement("LI");
325 Element orderedList = Document.get().createElement("OL");
326 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT));
327 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT));
328 li.appendChild(orderedList);
329 unorderedListTag.appendChild(li);
330 unorderedListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
331 mBody.appendChild(unorderedListTag);
332 ContentExtractor extractor = new ContentExtractor(mRoot);
333 String extractedContent = extractor.extractContent();
334 assertEquals("<UL>" +
335 "<LI>" +
336 "<OL>" +
337 "<LI>" + CONTENT_TEXT + "</LI>" +
338 "<LI>" + CONTENT_TEXT + "</LI>" +
339 "</OL>" +
340 "</LI>" +
341 "<LI>" + CONTENT_TEXT + "</LI>" +
342 "</UL>",
343 TestUtil.removeAllDirAttributes(extractedContent));
344 }
345
346 public void testMalFormedListStructureWithExtraLITagEnd() {
wychen 2015/08/04 02:37:01 nit: "Malformed" is a single word, so camel case should follow that (i.e. testMalformed... rather than testMalFormed...).
347 Element unorderedListTag = Document.get().createElement("UL");
348 String html = "<LI>" + CONTENT_TEXT + "</LI></LI><LI>" + CONTENT_TEXT + "</LI>";
349 unorderedListTag.setInnerHTML(html);
350 mBody.appendChild(unorderedListTag);
351 ContentExtractor extractor = new ContentExtractor(mRoot);
352 String extractedContent = extractor.extractContent();
353 assertEquals("<UL>" +
354 "<LI>" + CONTENT_TEXT + "</LI>" +
355 "<LI>" + CONTENT_TEXT + "</LI>" +
356 "</UL>",
357 TestUtil.removeAllDirAttributes(extractedContent));
358 }
359
360 public void testMalFormedListStructureWithExtraLITagStart() {
361 Element unorderedListTag = Document.get().createElement("OL");
362 String html = "<LI><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + " </LI>";
363 unorderedListTag.setInnerHTML(html);
364 mBody.appendChild(unorderedListTag);
365 ContentExtractor extractor = new ContentExtractor(mRoot);
366 String extractedContent = extractor.extractContent();
367 assertEquals("<OL>" +
368 "<LI>" + CONTENT_TEXT + "</LI>" +
369 "<LI>" + CONTENT_TEXT + "</LI>" +
370 "</OL>",
371 TestUtil.removeAllDirAttributes(extractedContent));
372 }
373
374 public void testMalFormedListStructureWithExtraOLTagStart() {
375 Element unorderedListTag = Document.get().createElement("OL");
376 String html = "<OL><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + " </LI>";
377 unorderedListTag.setInnerHTML(html);
378 mBody.appendChild(unorderedListTag);
379 ContentExtractor extractor = new ContentExtractor(mRoot);
380 String extractedContent = extractor.extractContent();
381 assertEquals("<OL>" +
382 "<OL>" +
383 "<LI>" + CONTENT_TEXT + "</LI>" +
384 "<LI>" + CONTENT_TEXT + "</LI>" +
385 "</OL>" +
386 "</OL>",
387 TestUtil.removeAllDirAttributes(extractedContent));
388 }
389
146 private void assertExtractor(String expected, String html) { 390 private void assertExtractor(String expected, String html) {
147 mBody.setInnerHTML(""); 391 mBody.setInnerHTML("");
148 Element div = TestUtil.createDiv(0); 392 Element div = TestUtil.createDiv(0);
149 mBody.appendChild(div); 393 mBody.appendChild(div);
150 394
151 div.setInnerHTML(html); 395 div.setInnerHTML(html);
152 ContentExtractor extractor = new ContentExtractor(mRoot); 396 ContentExtractor extractor = new ContentExtractor(mRoot);
153 String extractedContent = extractor.extractContent(); 397 String extractedContent = extractor.extractContent();
154 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); 398 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) );
155 } 399 }
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 476
233 final String htmlArticle = 477 final String htmlArticle =
234 "<h1>" + CONTENT_TEXT + "</h1>" + 478 "<h1>" + CONTENT_TEXT + "</h1>" +
235 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; 479 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>";
236 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 480 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
237 481
238 // Non-article schema.org types should not use the fast path. 482 // Non-article schema.org types should not use the fast path.
239 assertExtractor(expected, htmlArticle); 483 assertExtractor(expected, htmlArticle);
240 } 484 }
241 } 485 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698