OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
136 outerFontTag.appendChild(TestUtil.createText(" ")); | 136 outerFontTag.appendChild(TestUtil.createText(" ")); |
137 | 137 |
138 ContentExtractor extractor = new ContentExtractor(mRoot); | 138 ContentExtractor extractor = new ContentExtractor(mRoot); |
139 String extractedContent = extractor.extractContent(); | 139 String extractedContent = extractor.extractContent(); |
140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + | 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + |
141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + | 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + |
142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", | 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", |
143 TestUtil.removeAllDirAttributes(extractedContent)); | 143 TestUtil.removeAllDirAttributes(extractedContent)); |
144 } | 144 } |
145 | 145 |
146 public void testPreserveOrderedList() { | |
147 Element outerListTag = Document.get().createElement("OL"); | |
148 mBody.appendChild(outerListTag); | |
149 | |
150 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
151 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
152 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
153 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
154 | |
155 ContentExtractor extractor = new ContentExtractor(mRoot); | |
156 String extractedContent = extractor.extractContent(); | |
157 assertEquals("<OL>" + | |
158 "<LI>" + CONTENT_TEXT + "</LI>" + | |
159 "<LI>" + CONTENT_TEXT + "</LI>" + | |
160 "<LI>" + CONTENT_TEXT + "</LI>" + | |
161 "<LI>" + CONTENT_TEXT + "</LI>" + | |
162 "</OL>", | |
163 TestUtil.removeAllDirAttributes(extractedContent)); | |
164 } | |
165 | |
166 public void testPreserveNestedOrderedList() { | |
167 Element outerListTag = Document.get().createElement("OL"); | |
168 Element outerListItem = Document.get().createElement("LI"); | |
169 | |
170 Element innerListTag = Document.get().createElement("OL"); | |
171 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
172 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
173 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
174 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
175 | |
176 outerListItem.appendChild(innerListTag); | |
177 outerListTag.appendChild(outerListItem); | |
178 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
179 | |
180 mBody.appendChild(outerListTag); | |
181 ContentExtractor extractor = new ContentExtractor(mRoot); | |
182 String extractedContent = extractor.extractContent(); | |
183 assertEquals("<OL>" + | |
184 "<LI>" + | |
185 "<OL>" + | |
186 "<LI>" + CONTENT_TEXT + "</LI>" + | |
187 "<LI>" + CONTENT_TEXT + "</LI>" + | |
188 "<LI>" + CONTENT_TEXT + "</LI>" + | |
189 "<LI>" + CONTENT_TEXT + "</LI>" + | |
190 "</OL>" + | |
191 "</LI>" + | |
192 "<LI>" + CONTENT_TEXT + "</LI>" + | |
193 "</OL>", | |
194 TestUtil.removeAllDirAttributes(extractedContent)); | |
195 } | |
196 | |
197 public void testPreserveNestedOrderedListWithOtherElementsInside() { | |
198 Element outerListTag = Document.get().createElement("OL"); | |
199 Element outerListItem = Document.get().createElement("LI"); | |
200 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); | |
201 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
202 | |
203 Element innerListTag = Document.get().createElement("OL"); | |
204 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
205 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
206 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
207 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
208 innerListTag.appendChild(TestUtil.createParagraph("")); | |
209 | |
210 outerListItem.appendChild(innerListTag); | |
211 outerListTag.appendChild(outerListItem); | |
212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
213 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
214 | |
215 mBody.appendChild(outerListTag); | |
216 ContentExtractor extractor = new ContentExtractor(mRoot); | |
217 String extractedContent = extractor.extractContent(); | |
218 assertEquals("<OL>" + | |
219 "<LI>" + CONTENT_TEXT + | |
220 "<p>" + CONTENT_TEXT + "</p>" + | |
221 "<OL>" + | |
222 "<LI>" + CONTENT_TEXT + "</LI>" + | |
223 "<LI>" + CONTENT_TEXT + "</LI>" + | |
224 "<LI>" + CONTENT_TEXT + "</LI>" + | |
225 "<LI>" + CONTENT_TEXT + "</LI>" + | |
226 "</OL>" + | |
227 "</LI>" + | |
228 "<LI>" + CONTENT_TEXT + "</LI>" + | |
229 "<p>" + CONTENT_TEXT + "</p>" + | |
230 "</OL>", | |
231 TestUtil.removeAllDirAttributes(extractedContent)); | |
232 } | |
233 | |
234 public void testPreserveUnorderedList() { | |
235 Element outerListTag = Document.get().createElement("UL"); | |
236 mBody.appendChild(outerListTag); | |
237 | |
238 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
239 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
240 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
241 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
242 | |
243 ContentExtractor extractor = new ContentExtractor(mRoot); | |
244 String extractedContent = extractor.extractContent(); | |
245 assertEquals("<UL>" + | |
246 "<LI>" + CONTENT_TEXT + "</LI>" + | |
247 "<LI>" + CONTENT_TEXT + "</LI>" + | |
248 "<LI>" + CONTENT_TEXT + "</LI>" + | |
249 "<LI>" + CONTENT_TEXT + "</LI>" + | |
250 "</UL>", | |
251 TestUtil.removeAllDirAttributes(extractedContent)); | |
252 } | |
253 | |
254 public void testPreserveNestedUnorderedList() { | |
255 Element outerListTag = Document.get().createElement("UL"); | |
256 Element outerListItem = Document.get().createElement("LI"); | |
257 | |
258 Element innerListTag = Document.get().createElement("UL"); | |
259 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
260 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
261 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
262 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
263 | |
264 outerListItem.appendChild(innerListTag); | |
265 outerListTag.appendChild(outerListItem); | |
266 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
267 | |
268 mBody.appendChild(outerListTag); | |
269 ContentExtractor extractor = new ContentExtractor(mRoot); | |
270 String extractedContent = extractor.extractContent(); | |
271 assertEquals("<UL>" + | |
272 "<LI>" + | |
273 "<UL>" + | |
274 "<LI>" + CONTENT_TEXT + "</LI>" + | |
275 "<LI>" + CONTENT_TEXT + "</LI>" + | |
276 "<LI>" + CONTENT_TEXT + "</LI>" + | |
277 "<LI>" + CONTENT_TEXT + "</LI>" + | |
278 "</UL>" + | |
279 "</LI>" + | |
280 "<LI>" + CONTENT_TEXT + "</LI>" + | |
281 "</UL>", | |
282 TestUtil.removeAllDirAttributes(extractedContent)); | |
283 } | |
284 | |
285 public void testPreserveNestedUnorderedListWithOtherElementsInside() { | |
286 Element outerListTag = Document.get().createElement("UL"); | |
287 Element outerListItem = Document.get().createElement("LI"); | |
288 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); | |
289 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
290 | |
291 Element innerListTag = Document.get().createElement("UL"); | |
292 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
293 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
294 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
295 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
296 innerListTag.appendChild(TestUtil.createParagraph("")); | |
297 | |
298 outerListItem.appendChild(innerListTag); | |
299 outerListTag.appendChild(outerListItem); | |
300 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
301 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
302 | |
303 mBody.appendChild(outerListTag); | |
304 ContentExtractor extractor = new ContentExtractor(mRoot); | |
305 String extractedContent = extractor.extractContent(); | |
306 assertEquals("<UL>" + | |
307 "<LI>" + CONTENT_TEXT + | |
308 "<p>" + CONTENT_TEXT + "</p>" + | |
309 "<UL>" + | |
310 "<LI>" + CONTENT_TEXT + "</LI>" + | |
311 "<LI>" + CONTENT_TEXT + "</LI>" + | |
312 "<LI>" + CONTENT_TEXT + "</LI>" + | |
313 "<LI>" + CONTENT_TEXT + "</LI>" + | |
314 "</UL>" + | |
315 "</LI>" + | |
316 "<LI>" + CONTENT_TEXT + "</LI>" + | |
317 "<p>" + CONTENT_TEXT + "</p>" + | |
318 "</UL>", | |
319 TestUtil.removeAllDirAttributes(extractedContent)); | |
320 } | |
321 | |
322 public void testPreserveUnorderedListWithNestedOrderedList() { | |
323 Element unorderedListTag = Document.get().createElement("UL"); | |
324 Element li = Document.get().createElement("LI"); | |
325 Element orderedList = Document.get().createElement("OL"); | |
326 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
327 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
328 li.appendChild(orderedList); | |
329 unorderedListTag.appendChild(li); | |
330 unorderedListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
331 mBody.appendChild(unorderedListTag); | |
332 ContentExtractor extractor = new ContentExtractor(mRoot); | |
333 String extractedContent = extractor.extractContent(); | |
334 assertEquals("<UL>" + | |
335 "<LI>" + | |
336 "<OL>" + | |
337 "<LI>" + CONTENT_TEXT + "</LI>" + | |
338 "<LI>" + CONTENT_TEXT + "</LI>" + | |
339 "</OL>" + | |
340 "</LI>" + | |
341 "<LI>" + CONTENT_TEXT + "</LI>" + | |
342 "</UL>", | |
343 TestUtil.removeAllDirAttributes(extractedContent)); | |
344 } | |
345 | |
346 public void testMalFormedListStructureWithExtraLITagEnd() { | |
wychen
2015/08/04 02:37:01
nit: Malformed is a word, so camel case should fol
| |
347 Element unorderedListTag = Document.get().createElement("UL"); | |
348 String html = "<LI>" + CONTENT_TEXT + "</LI></LI><LI>" + CONTENT_TEXT + "</LI>"; | |
349 unorderedListTag.setInnerHTML(html); | |
350 mBody.appendChild(unorderedListTag); | |
351 ContentExtractor extractor = new ContentExtractor(mRoot); | |
352 String extractedContent = extractor.extractContent(); | |
353 assertEquals("<UL>" + | |
354 "<LI>" + CONTENT_TEXT + "</LI>" + | |
355 "<LI>" + CONTENT_TEXT + "</LI>" + | |
356 "</UL>", | |
357 TestUtil.removeAllDirAttributes(extractedContent)); | |
358 } | |
359 | |
360 public void testMalFormedListStructureWithExtraLITagStart() { | |
361 Element unorderedListTag = Document.get().createElement("OL"); | |
362 String html = "<LI><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + " </LI>"; | |
363 unorderedListTag.setInnerHTML(html); | |
364 mBody.appendChild(unorderedListTag); | |
365 ContentExtractor extractor = new ContentExtractor(mRoot); | |
366 String extractedContent = extractor.extractContent(); | |
367 assertEquals("<OL>" + | |
368 "<LI>" + CONTENT_TEXT + "</LI>" + | |
369 "<LI>" + CONTENT_TEXT + "</LI>" + | |
370 "</OL>", | |
371 TestUtil.removeAllDirAttributes(extractedContent)); | |
372 } | |
373 | |
374 public void testMalFormedListStructureWithExtraOLTagStart() { | |
375 Element unorderedListTag = Document.get().createElement("OL"); | |
376 String html = "<OL><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + " </LI>"; | |
377 unorderedListTag.setInnerHTML(html); | |
378 mBody.appendChild(unorderedListTag); | |
379 ContentExtractor extractor = new ContentExtractor(mRoot); | |
380 String extractedContent = extractor.extractContent(); | |
381 assertEquals("<OL>" + | |
382 "<OL>" + | |
383 "<LI>" + CONTENT_TEXT + "</LI>" + | |
384 "<LI>" + CONTENT_TEXT + "</LI>" + | |
385 "</OL>" + | |
386 "</OL>", | |
387 TestUtil.removeAllDirAttributes(extractedContent)); | |
388 } | |
389 | |
146 private void assertExtractor(String expected, String html) { | 390 private void assertExtractor(String expected, String html) { |
147 mBody.setInnerHTML(""); | 391 mBody.setInnerHTML(""); |
148 Element div = TestUtil.createDiv(0); | 392 Element div = TestUtil.createDiv(0); |
149 mBody.appendChild(div); | 393 mBody.appendChild(div); |
150 | 394 |
151 div.setInnerHTML(html); | 395 div.setInnerHTML(html); |
152 ContentExtractor extractor = new ContentExtractor(mRoot); | 396 ContentExtractor extractor = new ContentExtractor(mRoot); |
153 String extractedContent = extractor.extractContent(); | 397 String extractedContent = extractor.extractContent(); |
154 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); | 398 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); |
155 } | 399 } |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
232 | 476 |
233 final String htmlArticle = | 477 final String htmlArticle = |
234 "<h1>" + CONTENT_TEXT + "</h1>" + | 478 "<h1>" + CONTENT_TEXT + "</h1>" + |
235 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; | 479 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; |
236 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 480 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
237 | 481 |
238 // Non-article schema.org types should not use the fast path. | 482 // Non-article schema.org types should not use the fast path. |
239 assertExtractor(expected, htmlArticle); | 483 assertExtractor(expected, htmlArticle); |
240 } | 484 } |
241 } | 485 } |
OLD | NEW |