Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(634)

Side by Side Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Small code refactor for more appropriate names. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import com.google.gwt.dom.client.Document; 7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element; 8 import com.google.gwt.dom.client.Element;
9 9
10 public class ContentExtractorTest extends DomDistillerJsTestCase { 10 public class ContentExtractorTest extends DomDistillerJsTestCase {
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 outerFontTag.appendChild(TestUtil.createText(" ")); 136 outerFontTag.appendChild(TestUtil.createText(" "));
137 137
138 ContentExtractor extractor = new ContentExtractor(mRoot); 138 ContentExtractor extractor = new ContentExtractor(mRoot);
139 String extractedContent = extractor.extractContent(); 139 String extractedContent = extractor.extractContent();
140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " +
141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" +
142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>",
143 TestUtil.removeAllDirAttributes(extractedContent)); 143 TestUtil.removeAllDirAttributes(extractedContent));
144 } 144 }
145 145
// Checks that a flat ordered list keeps its structure through extraction.
// An OL element is attached to mBody and filled with four items built by
// TestUtil.createListItem(CONTENT_TEXT); the extracted content, after
// TestUtil.removeAllDirAttributes, must equal an <ol> holding four
// <li>CONTENT_TEXT</li> entries.
// NOTE(review): createListItem is defined outside this view; presumably it
// wraps the text in an <li> element - confirm against TestUtil.
146 public void testPreserveOrderedList() {
147 Element outerListTag = Document.get().createElement("OL");
148 mBody.appendChild(outerListTag);
149
150 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
151 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
152 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
153 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
154
// Extraction runs over mRoot, not just the list element created above.
155 ContentExtractor extractor = new ContentExtractor(mRoot);
156 String extractedContent = extractor.extractContent();
157 assertEquals("<ol>" +
158 "<li>" + CONTENT_TEXT + "</li>" +
159 "<li>" + CONTENT_TEXT + "</li>" +
160 "<li>" + CONTENT_TEXT + "</li>" +
161 "<li>" + CONTENT_TEXT + "</li>" +
162 "</ol>",
163 TestUtil.removeAllDirAttributes(extractedContent));
164 }
165
// Checks that an ordered list nested inside another ordered list is preserved.
// The outer OL gets two children: an LI whose only child is an inner OL with
// four list items, followed by a plain list item. The expected output is
// <ol><li><ol>...four items...</ol></li><li>CONTENT_TEXT</li></ol>.
166 public void testPreserveNestedOrderedList() {
167 Element outerListTag = Document.get().createElement("OL");
168 Element outerListItem = Document.get().createElement("LI");
169
170 Element innerListTag = Document.get().createElement("OL");
171 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
172 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
173 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
174 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
175
176 outerListItem.appendChild(innerListTag);
177 outerListTag.appendChild(outerListItem);
178 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
179
// The whole tree is assembled first and only then attached to mBody.
180 mBody.appendChild(outerListTag);
181 ContentExtractor extractor = new ContentExtractor(mRoot);
182 String extractedContent = extractor.extractContent();
183 assertEquals("<ol>" +
184 "<li>" +
185 "<ol>" +
186 "<li>" + CONTENT_TEXT + "</li>" +
// NOTE(review): the un-numbered row below is an inline reviewer remark captured
// from the diff page (cut off in this capture); it is not part of the test source.
wychen 2015/08/01 01:00:20 nitpick: nested html would be more readable if ind
187 "<li>" + CONTENT_TEXT + "</li>" +
188 "<li>" + CONTENT_TEXT + "</li>" +
189 "<li>" + CONTENT_TEXT + "</li>" +
190 "</ol>" +
191 "</li>" +
192 "<li>" + CONTENT_TEXT + "</li>" +
193 "</ol>",
194 TestUtil.removeAllDirAttributes(extractedContent));
195 }
196
// Checks nested ordered lists when non-list content is mixed in.
// The first outer LI holds a text node, a paragraph and an inner OL; the outer
// OL additionally receives a plain list item and a paragraph as direct children.
// The expected string keeps the text, both non-empty paragraphs and both lists
// in their original positions.
197 public void testPreserveNestedOrderedListWithOtherElementsInside() {
198 Element outerListTag = Document.get().createElement("OL");
199 Element outerListItem = Document.get().createElement("LI");
200 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT));
201 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
202
203 Element innerListTag = Document.get().createElement("OL");
204 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
205 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
206 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
207 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
// An empty paragraph is appended to the inner list; the expected output below
// contains no matching <p></p>, so the assertion requires it to be dropped.
208 innerListTag.appendChild(TestUtil.createParagraph(""));
209
210 outerListItem.appendChild(innerListTag);
211 outerListTag.appendChild(outerListItem);
212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
// A paragraph placed directly under the OL (not inside an LI); the expected
// output keeps it as a direct child of <ol>.
213 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
214
215 mBody.appendChild(outerListTag);
216 ContentExtractor extractor = new ContentExtractor(mRoot);
217 String extractedContent = extractor.extractContent();
218 assertEquals("<ol>" +
219 "<li>" + CONTENT_TEXT +
220 "<p>" + CONTENT_TEXT + "</p>" +
221 "<ol>" +
222 "<li>" + CONTENT_TEXT + "</li>" +
223 "<li>" + CONTENT_TEXT + "</li>" +
224 "<li>" + CONTENT_TEXT + "</li>" +
225 "<li>" + CONTENT_TEXT + "</li>" +
226 "</ol>" +
227 "</li>" +
228 "<li>" + CONTENT_TEXT + "</li>" +
229 "<p>" + CONTENT_TEXT + "</p>" +
230 "</ol>",
231 TestUtil.removeAllDirAttributes(extractedContent));
232 }
233
// Unordered-list counterpart of the flat list test: a UL attached to mBody
// receives four items from TestUtil.createListItem(CONTENT_TEXT), and the
// extracted content (dir attributes removed) must equal a <ul> holding four
// <li>CONTENT_TEXT</li> entries.
234 public void testPreserveUnorderedList() {
235 Element outerListTag = Document.get().createElement("UL");
236 mBody.appendChild(outerListTag);
237
238 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
239 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
240 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
241 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
242
// Extraction runs over mRoot, not just the list element created above.
243 ContentExtractor extractor = new ContentExtractor(mRoot);
244 String extractedContent = extractor.extractContent();
245 assertEquals("<ul>" +
246 "<li>" + CONTENT_TEXT + "</li>" +
247 "<li>" + CONTENT_TEXT + "</li>" +
248 "<li>" + CONTENT_TEXT + "</li>" +
249 "<li>" + CONTENT_TEXT + "</li>" +
250 "</ul>",
251 TestUtil.removeAllDirAttributes(extractedContent));
252 }
253
// Checks that an unordered list nested inside another unordered list is
// preserved. The outer UL gets an LI whose only child is an inner UL with four
// list items, followed by a plain list item; the expected output is
// <ul><li><ul>...four items...</ul></li><li>CONTENT_TEXT</li></ul>.
254 public void testPreserveNestedUnorderedList() {
// NOTE(review): the un-numbered row below is an inline reviewer remark captured
// from the diff page (cut off in this capture); it is not part of the test source.
wychen 2015/08/01 01:00:20 We can also create a test with malformed html, by
255 Element outerListTag = Document.get().createElement("UL");
256 Element outerListItem = Document.get().createElement("LI");
257
258 Element innerListTag = Document.get().createElement("UL");
259 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
260 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
261 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
262 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
263
264 outerListItem.appendChild(innerListTag);
265 outerListTag.appendChild(outerListItem);
266 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
267
// The whole tree is assembled first and only then attached to mBody.
268 mBody.appendChild(outerListTag);
269 ContentExtractor extractor = new ContentExtractor(mRoot);
270 String extractedContent = extractor.extractContent();
271 assertEquals("<ul>" +
272 "<li>" +
273 "<ul>" +
274 "<li>" + CONTENT_TEXT + "</li>" +
275 "<li>" + CONTENT_TEXT + "</li>" +
276 "<li>" + CONTENT_TEXT + "</li>" +
277 "<li>" + CONTENT_TEXT + "</li>" +
278 "</ul>" +
279 "</li>" +
280 "<li>" + CONTENT_TEXT + "</li>" +
281 "</ul>",
282 TestUtil.removeAllDirAttributes(extractedContent));
283 }
284
// Checks nested unordered lists when non-list content is mixed in.
// The first outer LI holds a text node, a paragraph and an inner UL; the outer
// UL additionally receives a plain list item and a paragraph as direct children.
// The expected string keeps the text, both non-empty paragraphs and both lists
// in their original positions.
285 public void testPreserveNestedUnorderedListWithOtherElementsInside() {
286 Element outerListTag = Document.get().createElement("UL");
287 Element outerListItem = Document.get().createElement("LI");
288 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT));
289 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
290
291 Element innerListTag = Document.get().createElement("UL");
292 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
293 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
294 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
295 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
// An empty paragraph is appended to the inner list; the expected output below
// contains no matching <p></p>, so the assertion requires it to be dropped.
296 innerListTag.appendChild(TestUtil.createParagraph(""));
297
298 outerListItem.appendChild(innerListTag);
299 outerListTag.appendChild(outerListItem);
300 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
// A paragraph placed directly under the UL (not inside an LI); the expected
// output keeps it as a direct child of <ul>.
301 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
302
303 mBody.appendChild(outerListTag);
304 ContentExtractor extractor = new ContentExtractor(mRoot);
305 String extractedContent = extractor.extractContent();
306 assertEquals("<ul>" +
307 "<li>" + CONTENT_TEXT +
308 "<p>" + CONTENT_TEXT + "</p>" +
309 "<ul>" +
310 "<li>" + CONTENT_TEXT + "</li>" +
311 "<li>" + CONTENT_TEXT + "</li>" +
312 "<li>" + CONTENT_TEXT + "</li>" +
313 "<li>" + CONTENT_TEXT + "</li>" +
314 "</ul>" +
315 "</li>" +
316 "<li>" + CONTENT_TEXT + "</li>" +
317 "<p>" + CONTENT_TEXT + "</p>" +
318 "</ul>",
319 TestUtil.removeAllDirAttributes(extractedContent));
320 }
321
// Checks that mixing list types is preserved: a UL whose first LI contains an
// OL with two list items, followed by a plain list item in the UL. The expected
// output keeps the <ul> outside and the <ol> inside the first <li>.
322 public void testPreserveUnorderedListWithNestedOrderedList() {
323 Element unorderedListTag = Document.get().createElement("UL");
324 Element li = Document.get().createElement("LI");
325 Element orderedList = Document.get().createElement("OL");
326 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT));
327 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT));
328 li.appendChild(orderedList);
329 unorderedListTag.appendChild(li);
330 unorderedListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT));
// The tree is attached to mBody only after it is fully built; extraction then
// runs over mRoot.
331 mBody.appendChild(unorderedListTag);
332 ContentExtractor extractor = new ContentExtractor(mRoot);
333 String extractedContent = extractor.extractContent();
334 assertEquals("<ul>" +
335 "<li>" +
336 "<ol>" +
337 "<li>" + CONTENT_TEXT + "</li>" +
338 "<li>" + CONTENT_TEXT + "</li>" +
339 "</ol>" +
340 "</li>" +
341 "<li>" + CONTENT_TEXT + "</li>" +
342 "</ul>",
343 TestUtil.removeAllDirAttributes(extractedContent));
344 }
345
// Shared helper for HTML-string based tests. It empties mBody, appends a div
// from TestUtil.createDiv(0), sets that div's inner HTML to `html`, runs
// ContentExtractor over mRoot and asserts that the extracted content, after
// TestUtil.removeAllDirAttributes, equals `expected`.
// NOTE(review): each row below shows the old and new columns of the side-by-side
// diff on one line; the method text is the same in both, only the diff's line
// numbers differ.
146 private void assertExtractor(String expected, String html) { 346 private void assertExtractor(String expected, String html) {
147 mBody.setInnerHTML(""); 347 mBody.setInnerHTML("");
148 Element div = TestUtil.createDiv(0); 348 Element div = TestUtil.createDiv(0);
149 mBody.appendChild(div); 349 mBody.appendChild(div);
150 350
151 div.setInnerHTML(html); 351 div.setInnerHTML(html);
152 ContentExtractor extractor = new ContentExtractor(mRoot); 352 ContentExtractor extractor = new ContentExtractor(mRoot);
153 String extractedContent = extractor.extractContent(); 353 String extractedContent = extractor.extractContent();
154 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); 354 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) );
155 } 355 }
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 432
233 final String htmlArticle = 433 final String htmlArticle =
234 "<h1>" + CONTENT_TEXT + "</h1>" + 434 "<h1>" + CONTENT_TEXT + "</h1>" +
235 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; 435 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>";
236 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; 436 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article;
237 437
238 // Non-article schema.org types should not use the fast path. 438 // Non-article schema.org types should not use the fast path.
239 assertExtractor(expected, htmlArticle); 439 assertExtractor(expected, htmlArticle);
240 } 440 }
241 } 441 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698