Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 package org.chromium.distiller; | 5 package org.chromium.distiller; | 
| 6 | 6 | 
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; | 
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; | 
| 9 | 9 | 
| 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 136 outerFontTag.appendChild(TestUtil.createText(" ")); | 136 outerFontTag.appendChild(TestUtil.createText(" ")); | 
| 137 | 137 | 
| 138 ContentExtractor extractor = new ContentExtractor(mRoot); | 138 ContentExtractor extractor = new ContentExtractor(mRoot); | 
| 139 String extractedContent = extractor.extractContent(); | 139 String extractedContent = extractor.extractContent(); | 
| 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + | 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + | 
| 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + | 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + | 
| 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", | 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", | 
| 143 TestUtil.removeAllDirAttributes(extractedContent)); | 143 TestUtil.removeAllDirAttributes(extractedContent)); | 
| 144 } | 144 } | 
| 145 | 145 | 
| 146 public void testPreserveOrderedList() { | |
| 147 Element outerListTag = Document.get().createElement("OL"); | |
| 148 mBody.appendChild(outerListTag); | |
| 149 | |
| 150 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 151 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 152 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 153 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 154 | |
| 155 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 156 String extractedContent = extractor.extractContent(); | |
| 157 assertEquals("<ol>" + | |
| 158 "<li>" + CONTENT_TEXT + "</li>" + | |
| 159 "<li>" + CONTENT_TEXT + "</li>" + | |
| 160 "<li>" + CONTENT_TEXT + "</li>" + | |
| 161 "<li>" + CONTENT_TEXT + "</li>" + | |
| 162 "</ol>", | |
| 163 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 164 } | |
| 165 | |
| 166 public void testPreserveNestedOrderedList() { | |
| 167 Element outerListTag = Document.get().createElement("OL"); | |
| 168 Element outerListItem = Document.get().createElement("LI"); | |
| 169 | |
| 170 Element innerListTag = Document.get().createElement("OL"); | |
| 171 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 172 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 173 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 174 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 175 | |
| 176 outerListItem.appendChild(innerListTag); | |
| 177 outerListTag.appendChild(outerListItem); | |
| 178 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 179 | |
| 180 mBody.appendChild(outerListTag); | |
| 181 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 182 String extractedContent = extractor.extractContent(); | |
| 183 assertEquals("<ol>" + | |
| 184 "<li>" + | |
| 185 "<ol>" + | |
| 186 "<li>" + CONTENT_TEXT + "</li>" + | |
| 
 
wychen
2015/08/01 01:00:20
nitpick: nested HTML would be more readable if indented…
 
 | |
| 187 "<li>" + CONTENT_TEXT + "</li>" + | |
| 188 "<li>" + CONTENT_TEXT + "</li>" + | |
| 189 "<li>" + CONTENT_TEXT + "</li>" + | |
| 190 "</ol>" + | |
| 191 "</li>" + | |
| 192 "<li>" + CONTENT_TEXT + "</li>" + | |
| 193 "</ol>", | |
| 194 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 195 } | |
| 196 | |
| 197 public void testPreserveNestedOrderedListWithOtherElementsInside() { | |
| 198 Element outerListTag = Document.get().createElement("OL"); | |
| 199 Element outerListItem = Document.get().createElement("LI"); | |
| 200 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); | |
| 201 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
| 202 | |
| 203 Element innerListTag = Document.get().createElement("OL"); | |
| 204 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 205 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 206 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 207 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 208 innerListTag.appendChild(TestUtil.createParagraph("")); | |
| 209 | |
| 210 outerListItem.appendChild(innerListTag); | |
| 211 outerListTag.appendChild(outerListItem); | |
| 212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 213 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
| 214 | |
| 215 mBody.appendChild(outerListTag); | |
| 216 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 217 String extractedContent = extractor.extractContent(); | |
| 218 assertEquals("<ol>" + | |
| 219 "<li>" + CONTENT_TEXT + | |
| 220 "<p>" + CONTENT_TEXT + "</p>" + | |
| 221 "<ol>" + | |
| 222 "<li>" + CONTENT_TEXT + "</li>" + | |
| 223 "<li>" + CONTENT_TEXT + "</li>" + | |
| 224 "<li>" + CONTENT_TEXT + "</li>" + | |
| 225 "<li>" + CONTENT_TEXT + "</li>" + | |
| 226 "</ol>" + | |
| 227 "</li>" + | |
| 228 "<li>" + CONTENT_TEXT + "</li>" + | |
| 229 "<p>" + CONTENT_TEXT + "</p>" + | |
| 230 "</ol>", | |
| 231 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 232 } | |
| 233 | |
| 234 public void testPreserveUnorderedList() { | |
| 235 Element outerListTag = Document.get().createElement("UL"); | |
| 236 mBody.appendChild(outerListTag); | |
| 237 | |
| 238 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 239 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 240 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 241 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 242 | |
| 243 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 244 String extractedContent = extractor.extractContent(); | |
| 245 assertEquals("<ul>" + | |
| 246 "<li>" + CONTENT_TEXT + "</li>" + | |
| 247 "<li>" + CONTENT_TEXT + "</li>" + | |
| 248 "<li>" + CONTENT_TEXT + "</li>" + | |
| 249 "<li>" + CONTENT_TEXT + "</li>" + | |
| 250 "</ul>", | |
| 251 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 252 } | |
| 253 | |
| 254 public void testPreserveNestedUnorderedList() { | |
| 
 
wychen
2015/08/01 01:00:20
We can also create a test with malformed HTML, by…
 
 | |
| 255 Element outerListTag = Document.get().createElement("UL"); | |
| 256 Element outerListItem = Document.get().createElement("LI"); | |
| 257 | |
| 258 Element innerListTag = Document.get().createElement("UL"); | |
| 259 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 260 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 261 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 262 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 263 | |
| 264 outerListItem.appendChild(innerListTag); | |
| 265 outerListTag.appendChild(outerListItem); | |
| 266 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 267 | |
| 268 mBody.appendChild(outerListTag); | |
| 269 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 270 String extractedContent = extractor.extractContent(); | |
| 271 assertEquals("<ul>" + | |
| 272 "<li>" + | |
| 273 "<ul>" + | |
| 274 "<li>" + CONTENT_TEXT + "</li>" + | |
| 275 "<li>" + CONTENT_TEXT + "</li>" + | |
| 276 "<li>" + CONTENT_TEXT + "</li>" + | |
| 277 "<li>" + CONTENT_TEXT + "</li>" + | |
| 278 "</ul>" + | |
| 279 "</li>" + | |
| 280 "<li>" + CONTENT_TEXT + "</li>" + | |
| 281 "</ul>", | |
| 282 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 283 } | |
| 284 | |
| 285 public void testPreserveNestedUnorderedListWithOtherElementsInside() { | |
| 286 Element outerListTag = Document.get().createElement("UL"); | |
| 287 Element outerListItem = Document.get().createElement("LI"); | |
| 288 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); | |
| 289 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
| 290 | |
| 291 Element innerListTag = Document.get().createElement("UL"); | |
| 292 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 293 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 294 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 295 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 296 innerListTag.appendChild(TestUtil.createParagraph("")); | |
| 297 | |
| 298 outerListItem.appendChild(innerListTag); | |
| 299 outerListTag.appendChild(outerListItem); | |
| 300 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 301 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); | |
| 302 | |
| 303 mBody.appendChild(outerListTag); | |
| 304 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 305 String extractedContent = extractor.extractContent(); | |
| 306 assertEquals("<ul>" + | |
| 307 "<li>" + CONTENT_TEXT + | |
| 308 "<p>" + CONTENT_TEXT + "</p>" + | |
| 309 "<ul>" + | |
| 310 "<li>" + CONTENT_TEXT + "</li>" + | |
| 311 "<li>" + CONTENT_TEXT + "</li>" + | |
| 312 "<li>" + CONTENT_TEXT + "</li>" + | |
| 313 "<li>" + CONTENT_TEXT + "</li>" + | |
| 314 "</ul>" + | |
| 315 "</li>" + | |
| 316 "<li>" + CONTENT_TEXT + "</li>" + | |
| 317 "<p>" + CONTENT_TEXT + "</p>" + | |
| 318 "</ul>", | |
| 319 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 320 } | |
| 321 | |
| 322 public void testPreserveUnorderedListWithNestedOrderedList() { | |
| 323 Element unorderedListTag = Document.get().createElement("UL"); | |
| 324 Element li = Document.get().createElement("LI"); | |
| 325 Element orderedList = Document.get().createElement("OL"); | |
| 326 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 327 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 328 li.appendChild(orderedList); | |
| 329 unorderedListTag.appendChild(li); | |
| 330 unorderedListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); | |
| 331 mBody.appendChild(unorderedListTag); | |
| 332 ContentExtractor extractor = new ContentExtractor(mRoot); | |
| 333 String extractedContent = extractor.extractContent(); | |
| 334 assertEquals("<ul>" + | |
| 335 "<li>" + | |
| 336 "<ol>" + | |
| 337 "<li>" + CONTENT_TEXT + "</li>" + | |
| 338 "<li>" + CONTENT_TEXT + "</li>" + | |
| 339 "</ol>" + | |
| 340 "</li>" + | |
| 341 "<li>" + CONTENT_TEXT + "</li>" + | |
| 342 "</ul>", | |
| 343 TestUtil.removeAllDirAttributes(extractedContent)); | |
| 344 } | |
| 345 | |
| 146 private void assertExtractor(String expected, String html) { | 346 private void assertExtractor(String expected, String html) { | 
| 147 mBody.setInnerHTML(""); | 347 mBody.setInnerHTML(""); | 
| 148 Element div = TestUtil.createDiv(0); | 348 Element div = TestUtil.createDiv(0); | 
| 149 mBody.appendChild(div); | 349 mBody.appendChild(div); | 
| 150 | 350 | 
| 151 div.setInnerHTML(html); | 351 div.setInnerHTML(html); | 
| 152 ContentExtractor extractor = new ContentExtractor(mRoot); | 352 ContentExtractor extractor = new ContentExtractor(mRoot); | 
| 153 String extractedContent = extractor.extractContent(); | 353 String extractedContent = extractor.extractContent(); | 
| 154 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); | 354 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent) ); | 
| 155 } | 355 } | 
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 232 | 432 | 
| 233 final String htmlArticle = | 433 final String htmlArticle = | 
| 234 "<h1>" + CONTENT_TEXT + "</h1>" + | 434 "<h1>" + CONTENT_TEXT + "</h1>" + | 
| 235 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; | 435 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + " </div>"; | 
| 236 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 436 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 
| 237 | 437 | 
| 238 // Non-article schema.org types should not use the fast path. | 438 // Non-article schema.org types should not use the fast path. | 
| 239 assertExtractor(expected, htmlArticle); | 439 assertExtractor(expected, htmlArticle); | 
| 240 } | 440 } | 
| 241 } | 441 } | 
| OLD | NEW |