OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
9 | 9 |
10 public class ContentExtractorTest extends DomDistillerJsTestCase { | 10 public class ContentExtractorTest extends DomDistillerJsTestCase { |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
136 outerFontTag.appendChild(TestUtil.createText(" ")); | 136 outerFontTag.appendChild(TestUtil.createText(" ")); |
137 | 137 |
138 ContentExtractor extractor = new ContentExtractor(mRoot); | 138 ContentExtractor extractor = new ContentExtractor(mRoot); |
139 String extractedContent = extractor.extractContent(); | 139 String extractedContent = extractor.extractContent(); |
140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + | 140 assertEquals("<font><span><font>" + CONTENT_TEXT + "</font></span> " + |
141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + | 141 "<span><font>" + CONTENT_TEXT + "</font></span>\n" + |
142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", | 142 "<span><font>" + CONTENT_TEXT + "</font></span> </font>", |
143 TestUtil.removeAllDirAttributes(extractedContent)); | 143 TestUtil.removeAllDirAttributes(extractedContent)); |
144 } | 144 } |
145 | 145 |
| 146 public void testPreserveOrderedList() { |
| 147 Element outerListTag = Document.get().createElement("OL"); |
| 148 mBody.appendChild(outerListTag); |
| 149 |
| 150 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 151 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 152 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 153 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 154 |
| 155 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 156 String extractedContent = extractor.extractContent(); |
| 157 assertEquals("<ol>" + |
| 158 "<li>" + CONTENT_TEXT + "</li>" + |
| 159 "<li>" + CONTENT_TEXT + "</li>" + |
| 160 "<li>" + CONTENT_TEXT + "</li>" + |
| 161 "<li>" + CONTENT_TEXT + "</li>" + |
| 162 "</ol>", |
| 163 TestUtil.removeAllDirAttributes(extractedContent)); |
| 164 } |
| 165 |
| 166 public void testPreserveNestedOrderedList() { |
| 167 Element outerListTag = Document.get().createElement("OL"); |
| 168 Element outerListItem = Document.get().createElement("LI"); |
| 169 |
| 170 Element innerListTag = Document.get().createElement("OL"); |
| 171 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 172 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 173 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 174 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 175 |
| 176 outerListItem.appendChild(innerListTag); |
| 177 outerListTag.appendChild(outerListItem); |
| 178 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 179 |
| 180 mBody.appendChild(outerListTag); |
| 181 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 182 String extractedContent = extractor.extractContent(); |
| 183 assertEquals("<ol>" + |
| 184 "<li>" + |
| 185 "<ol>" + |
| 186 "<li>" + CONTENT_TEXT + "</li>" + |
| 187 "<li>" + CONTENT_TEXT + "</li>" + |
| 188 "<li>" + CONTENT_TEXT + "</li>" + |
| 189 "<li>" + CONTENT_TEXT + "</li>" + |
| 190 "</ol>" + |
| 191 "</li>" + |
| 192 "<li>" + CONTENT_TEXT + "</li>" + |
| 193 "</ol>", |
| 194 TestUtil.removeAllDirAttributes(extractedContent)); |
| 195 } |
| 196 |
| 197 public void testPreserveNestedOrderedListWithOtherElementsInside() { |
| 198 Element outerListTag = Document.get().createElement("OL"); |
| 199 Element outerListItem = Document.get().createElement("LI"); |
| 200 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); |
| 201 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| 202 |
| 203 Element innerListTag = Document.get().createElement("OL"); |
| 204 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 205 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 206 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 207 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 208 innerListTag.appendChild(TestUtil.createParagraph("")); |
| 209 |
| 210 outerListItem.appendChild(innerListTag); |
| 211 outerListTag.appendChild(outerListItem); |
| 212 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 213 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| 214 |
| 215 mBody.appendChild(outerListTag); |
| 216 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 217 String extractedContent = extractor.extractContent(); |
| 218 assertEquals("<ol>" + |
| 219 "<li>" + CONTENT_TEXT + |
| 220 "<p>" + CONTENT_TEXT + "</p>" + |
| 221 "<ol>" + |
| 222 "<li>" + CONTENT_TEXT + "</li>" + |
| 223 "<li>" + CONTENT_TEXT + "</li>" + |
| 224 "<li>" + CONTENT_TEXT + "</li>" + |
| 225 "<li>" + CONTENT_TEXT + "</li>" + |
| 226 "</ol>" + |
| 227 "</li>" + |
| 228 "<li>" + CONTENT_TEXT + "</li>" + |
| 229 "<p>" + CONTENT_TEXT + "</p>" + |
| 230 "</ol>", |
| 231 TestUtil.removeAllDirAttributes(extractedContent)); |
| 232 } |
| 233 |
| 234 public void testPreserveUnorderedList() { |
| 235 Element outerListTag = Document.get().createElement("UL"); |
| 236 mBody.appendChild(outerListTag); |
| 237 |
| 238 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 239 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 240 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 241 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 242 |
| 243 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 244 String extractedContent = extractor.extractContent(); |
| 245 assertEquals("<ul>" + |
| 246 "<li>" + CONTENT_TEXT + "</li>" + |
| 247 "<li>" + CONTENT_TEXT + "</li>" + |
| 248 "<li>" + CONTENT_TEXT + "</li>" + |
| 249 "<li>" + CONTENT_TEXT + "</li>" + |
| 250 "</ul>", |
| 251 TestUtil.removeAllDirAttributes(extractedContent)); |
| 252 } |
| 253 |
| 254 public void testPreserveNestedUnorderedList() { |
| 255 Element outerListTag = Document.get().createElement("UL"); |
| 256 Element outerListItem = Document.get().createElement("LI"); |
| 257 |
| 258 Element innerListTag = Document.get().createElement("UL"); |
| 259 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 260 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 261 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 262 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 263 |
| 264 outerListItem.appendChild(innerListTag); |
| 265 outerListTag.appendChild(outerListItem); |
| 266 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 267 |
| 268 mBody.appendChild(outerListTag); |
| 269 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 270 String extractedContent = extractor.extractContent(); |
| 271 assertEquals("<ul>" + |
| 272 "<li>" + |
| 273 "<ul>" + |
| 274 "<li>" + CONTENT_TEXT + "</li>" + |
| 275 "<li>" + CONTENT_TEXT + "</li>" + |
| 276 "<li>" + CONTENT_TEXT + "</li>" + |
| 277 "<li>" + CONTENT_TEXT + "</li>" + |
| 278 "</ul>" + |
| 279 "</li>" + |
| 280 "<li>" + CONTENT_TEXT + "</li>" + |
| 281 "</ul>", |
| 282 TestUtil.removeAllDirAttributes(extractedContent)); |
| 283 } |
| 284 |
| 285 public void testPreserveNestedUnorderedListWithOtherElementsInside() { |
| 286 Element outerListTag = Document.get().createElement("UL"); |
| 287 Element outerListItem = Document.get().createElement("LI"); |
| 288 outerListItem.appendChild(TestUtil.createText(CONTENT_TEXT)); |
| 289 outerListItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| 290 |
| 291 Element innerListTag = Document.get().createElement("UL"); |
| 292 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 293 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 294 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 295 innerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 296 innerListTag.appendChild(TestUtil.createParagraph("")); |
| 297 |
| 298 outerListItem.appendChild(innerListTag); |
| 299 outerListTag.appendChild(outerListItem); |
| 300 outerListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 301 outerListTag.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| 302 |
| 303 mBody.appendChild(outerListTag); |
| 304 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 305 String extractedContent = extractor.extractContent(); |
| 306 assertEquals("<ul>" + |
| 307 "<li>" + CONTENT_TEXT + |
| 308 "<p>" + CONTENT_TEXT + "</p>" + |
| 309 "<ul>" + |
| 310 "<li>" + CONTENT_TEXT + "</li>" + |
| 311 "<li>" + CONTENT_TEXT + "</li>" + |
| 312 "<li>" + CONTENT_TEXT + "</li>" + |
| 313 "<li>" + CONTENT_TEXT + "</li>" + |
| 314 "</ul>" + |
| 315 "</li>" + |
| 316 "<li>" + CONTENT_TEXT + "</li>" + |
| 317 "<p>" + CONTENT_TEXT + "</p>" + |
| 318 "</ul>", |
| 319 TestUtil.removeAllDirAttributes(extractedContent)); |
| 320 } |
| 321 |
| 322 public void testPreserveUnorderedListWithNestedOrderedList() { |
| 323 Element unorderedListTag = Document.get().createElement("UL"); |
| 324 Element li = Document.get().createElement("LI"); |
| 325 Element orderedList = Document.get().createElement("OL"); |
| 326 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 327 orderedList.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 328 li.appendChild(orderedList); |
| 329 unorderedListTag.appendChild(li); |
| 330 unorderedListTag.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| 331 mBody.appendChild(unorderedListTag); |
| 332 ContentExtractor extractor = new ContentExtractor(mRoot); |
| 333 String extractedContent = extractor.extractContent(); |
| 334 assertEquals("<ul>" + |
| 335 "<li>" + |
| 336 "<ol>" + |
| 337 "<li>" + CONTENT_TEXT + "</li>" + |
| 338 "<li>" + CONTENT_TEXT + "</li>" + |
| 339 "</ol>" + |
| 340 "</li>" + |
| 341 "<li>" + CONTENT_TEXT + "</li>" + |
| 342 "</ul>", |
| 343 TestUtil.removeAllDirAttributes(extractedContent)); |
| 344 } |
| 345 |
146 private void assertExtractor(String expected, String html) { | 346 private void assertExtractor(String expected, String html) { |
147 mBody.setInnerHTML(""); | 347 mBody.setInnerHTML(""); |
148 Element div = TestUtil.createDiv(0); | 348 Element div = TestUtil.createDiv(0); |
149 mBody.appendChild(div); | 349 mBody.appendChild(div); |
150 | 350 |
151 div.setInnerHTML(html); | 351 div.setInnerHTML(html); |
152 ContentExtractor extractor = new ContentExtractor(mRoot); | 352 ContentExtractor extractor = new ContentExtractor(mRoot); |
153 String extractedContent = extractor.extractContent(); | 353 String extractedContent = extractor.extractContent(); |
154 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); | 354 assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)
); |
155 } | 355 } |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
232 | 432 |
233 final String htmlArticle = | 433 final String htmlArticle = |
234 "<h1>" + CONTENT_TEXT + "</h1>" + | 434 "<h1>" + CONTENT_TEXT + "</h1>" + |
235 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; | 435 "<div itemscope itemtype=\"http://schema.org/Movie\">" + article + "
</div>"; |
236 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; | 436 final String expected = "<h1>" + CONTENT_TEXT + "</h1>" + article; |
237 | 437 |
238 // Non-article schema.org types should not use the fast path. | 438 // Non-article schema.org types should not use the fast path. |
239 assertExtractor(expected, htmlArticle); | 439 assertExtractor(expected, htmlArticle); |
240 } | 440 } |
241 } | 441 } |
OLD | NEW |