Chromium Code Reviews| Index: javatests/org/chromium/distiller/ContentExtractorTest.java |
| diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java |
| index 63d349c923a4c341670a5bd85a4632cab8903c8f..1e161b8ea60b83cf7894a5e8722f086ad03389ed 100644 |
| --- a/javatests/org/chromium/distiller/ContentExtractorTest.java |
| +++ b/javatests/org/chromium/distiller/ContentExtractorTest.java |
| @@ -143,6 +143,250 @@ public class ContentExtractorTest extends DomDistillerJsTestCase { |
| TestUtil.removeAllDirAttributes(extractedContent)); |
| } |
| + /** Checks that a flat ordered list of four items is extracted with its OL/LI markup intact. */ |
| + public void testPreserveOrderedList() { |
| +     Element list = Document.get().createElement("OL"); |
| +     mBody.appendChild(list); |
| + |
| +     for (int i = 0; i < 4; i++) { |
| +         list.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String expected = "<OL>" + item + item + item + item + "</OL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Checks that an ordered list nested inside the first item of another ordered list |
| +  * is extracted with both levels of OL/LI markup intact. |
| +  */ |
| + public void testPreserveNestedOrderedList() { |
| +     Element outer = Document.get().createElement("OL"); |
| +     Element firstItem = Document.get().createElement("LI"); |
| + |
| +     Element inner = Document.get().createElement("OL"); |
| +     for (int i = 0; i < 4; i++) { |
| +         inner.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| + |
| +     firstItem.appendChild(inner); |
| +     outer.appendChild(firstItem); |
| +     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     mBody.appendChild(outer); |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String expected = "<OL>" |
| +             + "<LI>" |
| +             + "<OL>" + item + item + item + item + "</OL>" |
| +             + "</LI>" |
| +             + item |
| +             + "</OL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Checks nested ordered lists that also contain non-list content: text and a paragraph |
| +  * inside the first item, an empty paragraph inside the inner list, and a paragraph |
| +  * appended directly to the outer list. |
| +  */ |
| + public void testPreserveNestedOrderedListWithOtherElementsInside() { |
| +     Element outer = Document.get().createElement("OL"); |
| +     Element firstItem = Document.get().createElement("LI"); |
| +     firstItem.appendChild(TestUtil.createText(CONTENT_TEXT)); |
| +     firstItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| + |
| +     Element inner = Document.get().createElement("OL"); |
| +     for (int i = 0; i < 4; i++) { |
| +         inner.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| +     // Empty paragraph: the expected output below contains no trace of it. |
| +     inner.appendChild(TestUtil.createParagraph("")); |
| + |
| +     firstItem.appendChild(inner); |
| +     outer.appendChild(firstItem); |
| +     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     // Paragraph placed directly under the list rather than inside an LI. |
| +     outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| +     mBody.appendChild(outer); |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| +     String expected = "<OL>" |
| +             + "<LI>" + CONTENT_TEXT + paragraph |
| +             + "<OL>" + item + item + item + item + "</OL>" |
| +             + "</LI>" |
| +             + item |
| +             + paragraph |
| +             + "</OL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** Checks that a flat unordered list of four items is extracted with its UL/LI markup intact. */ |
| + public void testPreserveUnorderedList() { |
| +     Element list = Document.get().createElement("UL"); |
| +     mBody.appendChild(list); |
| + |
| +     for (int i = 0; i < 4; i++) { |
| +         list.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String expected = "<UL>" + item + item + item + item + "</UL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Checks that an unordered list nested inside the first item of another unordered list |
| +  * is extracted with both levels of UL/LI markup intact. |
| +  */ |
| + public void testPreserveNestedUnorderedList() { |
| +     Element outer = Document.get().createElement("UL"); |
| +     Element firstItem = Document.get().createElement("LI"); |
| + |
| +     Element inner = Document.get().createElement("UL"); |
| +     for (int i = 0; i < 4; i++) { |
| +         inner.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| + |
| +     firstItem.appendChild(inner); |
| +     outer.appendChild(firstItem); |
| +     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     mBody.appendChild(outer); |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String expected = "<UL>" |
| +             + "<LI>" |
| +             + "<UL>" + item + item + item + item + "</UL>" |
| +             + "</LI>" |
| +             + item |
| +             + "</UL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Checks nested unordered lists that also contain non-list content: text and a paragraph |
| +  * inside the first item, an empty paragraph inside the inner list, and a paragraph |
| +  * appended directly to the outer list. |
| +  */ |
| + public void testPreserveNestedUnorderedListWithOtherElementsInside() { |
| +     Element outer = Document.get().createElement("UL"); |
| +     Element firstItem = Document.get().createElement("LI"); |
| +     firstItem.appendChild(TestUtil.createText(CONTENT_TEXT)); |
| +     firstItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| + |
| +     Element inner = Document.get().createElement("UL"); |
| +     for (int i = 0; i < 4; i++) { |
| +         inner.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| +     // Empty paragraph: the expected output below contains no trace of it. |
| +     inner.appendChild(TestUtil.createParagraph("")); |
| + |
| +     firstItem.appendChild(inner); |
| +     outer.appendChild(firstItem); |
| +     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     // Paragraph placed directly under the list rather than inside an LI. |
| +     outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT)); |
| +     mBody.appendChild(outer); |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String paragraph = "<p>" + CONTENT_TEXT + "</p>"; |
| +     String expected = "<UL>" |
| +             + "<LI>" + CONTENT_TEXT + paragraph |
| +             + "<UL>" + item + item + item + item + "</UL>" |
| +             + "</LI>" |
| +             + item |
| +             + paragraph |
| +             + "</UL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Checks that an ordered list nested inside the first item of an unordered list |
| +  * keeps its own OL tag in the extracted output while the outer list stays UL. |
| +  */ |
| + public void testPreserveUnorderedListWithNestedOrderedList() { |
| +     Element outer = Document.get().createElement("UL"); |
| +     Element firstItem = Document.get().createElement("LI"); |
| +     Element inner = Document.get().createElement("OL"); |
| +     for (int i = 0; i < 2; i++) { |
| +         inner.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     } |
| +     firstItem.appendChild(inner); |
| +     outer.appendChild(firstItem); |
| +     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT)); |
| +     mBody.appendChild(outer); |
| + |
| +     String item = "<LI>" + CONTENT_TEXT + "</LI>"; |
| +     String expected = "<UL>" |
| +             + "<LI>" |
| +             + "<OL>" + item + item + "</OL>" |
| +             + "</LI>" |
| +             + item |
| +             + "</UL>"; |
| + |
| +     String extractedContent = new ContentExtractor(mRoot).extractContent(); |
| +     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + public void testMalFormedListStructureWithExtraLITagEnd() { |
|
wychen
2015/08/04 02:37:01
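nit: "Malformed" is a single word, so camel case should follow: testMalformedListStructureWithExtraLITagEnd, not testMalFormed... (same for the other testMalFormed* methods below).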
|
| + Element unorderedListTag = Document.get().createElement("UL"); |
| + String html = "<LI>" + CONTENT_TEXT + "</LI></LI><LI>" + CONTENT_TEXT + "</LI>"; |
| + unorderedListTag.setInnerHTML(html); |
| + mBody.appendChild(unorderedListTag); |
| + ContentExtractor extractor = new ContentExtractor(mRoot); |
| + String extractedContent = extractor.extractContent(); |
| + assertEquals("<UL>" + |
| + "<LI>" + CONTENT_TEXT + "</LI>" + |
| + "<LI>" + CONTENT_TEXT + "</LI>" + |
| + "</UL>", |
| + TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Feeds an ordered list whose markup starts with a stray extra {@code <LI>} start tag |
| +  * and expects the extracted output to contain exactly the two real list items. |
| +  */ |
| + public void testMalFormedListStructureWithExtraLITagStart() { |
| +     // The element is an OL, so name it accordingly (it was previously called unorderedListTag). |
| +     Element orderedListTag = Document.get().createElement("OL"); |
| +     // Leading "<LI>" is never closed; how it is parsed is left to the browser via setInnerHTML. |
| +     String html = "<LI><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>"; |
| +     orderedListTag.setInnerHTML(html); |
| +     mBody.appendChild(orderedListTag); |
| +     ContentExtractor extractor = new ContentExtractor(mRoot); |
| +     String extractedContent = extractor.extractContent(); |
| +     assertEquals("<OL>" + |
| +             "<LI>" + CONTENT_TEXT + "</LI>" + |
| +             "<LI>" + CONTENT_TEXT + "</LI>" + |
| +             "</OL>", |
| +             TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| + /** |
| +  * Feeds an ordered list whose markup starts with an extra, unclosed {@code <OL>} start tag |
| +  * and expects the extracted output to contain a properly closed nested OL holding both items. |
| +  */ |
| + public void testMalFormedListStructureWithExtraOLTagStart() { |
| +     // The element is an OL, so name it accordingly (it was previously called unorderedListTag). |
| +     Element orderedListTag = Document.get().createElement("OL"); |
| +     // The inner "<OL>" has no matching end tag in the input markup. |
| +     String html = "<OL><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>"; |
| +     orderedListTag.setInnerHTML(html); |
| +     mBody.appendChild(orderedListTag); |
| +     ContentExtractor extractor = new ContentExtractor(mRoot); |
| +     String extractedContent = extractor.extractContent(); |
| +     assertEquals("<OL>" + |
| +             "<OL>" + |
| +             "<LI>" + CONTENT_TEXT + "</LI>" + |
| +             "<LI>" + CONTENT_TEXT + "</LI>" + |
| +             "</OL>" + |
| +             "</OL>", |
| +             TestUtil.removeAllDirAttributes(extractedContent)); |
| + } |
| + |
| private void assertExtractor(String expected, String html) { |
| mBody.setInnerHTML(""); |
| Element div = TestUtil.createDiv(0); |