| Index: javatests/org/chromium/distiller/ContentExtractorTest.java
 | 
| diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
 | 
| index 63d349c923a4c341670a5bd85a4632cab8903c8f..87f502fbe9033e62a3c2610b437fa568d2c21aad 100644
 | 
| --- a/javatests/org/chromium/distiller/ContentExtractorTest.java
 | 
| +++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
 | 
| @@ -143,6 +143,267 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
 | 
|                  TestUtil.removeAllDirAttributes(extractedContent));
 | 
|      }
 | 
|  
 | 
| +    /**
 | 
| +     * A flat ordered list must come out of extraction with its OL wrapper and
 | 
| +     * all four LI children intact.
 | 
| +     */
 | 
| +    public void testPreserveOrderedList() {
 | 
| +        Element list = Document.get().createElement("OL");
 | 
| +        mBody.appendChild(list);
 | 
| +        // Four identical items, each created via TestUtil.createListItem.
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            list.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String expected = "<OL>" + item + item + item + item + "</OL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        // dir attributes are removed from the output before it is compared.
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * An ordered list nested inside the first item of another ordered list must
 | 
| +     * keep its nesting: OL > LI > OL > 4 x LI, followed by one plain outer LI.
 | 
| +     */
 | 
| +    public void testPreserveNestedOrderedList() {
 | 
| +        // Inner list with four text items.
 | 
| +        Element inner = Document.get().createElement("OL");
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +
 | 
| +        // First outer item holds nothing but the inner list.
 | 
| +        Element wrapperItem = Document.get().createElement("LI");
 | 
| +        wrapperItem.appendChild(inner);
 | 
| +
 | 
| +        Element outer = Document.get().createElement("OL");
 | 
| +        outer.appendChild(wrapperItem);
 | 
| +        outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        mBody.appendChild(outer);
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String expectedInner = "<OL>" + item + item + item + item + "</OL>";
 | 
| +        String expected = "<OL>" +
 | 
| +                             "<LI>" + expectedInner + "</LI>" +
 | 
| +                             item +
 | 
| +                          "</OL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * Nested ordered lists whose items and list bodies also contain text and
 | 
| +     * paragraphs: the non-empty paragraphs and the bare text are expected in the
 | 
| +     * output, while the empty paragraph appended to the inner list is not.
 | 
| +     */
 | 
| +    public void testPreserveNestedOrderedListWithOtherElementsInside() {
 | 
| +        // First outer item: bare text, then a paragraph, then the inner list.
 | 
| +        Element wrapperItem = Document.get().createElement("LI");
 | 
| +        wrapperItem.appendChild(TestUtil.createText(CONTENT_TEXT));
 | 
| +        wrapperItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
 | 
| +
 | 
| +        Element inner = Document.get().createElement("OL");
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +        inner.appendChild(TestUtil.createParagraph(""));
 | 
| +        wrapperItem.appendChild(inner);
 | 
| +
 | 
| +        Element outer = Document.get().createElement("OL");
 | 
| +        outer.appendChild(wrapperItem);
 | 
| +        outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
 | 
| +        mBody.appendChild(outer);
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String paragraph = "<p>" + CONTENT_TEXT + "</p>";
 | 
| +        String expectedInner = "<OL>" + item + item + item + item + "</OL>";
 | 
| +        String expected = "<OL>" +
 | 
| +                             "<LI>" + CONTENT_TEXT + paragraph + expectedInner + "</LI>" +
 | 
| +                             item +
 | 
| +                             paragraph +
 | 
| +                          "</OL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * A flat unordered list must come out of extraction with its UL wrapper and
 | 
| +     * all four LI children intact.
 | 
| +     */
 | 
| +    public void testPreserveUnorderedList() {
 | 
| +        Element list = Document.get().createElement("UL");
 | 
| +        mBody.appendChild(list);
 | 
| +        // Four identical items, each created via TestUtil.createListItem.
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            list.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String expected = "<UL>" + item + item + item + item + "</UL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        // dir attributes are removed from the output before it is compared.
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * An unordered list nested inside the first item of another unordered list
 | 
| +     * must keep its nesting: UL > LI > UL > 4 x LI, then one plain outer LI.
 | 
| +     */
 | 
| +    public void testPreserveNestedUnorderedList() {
 | 
| +        // Inner list with four text items.
 | 
| +        Element inner = Document.get().createElement("UL");
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +
 | 
| +        // First outer item holds nothing but the inner list.
 | 
| +        Element wrapperItem = Document.get().createElement("LI");
 | 
| +        wrapperItem.appendChild(inner);
 | 
| +
 | 
| +        Element outer = Document.get().createElement("UL");
 | 
| +        outer.appendChild(wrapperItem);
 | 
| +        outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        mBody.appendChild(outer);
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String expectedInner = "<UL>" + item + item + item + item + "</UL>";
 | 
| +        String expected = "<UL>" +
 | 
| +                             "<LI>" + expectedInner + "</LI>" +
 | 
| +                             item +
 | 
| +                          "</UL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * Nested unordered lists whose items and list bodies also contain text and
 | 
| +     * paragraphs: the non-empty paragraphs and the bare text are expected in the
 | 
| +     * output, while the empty paragraph appended to the inner list is not.
 | 
| +     */
 | 
| +    public void testPreserveNestedUnorderedListWithOtherElementsInside() {
 | 
| +        // First outer item: bare text, then a paragraph, then the inner list.
 | 
| +        Element wrapperItem = Document.get().createElement("LI");
 | 
| +        wrapperItem.appendChild(TestUtil.createText(CONTENT_TEXT));
 | 
| +        wrapperItem.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
 | 
| +
 | 
| +        Element inner = Document.get().createElement("UL");
 | 
| +        for (int i = 0; i < 4; i++) {
 | 
| +            inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        }
 | 
| +        inner.appendChild(TestUtil.createParagraph(""));
 | 
| +        wrapperItem.appendChild(inner);
 | 
| +
 | 
| +        Element outer = Document.get().createElement("UL");
 | 
| +        outer.appendChild(wrapperItem);
 | 
| +        outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
 | 
| +        mBody.appendChild(outer);
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String paragraph = "<p>" + CONTENT_TEXT + "</p>";
 | 
| +        String expectedInner = "<UL>" + item + item + item + item + "</UL>";
 | 
| +        String expected = "<UL>" +
 | 
| +                             "<LI>" + CONTENT_TEXT + paragraph + expectedInner + "</LI>" +
 | 
| +                             item +
 | 
| +                             paragraph +
 | 
| +                          "</UL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * An ordered list nested in the first item of an unordered list must keep
 | 
| +     * both list types: UL > LI > OL > 2 x LI, followed by one plain UL item.
 | 
| +     */
 | 
| +    public void testPreserveUnorderedListWithNestedOrderedList() {
 | 
| +        Element inner = Document.get().createElement("OL");
 | 
| +        inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        inner.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +
 | 
| +        Element wrapperItem = Document.get().createElement("LI");
 | 
| +        wrapperItem.appendChild(inner);
 | 
| +
 | 
| +        Element outer = Document.get().createElement("UL");
 | 
| +        outer.appendChild(wrapperItem);
 | 
| +        outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
 | 
| +        mBody.appendChild(outer);
 | 
| +
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        String expected = "<UL><LI><OL>" + item + item + "</OL></LI>" + item + "</UL>";
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals(expected, TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /** A stray closing LI tag between two items must still yield exactly two LI items. */
 | 
| +    public void testMalformedListStructureWithExtraLITagEnd() {
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        // Malformed on purpose: an unmatched </LI> follows the first item.
 | 
| +        String listBody = item + "</LI>" + item;
 | 
| +
 | 
| +        Element list = Document.get().createElement("UL");
 | 
| +        list.setInnerHTML(listBody);
 | 
| +        mBody.appendChild(list);
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals("<UL>" + item + item + "</UL>", TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
| +    /** An extra opening LI tag before the first item must still yield exactly two LI items. */
 | 
| +    public void testMalformedListStructureWithExtraLITagStart() {
 | 
| +        Element orderedListTag = Document.get().createElement("OL");
 | 
| +        String html = "<LI><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        orderedListTag.setInnerHTML(html);
 | 
| +        mBody.appendChild(orderedListTag);
 | 
| +        String extractedContent = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals("<OL>" +
 | 
| +                        "<LI>" + CONTENT_TEXT + "</LI>" +
 | 
| +                        "<LI>" + CONTENT_TEXT + "</LI>" +
 | 
| +                     "</OL>",
 | 
| +                TestUtil.removeAllDirAttributes(extractedContent));
 | 
| +    }
 | 
| +
 | 
| +    /** An unclosed extra OL start tag is expected to come out as a properly closed nested OL. */
 | 
| +    public void testMalformedListStructureWithExtraOLTagStart() {
 | 
| +        Element orderedListTag = Document.get().createElement("OL");
 | 
| +        String html = "<OL><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        orderedListTag.setInnerHTML(html);
 | 
| +        mBody.appendChild(orderedListTag);
 | 
| +        String extractedContent = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals("<OL>" +
 | 
| +                        "<OL>" +
 | 
| +                          "<LI>" + CONTENT_TEXT + "</LI>" +
 | 
| +                          "<LI>" + CONTENT_TEXT + "</LI>" +
 | 
| +                        "</OL>" +
 | 
| +                     "</OL>",
 | 
| +                TestUtil.removeAllDirAttributes(extractedContent));
 | 
| +    }
 | 
| +
 | 
| +    /**
 | 
| +     * Text placed directly inside the list, between two items and without an LI
 | 
| +     * wrapper of its own, is expected to be kept in place in the output.
 | 
| +     */
 | 
| +    public void testMalformedListStructureWithoutLITag() {
 | 
| +        String item = "<LI>" + CONTENT_TEXT + "</LI>";
 | 
| +        // Markup under test: item, bare text, item.
 | 
| +        String listBody = item + CONTENT_TEXT + item;
 | 
| +
 | 
| +        Element list = Document.get().createElement("OL");
 | 
| +        list.setInnerHTML(listBody);
 | 
| +        mBody.appendChild(list);
 | 
| +
 | 
| +        String extracted = new ContentExtractor(mRoot).extractContent();
 | 
| +        assertEquals("<OL>" + listBody + "</OL>", TestUtil.removeAllDirAttributes(extracted));
 | 
| +    }
 | 
| +
 | 
|      private void assertExtractor(String expected, String html) {
 | 
|          mBody.setInnerHTML("");
 | 
|          Element div = TestUtil.createDiv(0);
 | 
| 
 |