Chromium Code Reviews| Index: test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
| diff --git a/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e981138a8dd3ac7972e799c87b154c57698bbe71 |
| --- /dev/null |
| +++ b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
| @@ -0,0 +1,203 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package com.dom_distiller.client; |
| + |
| +import com.dom_distiller.client.sax.AttributesImpl; |
| +import com.dom_distiller.client.sax.ContentHandler; |
| +import com.google.gwt.dom.client.Document; |
| +import com.google.gwt.dom.client.Element; |
| +import com.google.gwt.junit.client.GWTTestCase; |
| + |
| +import java.util.List; |
| + |
| +import de.l3s.boilerpipe.document.TextBlock; |
| +import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler; |
| + |
| +public class BoilerpipeHTMLContentHandlerTest extends GWTTestCase { |
| + |
| + // Sample character data that the tests below feed to the handler through addText(). |
| + private static final String TEXT1 = "Some really long text which should be content."; |
| + private static final String TEXT2 = "Another really long text thing which should be content."; |
| + private static final String TEXT3 = "And again a third long text for testing."; |
| + |
| + /** |
| + * Names the GWT module that this test case runs in. |
| + * |
| + * @return the module name, {@code "com.dom_distiller.DomDistillerJUnit"}. |
| + */ |
| + @Override |
| + public String getModuleName() { |
| + return "com.dom_distiller.DomDistillerJUnit"; |
| + } |
| + |
|
Yaron
2014/05/29 01:09:10
Do you want to add tests for the default actions f
nyquist
2014/05/29 23:42:25
It seems like GWT doesn't handle the defaults corr
|
| + /** |
| + * Feeds the handler a span nested inside another span, with text before, inside and after |
| + * the inner span, and expects a single text block reported at tag level 1. |
| + */ |
| + public void testSpansAsInline() { |
| + // Simulated markup: |
| + // <body><span>TEXT1<span>TEXT2</span>TEXT3</span></body> |
| + Element bodyElement = Document.get().createElement("body"); |
| + Element outer = Document.get().createElement("span"); |
| + Element inner = Document.get().createElement("span"); |
| + |
| + BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
| + contentHandler.startDocument(); |
| + startElement(contentHandler, bodyElement); |
| + |
| + startElement(contentHandler, outer); |
| + addText(contentHandler, TEXT1); |
| + startElement(contentHandler, inner); |
| + addText(contentHandler, TEXT2); |
| + endElement(contentHandler, inner); |
| + addText(contentHandler, TEXT3); |
| + endElement(contentHandler, outer); |
| + |
| + endElement(contentHandler, bodyElement); |
| + contentHandler.endDocument(); |
| + |
| + List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
| + assertEquals(1, blocks.size()); |
| + TextBlock onlyBlock = blocks.get(0); |
| + assertEquals(1, onlyBlock.getTagLevel()); |
| + } |
| + |
| + /** |
| + * Feeds the handler a div styled {@code display: inline} inside a span, with text before, |
| + * inside and after the div, and expects a single text block reported at tag level 1. |
| + */ |
| + public void testDivsAsInline() { |
| + // Simulated markup: |
| + // <body><span>TEXT1<div style="display: inline;">TEXT2</div>TEXT3</span></body> |
| + Element bodyElement = Document.get().createElement("body"); |
| + Element outerSpan = Document.get().createElement("span"); |
| + Element inlineDiv = Document.get().createDivElement(); |
| + inlineDiv.setAttribute("style", "display: inline;"); |
| + |
| + BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
| + contentHandler.startDocument(); |
| + startElement(contentHandler, bodyElement); |
| + |
| + startElement(contentHandler, outerSpan); |
| + addText(contentHandler, TEXT1); |
| + startElement(contentHandler, inlineDiv); |
| + addText(contentHandler, TEXT2); |
| + endElement(contentHandler, inlineDiv); |
| + addText(contentHandler, TEXT3); |
| + endElement(contentHandler, outerSpan); |
| + |
| + endElement(contentHandler, bodyElement); |
| + contentHandler.endDocument(); |
| + |
| + List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
| + assertEquals(1, blocks.size()); |
| + TextBlock onlyBlock = blocks.get(0); |
| + assertEquals(1, onlyBlock.getTagLevel()); |
| + } |
| + |
| + /** |
| + * Feeds the handler a div nested inside another div, with text before, inside and after the |
| + * inner div, and expects three text blocks at tag levels 2, 3 and 2. |
| + */ |
| + public void testDivsAsBlocks() { |
| + BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler(); |
| + handler.startDocument(); |
| + Element body = Document.get().createElement("body"); |
| + startElement(handler, body); |
| + |
| + // <div> |
| + // TEXT1 |
| + // <div> |
| + // TEXT2 |
| + // </div> |
| + // TEXT3 |
| + // </div> |
| + Element outerDiv = Document.get().createDivElement(); |
| + startElement(handler, outerDiv); |
| + addText(handler, TEXT1); |
| + // The nested element is a div as well, so it is named accordingly. |
| + Element innerDiv = Document.get().createDivElement(); |
| + startElement(handler, innerDiv); |
| + addText(handler, TEXT2); |
| + endElement(handler, innerDiv); |
| + addText(handler, TEXT3); |
| + endElement(handler, outerDiv); |
| + |
| + endElement(handler, body); |
| + handler.endDocument(); |
| + |
| + List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); |
| + assertEquals(3, textBlocks.size()); |
| + assertEquals(2, textBlocks.get(0).getTagLevel()); |
| + assertEquals(3, textBlocks.get(1).getTagLevel()); |
| + assertEquals(2, textBlocks.get(2).getTagLevel()); |
| + } |
| + |
| + /** |
| + * Feeds the handler a span styled {@code display: block} inside a div, with text before, |
| + * inside and after the span, and expects three text blocks at tag levels 2, 3 and 2. |
| + */ |
| + public void testSpansAsBlocks() { |
| + // Simulated markup: |
| + // <body><div>TEXT1<span style="display: block;">TEXT2</span>TEXT3</div></body> |
| + Element bodyElement = Document.get().createElement("body"); |
| + Element outerDiv = Document.get().createDivElement(); |
| + Element blockSpan = Document.get().createElement("span"); |
| + blockSpan.setAttribute("style", "display: block;"); |
| + |
| + BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
| + contentHandler.startDocument(); |
| + startElement(contentHandler, bodyElement); |
| + |
| + startElement(contentHandler, outerDiv); |
| + addText(contentHandler, TEXT1); |
| + startElement(contentHandler, blockSpan); |
| + addText(contentHandler, TEXT2); |
| + endElement(contentHandler, blockSpan); |
| + addText(contentHandler, TEXT3); |
| + endElement(contentHandler, outerDiv); |
| + |
| + endElement(contentHandler, bodyElement); |
| + contentHandler.endDocument(); |
| + |
| + List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
| + assertEquals(3, blocks.size()); |
| + assertEquals(2, blocks.get(0).getTagLevel()); |
| + assertEquals(3, blocks.get(1).getTagLevel()); |
| + assertEquals(2, blocks.get(2).getTagLevel()); |
| + } |
| + |
| + /** |
| + * Feeds the handler an h1 inside a div, with text before, inside and after the heading, and |
| + * expects three text blocks at tag levels 2, 3 and 2. |
| + */ |
| + public void testHeadingsAsBlocks() { |
| + BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler(); |
| + handler.startDocument(); |
| + Element body = Document.get().createElement("body"); |
| + startElement(handler, body); |
| + |
| + // <div> |
| + // TEXT1 |
| + // <h1 style="display: block;"> |
| + // TEXT2 |
| + // </h1> |
| + // TEXT3 |
| + // </div> |
| + Element div = Document.get().createDivElement(); |
| + startElement(handler, div); |
| + addText(handler, TEXT1); |
| + Element h1 = Document.get().createElement("h1"); |
| + // NOTE(review): the display style is set explicitly here; presumably the element's default |
| + // display value is not available in the GWT test environment -- confirm. |
| + h1.setAttribute("style", "display: block;"); |
| + startElement(handler, h1); |
| + addText(handler, TEXT2); |
| + endElement(handler, h1); |
| + addText(handler, TEXT3); |
| + endElement(handler, div); |
| + |
| + endElement(handler, body); |
| + handler.endDocument(); |
| + |
| + List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); |
| + assertEquals(3, textBlocks.size()); |
| + assertEquals(2, textBlocks.get(0).getTagLevel()); |
| + assertEquals(3, textBlocks.get(1).getTagLevel()); |
| + assertEquals(2, textBlocks.get(2).getTagLevel()); |
| + } |
| + |
| + /** |
| + * Sends a start-element event for {@code e} to {@code handler}, passing the element's tag |
| + * name for both name arguments, a null first argument and an empty attribute set. |
| + */ |
| + private void startElement(ContentHandler handler, Element e) { |
| + String tag = e.getTagName(); |
| + handler.startElement(null, tag, tag, e, new AttributesImpl()); |
| + } |
| + |
| + /** Sends all of {@code text} to {@code handler} as a single characters event. */ |
| + private void addText(ContentHandler handler, String text) { |
| + char[] chars = text.toCharArray(); |
| + handler.characters(chars, 0, chars.length); |
| + } |
| + |
| + /** |
| + * Sends an end-element event for {@code e} to {@code handler}, passing the element's tag |
| + * name for both name arguments and a null first argument. |
| + */ |
| + private void endElement(ContentHandler handler, Element e) { |
| + String tag = e.getTagName(); |
| + handler.endElement(null, tag, tag, e); |
| + } |
| +} |