Index: test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
diff --git a/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
new file mode 100644 |
index 0000000000000000000000000000000000000000..e981138a8dd3ac7972e799c87b154c57698bbe71 |
--- /dev/null |
+++ b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java |
@@ -0,0 +1,203 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+package com.dom_distiller.client; |
+ |
+import com.dom_distiller.client.sax.AttributesImpl; |
+import com.dom_distiller.client.sax.ContentHandler; |
+import com.google.gwt.dom.client.Document; |
+import com.google.gwt.dom.client.Element; |
+import com.google.gwt.junit.client.GWTTestCase; |
+ |
+import java.util.List; |
+ |
+import de.l3s.boilerpipe.document.TextBlock; |
+import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler; |
+ |
+public class BoilerpipeHTMLContentHandlerTest extends GWTTestCase { |
+ |
+ // Sample strings that the tests in this class feed to the handler as character data. |
+ private static final String TEXT1 = "Some really long text which should be content."; |
+ private static final String TEXT2 = "Another really long text thing which should be content."; |
+ private static final String TEXT3 = "And again a third long text for testing."; |
+ |
+ /** Returns the name of the GWT module under which this test case is run. */ |
+ @Override |
+ public String getModuleName() { |
+     return "com.dom_distiller.DomDistillerJUnit"; |
+ } |
+ |
Yaron
2014/05/29 01:09:10
Do you want to add tests for the default actions f
nyquist
2014/05/29 23:42:25
It seems like GWT doesn't handle the defaults corr
|
+ /** |
+  * Feeds a span nested inside another span to the handler and expects the three texts to |
+  * come back as a single text block at tag level 1. |
+  */ |
+ public void testSpansAsInline() { |
+     // Simulated markup: |
+     //   <span>TEXT1<span>TEXT2</span>TEXT3</span> |
+     Element bodyElement = Document.get().createElement("body"); |
+     Element outer = Document.get().createElement("span"); |
+     Element inner = Document.get().createElement("span"); |
+ |
+     BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
+     contentHandler.startDocument(); |
+     startElement(contentHandler, bodyElement); |
+ |
+     startElement(contentHandler, outer); |
+     addText(contentHandler, TEXT1); |
+     startElement(contentHandler, inner); |
+     addText(contentHandler, TEXT2); |
+     endElement(contentHandler, inner); |
+     addText(contentHandler, TEXT3); |
+     endElement(contentHandler, outer); |
+ |
+     endElement(contentHandler, bodyElement); |
+     contentHandler.endDocument(); |
+ |
+     List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
+     assertEquals(1, blocks.size()); |
+     TextBlock onlyBlock = blocks.get(0); |
+     assertEquals(1, onlyBlock.getTagLevel()); |
+ } |
+ |
+ /** |
+  * Feeds a div carrying the style "display: inline;" nested inside a span and expects the |
+  * three texts to come back as a single text block at tag level 1. |
+  */ |
+ public void testDivsAsInline() { |
+     // Simulated markup: |
+     //   <span>TEXT1<div style="display: inline;">TEXT2</div>TEXT3</span> |
+     Element bodyElement = Document.get().createElement("body"); |
+     Element wrapper = Document.get().createElement("span"); |
+     Element inlineDiv = Document.get().createDivElement(); |
+     inlineDiv.setAttribute("style", "display: inline;"); |
+ |
+     BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
+     contentHandler.startDocument(); |
+     startElement(contentHandler, bodyElement); |
+ |
+     startElement(contentHandler, wrapper); |
+     addText(contentHandler, TEXT1); |
+     startElement(contentHandler, inlineDiv); |
+     addText(contentHandler, TEXT2); |
+     endElement(contentHandler, inlineDiv); |
+     addText(contentHandler, TEXT3); |
+     endElement(contentHandler, wrapper); |
+ |
+     endElement(contentHandler, bodyElement); |
+     contentHandler.endDocument(); |
+ |
+     List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
+     assertEquals(1, blocks.size()); |
+     TextBlock onlyBlock = blocks.get(0); |
+     assertEquals(1, onlyBlock.getTagLevel()); |
+ } |
+ |
+ /** |
+  * Feeds a div nested inside another div and expects three separate text blocks: the outer |
+  * texts at tag level 2 and the inner text at tag level 3. |
+  */ |
+ public void testDivsAsBlocks() { |
+     BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler(); |
+     handler.startDocument(); |
+     Element body = Document.get().createElement("body"); |
+     startElement(handler, body); |
+ |
+     // <div> |
+     //   TEXT1 |
+     //   <div> |
+     //     TEXT2 |
+     //   </div> |
+     //   TEXT3 |
+     // </div> |
+     Element outerDiv = Document.get().createDivElement(); |
+     startElement(handler, outerDiv); |
+     addText(handler, TEXT1); |
+     // The nested element is a div as well; it was previously held in a local named "span". |
+     Element innerDiv = Document.get().createDivElement(); |
+     startElement(handler, innerDiv); |
+     addText(handler, TEXT2); |
+     endElement(handler, innerDiv); |
+     addText(handler, TEXT3); |
+     endElement(handler, outerDiv); |
+ |
+     endElement(handler, body); |
+     handler.endDocument(); |
+ |
+     List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); |
+     assertEquals(3, textBlocks.size()); |
+     assertEquals(2, textBlocks.get(0).getTagLevel()); |
+     assertEquals(3, textBlocks.get(1).getTagLevel()); |
+     assertEquals(2, textBlocks.get(2).getTagLevel()); |
+ } |
+ |
+ /** |
+  * Feeds a span carrying the style "display: block;" nested inside a div and expects three |
+  * separate text blocks: the outer texts at tag level 2 and the span's text at tag level 3. |
+  */ |
+ public void testSpansAsBlocks() { |
+     // Simulated markup: |
+     //   <div>TEXT1<span style="display: block;">TEXT2</span>TEXT3</div> |
+     Element bodyElement = Document.get().createElement("body"); |
+     Element container = Document.get().createDivElement(); |
+     Element blockSpan = Document.get().createElement("span"); |
+     blockSpan.setAttribute("style", "display: block;"); |
+ |
+     BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
+     contentHandler.startDocument(); |
+     startElement(contentHandler, bodyElement); |
+ |
+     startElement(contentHandler, container); |
+     addText(contentHandler, TEXT1); |
+     startElement(contentHandler, blockSpan); |
+     addText(contentHandler, TEXT2); |
+     endElement(contentHandler, blockSpan); |
+     addText(contentHandler, TEXT3); |
+     endElement(contentHandler, container); |
+ |
+     endElement(contentHandler, bodyElement); |
+     contentHandler.endDocument(); |
+ |
+     List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
+     assertEquals(3, blocks.size()); |
+     // Expected tag levels, in block order: before, inside, and after the nested element. |
+     assertEquals(2, blocks.get(0).getTagLevel()); |
+     assertEquals(3, blocks.get(1).getTagLevel()); |
+     assertEquals(2, blocks.get(2).getTagLevel()); |
+ } |
+ |
+ /** |
+  * Feeds an h1 nested inside a div and expects three separate text blocks: the outer texts |
+  * at tag level 2 and the heading's text at tag level 3. |
+  */ |
+ public void testHeadingsAsBlocks() { |
+     // Simulated markup: |
+     //   <div>TEXT1<h1>TEXT2</h1>TEXT3</div> |
+     Element bodyElement = Document.get().createElement("body"); |
+     Element container = Document.get().createDivElement(); |
+     Element heading = Document.get().createElement("h1"); |
+     // NOTE(review): the display style is set explicitly even though the element is a heading; |
+     // presumably the default display value is not available in the test environment - confirm. |
+     heading.setAttribute("style", "display: block;"); |
+ |
+     BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler(); |
+     contentHandler.startDocument(); |
+     startElement(contentHandler, bodyElement); |
+ |
+     startElement(contentHandler, container); |
+     addText(contentHandler, TEXT1); |
+     startElement(contentHandler, heading); |
+     addText(contentHandler, TEXT2); |
+     endElement(contentHandler, heading); |
+     addText(contentHandler, TEXT3); |
+     endElement(contentHandler, container); |
+ |
+     endElement(contentHandler, bodyElement); |
+     contentHandler.endDocument(); |
+ |
+     List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks(); |
+     assertEquals(3, blocks.size()); |
+     // Expected tag levels, in block order: before, inside, and after the heading. |
+     assertEquals(2, blocks.get(0).getTagLevel()); |
+     assertEquals(3, blocks.get(1).getTagLevel()); |
+     assertEquals(2, blocks.get(2).getTagLevel()); |
+ } |
+ |
+ private void startElement(ContentHandler handler, Element e) { |
+ handler.startElement(null, e.getTagName(), e.getTagName(), e, new AttributesImpl()); |
+ } |
+ |
+ private void addText(ContentHandler handler, String text) { |
+ handler.characters(text.toCharArray(), 0, text.length()); |
+ } |
+ |
+ private void endElement(ContentHandler handler, Element e) { |
+ handler.endElement(null, e.getTagName(), e.getTagName(), e); |
+ } |
+} |