Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Unified Diff: test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java

Issue 296113004: Start using computed style instead of default tag actions. (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: Added tests Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java
diff --git a/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..e981138a8dd3ac7972e799c87b154c57698bbe71
--- /dev/null
+++ b/test/com/dom_distiller/client/BoilerpipeHTMLContentHandlerTest.java
@@ -0,0 +1,203 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package com.dom_distiller.client;
+
+import com.dom_distiller.client.sax.AttributesImpl;
+import com.dom_distiller.client.sax.ContentHandler;
+import com.google.gwt.dom.client.Document;
+import com.google.gwt.dom.client.Element;
+import com.google.gwt.junit.client.GWTTestCase;
+
+import java.util.List;
+
+import de.l3s.boilerpipe.document.TextBlock;
+import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler;
+
+public class BoilerpipeHTMLContentHandlerTest extends GWTTestCase {
+
+ private static final String TEXT1 = "Some really long text which should be content.";
+ private static final String TEXT2 = "Another really long text thing which should be content.";
+ private static final String TEXT3 = "And again a third long text for testing.";
+
+    /**
+     * Names the GWT module that this test case runs in.
+     *
+     * @return the module name, {@code com.dom_distiller.DomDistillerJUnit}.
+     */
+    @Override
+    public String getModuleName() {
+        return "com.dom_distiller.DomDistillerJUnit";
+    }
+
Yaron 2014/05/29 01:09:10 Do you want to add tests for the default actions f
nyquist 2014/05/29 23:42:25 It seems like GWT doesn't handle the defaults corr
+    /**
+     * Feeds text nested inside two plain {@code <span>} elements to the handler and expects
+     * all of it to be reported as a single text block at tag level 1.
+     */
+    public void testSpansAsInline() {
+        // Simulated markup inside <body>:
+        //
+        // <span>
+        //   TEXT1
+        //   <span>
+        //     TEXT2
+        //   </span>
+        //   TEXT3
+        // </span>
+        final Element bodyElement = Document.get().createElement("body");
+        final Element outer = Document.get().createElement("span");
+        final Element inner = Document.get().createElement("span");
+
+        final BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler();
+        contentHandler.startDocument();
+        startElement(contentHandler, bodyElement);
+
+        startElement(contentHandler, outer);
+        addText(contentHandler, TEXT1);
+        startElement(contentHandler, inner);
+        addText(contentHandler, TEXT2);
+        endElement(contentHandler, inner);
+        addText(contentHandler, TEXT3);
+        endElement(contentHandler, outer);
+
+        endElement(contentHandler, bodyElement);
+        contentHandler.endDocument();
+
+        // Everything is expected to be merged into one block.
+        final List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks();
+        assertEquals(1, blocks.size());
+        final TextBlock onlyBlock = blocks.get(0);
+        assertEquals(1, onlyBlock.getTagLevel());
+    }
+
+    /**
+     * Feeds a {@code <div>} carrying {@code style="display: inline;"}, nested inside a
+     * {@code <span>}, to the handler and expects all text to be reported as a single text
+     * block at tag level 1.
+     */
+    public void testDivsAsInline() {
+        // Simulated markup inside <body>:
+        //
+        // <span>
+        //   TEXT1
+        //   <div style="display: inline;">
+        //     TEXT2
+        //   </div>
+        //   TEXT3
+        // </span>
+        final Element bodyElement = Document.get().createElement("body");
+        final Element outerSpan = Document.get().createElement("span");
+        final Element inlineDiv = Document.get().createDivElement();
+        inlineDiv.setAttribute("style", "display: inline;");
+
+        final BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler();
+        contentHandler.startDocument();
+        startElement(contentHandler, bodyElement);
+
+        startElement(contentHandler, outerSpan);
+        addText(contentHandler, TEXT1);
+        startElement(contentHandler, inlineDiv);
+        addText(contentHandler, TEXT2);
+        endElement(contentHandler, inlineDiv);
+        addText(contentHandler, TEXT3);
+        endElement(contentHandler, outerSpan);
+
+        endElement(contentHandler, bodyElement);
+        contentHandler.endDocument();
+
+        // Everything is expected to be merged into one block.
+        final List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks();
+        assertEquals(1, blocks.size());
+        final TextBlock onlyBlock = blocks.get(0);
+        assertEquals(1, onlyBlock.getTagLevel());
+    }
+
+    /**
+     * Feeds a plain {@code <div>} nested inside another plain {@code <div>} to the handler
+     * and expects three text blocks: the text before the inner div, the inner div's text,
+     * and the text after it, at tag levels 2, 3 and 2 respectively.
+     */
+    public void testDivsAsBlocks() {
+        BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler();
+        handler.startDocument();
+        Element body = Document.get().createElement("body");
+        startElement(handler, body);
+
+        // <div>
+        //   TEXT1
+        //   <div>
+        //     TEXT2
+        //   </div>
+        //   TEXT3
+        // </div>
+        Element outerDiv = Document.get().createDivElement();
+        startElement(handler, outerDiv);
+        addText(handler, TEXT1);
+        // Both elements are divs; the names say so to match the sketch above.
+        Element innerDiv = Document.get().createDivElement();
+        startElement(handler, innerDiv);
+        addText(handler, TEXT2);
+        endElement(handler, innerDiv);
+        addText(handler, TEXT3);
+        endElement(handler, outerDiv);
+
+        endElement(handler, body);
+        handler.endDocument();
+
+        List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks();
+        assertEquals(3, textBlocks.size());
+        assertEquals(2, textBlocks.get(0).getTagLevel());
+        assertEquals(3, textBlocks.get(1).getTagLevel());
+        assertEquals(2, textBlocks.get(2).getTagLevel());
+    }
+
+    /**
+     * Feeds a {@code <span>} carrying {@code style="display: block;"}, nested inside a
+     * {@code <div>}, to the handler and expects three text blocks at tag levels 2, 3 and 2.
+     */
+    public void testSpansAsBlocks() {
+        // Simulated markup inside <body>:
+        //
+        // <div>
+        //   TEXT1
+        //   <span style="display: block;">
+        //     TEXT2
+        //   </span>
+        //   TEXT3
+        // </div>
+        final Element bodyElement = Document.get().createElement("body");
+        final Element outerDiv = Document.get().createDivElement();
+        final Element blockSpan = Document.get().createElement("span");
+        blockSpan.setAttribute("style", "display: block;");
+
+        final BoilerpipeHTMLContentHandler contentHandler = new BoilerpipeHTMLContentHandler();
+        contentHandler.startDocument();
+        startElement(contentHandler, bodyElement);
+
+        startElement(contentHandler, outerDiv);
+        addText(contentHandler, TEXT1);
+        startElement(contentHandler, blockSpan);
+        addText(contentHandler, TEXT2);
+        endElement(contentHandler, blockSpan);
+        addText(contentHandler, TEXT3);
+        endElement(contentHandler, outerDiv);
+
+        endElement(contentHandler, bodyElement);
+        contentHandler.endDocument();
+
+        // One block per text run, checked in document order.
+        final int[] expectedTagLevels = {2, 3, 2};
+        final List<TextBlock> blocks = contentHandler.toTextDocument().getTextBlocks();
+        assertEquals(expectedTagLevels.length, blocks.size());
+        for (int i = 0; i < expectedTagLevels.length; i++) {
+            assertEquals(expectedTagLevels[i], blocks.get(i).getTagLevel());
+        }
+    }
+
+    /**
+     * Feeds an {@code <h1>} carrying {@code style="display: block;"}, nested inside a
+     * {@code <div>}, to the handler and expects three text blocks at tag levels 2, 3 and 2.
+     */
+    public void testHeadingsAsBlocks() {
+        BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler();
+        handler.startDocument();
+        Element body = Document.get().createElement("body");
+        startElement(handler, body);
+
+        // <div>
+        //   TEXT1
+        //   <h1 style="display: block;">
+        //     TEXT2
+        //   </h1>
+        //   TEXT3
+        // </div>
+        Element div = Document.get().createDivElement();
+        startElement(handler, div);
+        addText(handler, TEXT1);
+        Element h1 = Document.get().createElement("h1");
+        // NOTE(review): the display style is set explicitly rather than relying on the
+        // heading's default; this looks deliberate (defaults may not be available in the GWT
+        // test environment) -- confirm before removing.
+        h1.setAttribute("style", "display: block;");
+        startElement(handler, h1);
+        addText(handler, TEXT2);
+        endElement(handler, h1);
+        addText(handler, TEXT3);
+        endElement(handler, div);
+
+        endElement(handler, body);
+        handler.endDocument();
+
+        List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks();
+        assertEquals(3, textBlocks.size());
+        assertEquals(2, textBlocks.get(0).getTagLevel());
+        assertEquals(3, textBlocks.get(1).getTagLevel());
+        assertEquals(2, textBlocks.get(2).getTagLevel());
+    }
+
+ private void startElement(ContentHandler handler, Element e) {
+ handler.startElement(null, e.getTagName(), e.getTagName(), e, new AttributesImpl());
+ }
+
+ private void addText(ContentHandler handler, String text) {
+ handler.characters(text.toCharArray(), 0, text.length());
+ }
+
+ private void endElement(ContentHandler handler, Element e) {
+ handler.endElement(null, e.getTagName(), e.getTagName(), e);
+ }
+}

Powered by Google App Engine
This is Rietveld 408576698