OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 package com.dom_distiller.client; |
| 6 |
| 7 import com.dom_distiller.client.sax.AttributesImpl; |
| 8 import com.google.gwt.dom.client.Document; |
| 9 import com.google.gwt.dom.client.Element; |
| 10 import com.google.gwt.junit.client.GWTTestCase; |
| 11 |
| 12 import java.util.List; |
| 13 |
| 14 import de.l3s.boilerpipe.document.TextBlock; |
| 15 import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler; |
| 16 |
| 17 public class BoilerpipeHTMLContentHandlerTest extends GWTTestCase { |
| 18 |
| 19 private static final String TEXT1 = "Some really long text which should be c
ontent."; |
| 20 private static final String TEXT2 = "Another really long text thing which sh
ould be content."; |
| 21 private static final String TEXT3 = "And again a third long text for testing
."; |
| 22 private BoilerpipeHTMLContentHandler mHandler; |
| 23 private Element mBody; |
| 24 |
| 25 public String getModuleName() { |
| 26 return "com.dom_distiller.DomDistillerJUnit"; |
| 27 } |
| 28 |
| 29 @Override |
| 30 protected void gwtSetUp() throws Exception { |
| 31 super.gwtSetUp(); |
| 32 mHandler = new BoilerpipeHTMLContentHandler(); |
| 33 mHandler.startDocument(); |
| 34 mBody = Document.get().createElement("body"); |
| 35 startElement(mBody); |
| 36 } |
| 37 |
| 38 public void testSpansAsInline() { |
| 39 // <span> |
| 40 // TEXT1 |
| 41 // <span> |
| 42 // TEXT2 |
| 43 // </span> |
| 44 // TEXT3 |
| 45 // </span> |
| 46 Element outerSpan = Document.get().createSpanElement(); |
| 47 startElement(outerSpan); |
| 48 addText(TEXT1); |
| 49 Element innerSpan = Document.get().createSpanElement(); |
| 50 startElement(innerSpan); |
| 51 addText(TEXT2); |
| 52 endElement(innerSpan); |
| 53 addText(TEXT3); |
| 54 endElement(outerSpan); |
| 55 |
| 56 endBodyAndDocument(); |
| 57 |
| 58 assertInline(); |
| 59 } |
| 60 |
| 61 public void testDivsAsInline() { |
| 62 // <span> |
| 63 // TEXT1 |
| 64 // <div style="display: inline;"> |
| 65 // TEXT2 |
| 66 // </div> |
| 67 // TEXT3 |
| 68 // </span> |
| 69 Element span = Document.get().createSpanElement(); |
| 70 startElement(span); |
| 71 addText(TEXT1); |
| 72 Element div = Document.get().createDivElement(); |
| 73 div.setAttribute("style", "display: inline;"); |
| 74 startElement(div); |
| 75 addText(TEXT2); |
| 76 endElement(div); |
| 77 addText(TEXT3); |
| 78 endElement(span); |
| 79 |
| 80 endBodyAndDocument(); |
| 81 |
| 82 assertInline(); |
| 83 } |
| 84 |
| 85 public void testDivsAsBlocks() { |
| 86 // <div> |
| 87 // TEXT1 |
| 88 // <div> |
| 89 // TEXT2 |
| 90 // </div> |
| 91 // TEXT3 |
| 92 // </div> |
| 93 Element div = Document.get().createDivElement(); |
| 94 startElement(div); |
| 95 addText(TEXT1); |
| 96 Element span = Document.get().createDivElement(); |
| 97 startElement(span); |
| 98 addText(TEXT2); |
| 99 endElement(span); |
| 100 addText(TEXT3); |
| 101 endElement(div); |
| 102 |
| 103 endBodyAndDocument(); |
| 104 |
| 105 assertBlock(); |
| 106 } |
| 107 |
| 108 public void testSpansAsBlocks() { |
| 109 // <div> |
| 110 // TEXT1 |
| 111 // <span style="display: block;"> |
| 112 // TEXT2 |
| 113 // </span> |
| 114 // TEXT3 |
| 115 // </div> |
| 116 Element div = Document.get().createDivElement(); |
| 117 startElement(div); |
| 118 addText(TEXT1); |
| 119 Element span = Document.get().createSpanElement(); |
| 120 span.setAttribute("style", "display: block;"); |
| 121 startElement(span); |
| 122 addText(TEXT2); |
| 123 endElement(span); |
| 124 addText(TEXT3); |
| 125 endElement(div); |
| 126 |
| 127 endBodyAndDocument(); |
| 128 |
| 129 assertBlock(); |
| 130 } |
| 131 |
| 132 public void testHeadingsAsBlocks() { |
| 133 // <div> |
| 134 // TEXT1 |
| 135 // <h1> |
| 136 // TEXT2 |
| 137 // </h1> |
| 138 // TEXT3 |
| 139 // </div> |
| 140 Element div = Document.get().createDivElement(); |
| 141 startElement(div); |
| 142 addText(TEXT1); |
| 143 Element h1 = Document.get().createElement("h1"); |
| 144 startElement(h1); |
| 145 addText(TEXT2); |
| 146 endElement(h1); |
| 147 addText(TEXT3); |
| 148 endElement(div); |
| 149 |
| 150 endBodyAndDocument(); |
| 151 |
| 152 assertBlock(); |
| 153 } |
| 154 |
| 155 private void startElement(Element e) { |
| 156 mHandler.startElement(e, new AttributesImpl()); |
| 157 } |
| 158 |
| 159 private void addText(String text) { |
| 160 mHandler.characters(text.toCharArray(), 0, text.length()); |
| 161 } |
| 162 |
| 163 private void endElement(Element e) { |
| 164 mHandler.endElement(e); |
| 165 } |
| 166 |
| 167 private void endBodyAndDocument() { |
| 168 endElement(mBody); |
| 169 mHandler.endDocument(); |
| 170 } |
| 171 |
| 172 private void assertBlock() { |
| 173 List<TextBlock> textBlocks = mHandler.toTextDocument().getTextBlocks(); |
| 174 assertEquals(3, textBlocks.size()); |
| 175 assertEquals(2, textBlocks.get(0).getTagLevel()); |
| 176 assertEquals(3, textBlocks.get(1).getTagLevel()); |
| 177 assertEquals(2, textBlocks.get(2).getTagLevel()); |
| 178 } |
| 179 |
| 180 private void assertInline() { |
| 181 List<TextBlock> textBlocks = mHandler.toTextDocument().getTextBlocks(); |
| 182 assertEquals(1, textBlocks.size()); |
| 183 assertEquals(1, textBlocks.get(0).getTagLevel()); |
| 184 } |
| 185 } |
OLD | NEW |