OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 package com.dom_distiller.client; | |
6 | |
7 import com.dom_distiller.client.sax.AttributesImpl; | |
8 import com.dom_distiller.client.sax.ContentHandler; | |
9 import com.google.gwt.dom.client.Document; | |
10 import com.google.gwt.dom.client.Element; | |
11 import com.google.gwt.junit.client.GWTTestCase; | |
12 | |
13 import java.util.List; | |
14 | |
15 import de.l3s.boilerpipe.document.TextBlock; | |
16 import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler; | |
17 | |
18 public class BoilerpipeHTMLContentHandlerTest extends GWTTestCase { | |
19 | |
20 private static final String TEXT1 = "Some really long text which should be c ontent."; | |
21 private static final String TEXT2 = "Another really long text thing which sh ould be content."; | |
22 private static final String TEXT3 = "And again a third long text for testing ."; | |
23 | |
24 public String getModuleName() { | |
25 return "com.dom_distiller.DomDistillerJUnit"; | |
26 } | |
27 | |
Yaron
2014/05/29 01:09:10
Do you want to add tests for the default actions f
nyquist
2014/05/29 23:42:25
It seems like GWT doesn't handle the defaults corr
| |
28 public void testSpansAsInline() { | |
29 BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler( ); | |
30 handler.startDocument(); | |
31 Element body = Document.get().createElement("body"); | |
32 startElement(handler, body); | |
33 | |
34 // <span> | |
35 // TEXT1 | |
36 // <span> | |
37 // TEXT2 | |
38 // </span> | |
39 // TEXT3 | |
40 // </span> | |
41 Element outerSpan = Document.get().createElement("span"); | |
42 startElement(handler, outerSpan); | |
43 addText(handler, TEXT1); | |
44 Element innerSpan = Document.get().createElement("span"); | |
45 startElement(handler, innerSpan); | |
46 addText(handler, TEXT2); | |
47 endElement(handler, innerSpan); | |
48 addText(handler, TEXT3); | |
49 endElement(handler, outerSpan); | |
50 | |
51 endElement(handler, body); | |
52 handler.endDocument(); | |
53 | |
54 List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); | |
55 assertEquals(1, textBlocks.size()); | |
56 assertEquals(1, textBlocks.get(0).getTagLevel()); | |
57 } | |
58 | |
59 public void testDivsAsInline() { | |
60 BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler( ); | |
61 handler.startDocument(); | |
62 Element body = Document.get().createElement("body"); | |
63 startElement(handler, body); | |
64 | |
65 // <span> | |
66 // TEXT1 | |
67 // <div style="display: inline;"> | |
68 // TEXT2 | |
69 // </div> | |
70 // TEXT3 | |
71 // </span> | |
72 Element span = Document.get().createElement("span"); | |
73 startElement(handler, span); | |
74 addText(handler, TEXT1); | |
75 Element div = Document.get().createDivElement(); | |
76 div.setAttribute("style", "display: inline;"); | |
77 startElement(handler, div); | |
78 addText(handler, TEXT2); | |
79 endElement(handler, div); | |
80 addText(handler, TEXT3); | |
81 endElement(handler, span); | |
82 | |
83 endElement(handler, body); | |
84 handler.endDocument(); | |
85 | |
86 List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); | |
87 assertEquals(1, textBlocks.size()); | |
88 assertEquals(1, textBlocks.get(0).getTagLevel()); | |
89 } | |
90 | |
91 public void testDivsAsBlocks() { | |
92 BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler( ); | |
93 handler.startDocument(); | |
94 Element body = Document.get().createElement("body"); | |
95 startElement(handler, body); | |
96 | |
97 // <div> | |
98 // TEXT1 | |
99 // <div> | |
100 // TEXT2 | |
101 // </div> | |
102 // TEXT3 | |
103 // </div> | |
104 Element div = Document.get().createDivElement(); | |
105 startElement(handler, div); | |
106 addText(handler, TEXT1); | |
107 Element span = Document.get().createDivElement(); | |
108 startElement(handler, span); | |
109 addText(handler, TEXT2); | |
110 endElement(handler, span); | |
111 addText(handler, TEXT3); | |
112 endElement(handler, div); | |
113 | |
114 endElement(handler, body); | |
115 handler.endDocument(); | |
116 | |
117 List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); | |
118 assertEquals(3, textBlocks.size()); | |
119 assertEquals(2, textBlocks.get(0).getTagLevel()); | |
120 assertEquals(3, textBlocks.get(1).getTagLevel()); | |
121 assertEquals(2, textBlocks.get(2).getTagLevel()); | |
122 } | |
123 | |
124 public void testSpansAsBlocks() { | |
125 BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler( ); | |
126 handler.startDocument(); | |
127 Element body = Document.get().createElement("body"); | |
128 startElement(handler, body); | |
129 | |
130 // <div> | |
131 // TEXT1 | |
132 // <span style="display: block;"> | |
133 // TEXT2 | |
134 // </span> | |
135 // TEXT3 | |
136 // </div> | |
137 Element div = Document.get().createDivElement(); | |
138 startElement(handler, div); | |
139 addText(handler, TEXT1); | |
140 Element span = Document.get().createElement("span"); | |
141 span.setAttribute("style", "display: block;"); | |
142 startElement(handler, span); | |
143 addText(handler, TEXT2); | |
144 endElement(handler, span); | |
145 addText(handler, TEXT3); | |
146 endElement(handler, div); | |
147 | |
148 endElement(handler, body); | |
149 handler.endDocument(); | |
150 | |
151 List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); | |
152 assertEquals(3, textBlocks.size()); | |
153 assertEquals(2, textBlocks.get(0).getTagLevel()); | |
154 assertEquals(3, textBlocks.get(1).getTagLevel()); | |
155 assertEquals(2, textBlocks.get(2).getTagLevel()); | |
156 } | |
157 | |
158 public void testHeadingsAsBlocks() { | |
159 BoilerpipeHTMLContentHandler handler = new BoilerpipeHTMLContentHandler( ); | |
160 handler.startDocument(); | |
161 Element body = Document.get().createElement("body"); | |
162 startElement(handler, body); | |
163 | |
164 // <div> | |
165 // TEXT1 | |
166 // <h1> | |
167 // TEXT2 | |
168 // </h1> | |
169 // TEXT3 | |
170 // </div> | |
171 Element div = Document.get().createDivElement(); | |
172 startElement(handler, div); | |
173 addText(handler, TEXT1); | |
174 Element h1 = Document.get().createElement("h1"); | |
175 h1.setAttribute("style", "display: block;"); | |
176 startElement(handler, h1); | |
177 addText(handler, TEXT2); | |
178 endElement(handler, h1); | |
179 addText(handler, TEXT3); | |
180 endElement(handler, div); | |
181 | |
182 endElement(handler, body); | |
183 handler.endDocument(); | |
184 | |
185 List<TextBlock> textBlocks = handler.toTextDocument().getTextBlocks(); | |
186 assertEquals(3, textBlocks.size()); | |
187 assertEquals(2, textBlocks.get(0).getTagLevel()); | |
188 assertEquals(3, textBlocks.get(1).getTagLevel()); | |
189 assertEquals(2, textBlocks.get(2).getTagLevel()); | |
190 } | |
191 | |
192 private void startElement(ContentHandler handler, Element e) { | |
193 handler.startElement(null, e.getTagName(), e.getTagName(), e, new Attrib utesImpl()); | |
194 } | |
195 | |
196 private void addText(ContentHandler handler, String text) { | |
197 handler.characters(text.toCharArray(), 0, text.length()); | |
198 } | |
199 | |
200 private void endElement(ContentHandler handler, Element e) { | |
201 handler.endElement(null, e.getTagName(), e.getTagName(), e); | |
202 } | |
203 } | |
OLD | NEW |