OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 package org.chromium.distiller; | |
6 | |
7 import com.google.gwt.dom.client.BaseElement; | |
8 import com.google.gwt.dom.client.Document; | |
9 | |
10 public class PageParameterParserTest extends DomDistillerJsTestCase { | |
11 private static final String BASE_URL = "http://www.test.com/"; | |
12 private static final String TEST_URL = BASE_URL + "foo/bar"; | |
13 | |
14 public void testBasic() { | |
15 PageParamInfo info = processDocument( | |
16 "1<br>" + | |
17 "<a href=\"/foo/bar/2\">2</a>"); | |
18 assertEquals(2, info.mAllPageInfo.size()); | |
19 | |
20 info = processDocument( | |
21 "1<br>" + | |
22 "<a href=\"/foo/bar/2\">2</a>" + | |
23 "<a href=\"/foo/bar/3\">3</a>"); | |
24 assertEquals(3, info.mAllPageInfo.size()); | |
25 } | |
26 | |
27 public void testRejectOnlyPage2LinkWithoutCurrentPageText() { | |
28 // Although there is a digital outlink to 2nd page, there is no plain te xt "1" | |
29 // before it, so there is no pagination. | |
30 PageParamInfo info = processDocument( | |
31 "If there were a '1', pagination should be detected. But there isn't ." + | |
32 "<a href=\"/foo/bar/2\">2</a>" + | |
33 "Main content"); | |
34 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
35 } | |
36 | |
37 public void testRejectNonAdjacentOutlinks() { | |
38 PageParamInfo info = processDocument( | |
39 "1<br>" + | |
40 "Unrelated terms<br>" + | |
41 "<a href=\"/foo/bar/2\">2</a>" + | |
42 "Unrelated terms<br>" + | |
43 "<a href=\"/foo/bar/3\">3</a>" + | |
44 "<a href=\"/foo/bar/all\">All</a>"); | |
45 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
46 } | |
47 | |
48 public void testAcceptAdjacentOutlinks() { | |
49 PageParamInfo info = processDocumentWithoutBase( | |
50 "Unrelated link: <a href=\"http://www.test.com/other/2\">2</a>" + | |
51 "<p>Main content</p>" + | |
52 "1<br>" + | |
53 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
54 "<a href=\"http://www.test.com/foo/bar/3\">3</a>", | |
55 TEST_URL); | |
56 assertEquals(3, info.mAllPageInfo.size()); | |
57 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
58 assertEquals(1, page.mPageNum); | |
59 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
60 page = info.mAllPageInfo.get(1); | |
61 assertEquals(2, page.mPageNum); | |
62 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
63 page = info.mAllPageInfo.get(2); | |
64 assertEquals(3, page.mPageNum); | |
65 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
66 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
67 } | |
68 | |
69 public void testAcceptDuplicatePatterns() { | |
70 PageParamInfo info = processDocument( | |
71 "1<br>" + | |
72 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
73 "<a href=\"http://www.test.com/foo/bar/3\">3</a>" + | |
74 "<p>Main content</p>" + | |
75 "1<br>" + | |
76 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
77 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | |
78 assertEquals(3, info.mAllPageInfo.size()); | |
79 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
80 assertEquals(1, page.mPageNum); | |
81 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
82 page = info.mAllPageInfo.get(1); | |
83 assertEquals(2, page.mPageNum); | |
84 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
85 page = info.mAllPageInfo.get(2); | |
86 assertEquals(3, page.mPageNum); | |
87 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
88 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
89 } | |
90 | |
91 public void testPreferPageNumber() { | |
92 PageParamInfo info = processDocument( | |
93 "<a href=\"http://www.test.com/foo/bar/size-25\">25</a>" + | |
94 "<a href=\"http://www.test.com/foo/bar/size-50\">50</a>" + | |
95 "<a href=\"http://www.test.com/foo/bar/size-100\">100</a>" + | |
96 "<p>Main content</p>" + | |
97 "1<br>" + | |
98 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
99 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | |
100 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | |
101 assertEquals(3, info.mAllPageInfo.size()); | |
102 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
103 assertEquals(1, page.mPageNum); | |
104 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
105 page = info.mAllPageInfo.get(1); | |
106 assertEquals(2, page.mPageNum); | |
107 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
108 page = info.mAllPageInfo.get(2); | |
109 assertEquals(3, page.mPageNum); | |
110 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
111 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
112 } | |
113 | |
114 public void testRejectMultiplePageNumberPatterns() { | |
115 PageParamInfo info = processDocumentWithoutBase( | |
116 "<a href=\"http://www.google.com/test/list.php?start=10\">2</a>" + | |
117 "<a href=\"http://www.google.com/test/list.php?start=20\">3</a>" + | |
118 "<a href=\"http://www.google.com/test/list.php?start=30\">4</a>" + | |
119 "<p>Main content</p>" + | |
120 "<a href=\"http://www.google.com/test/list.php?offset=10\">2</a>" + | |
121 "<a href=\"http://www.google.com/test/list.php?offset=20\">3</a>" + | |
122 "<a href=\"http://www.google.com/test/list.php?offset=30\">4</a>" + | |
123 "<a href=\"http://www.google.com/test/list.php?offset=all\">All</a>" , | |
124 "http://www.google.com/test/list.php"); | |
125 | |
126 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | |
127 assertEquals(4, info.mAllPageInfo.size()); | |
128 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
129 assertEquals(1, page.mPageNum); | |
130 assertEquals("http://www.google.com/test/list.php", page.mUrl); | |
131 page = info.mAllPageInfo.get(1); | |
132 assertEquals(2, page.mPageNum); | |
133 assertEquals("http://www.google.com/test/list.php?start=10", page.mUrl); | |
134 page = info.mAllPageInfo.get(2); | |
135 assertEquals(3, page.mPageNum); | |
136 assertEquals("http://www.google.com/test/list.php?start=20", page.mUrl); | |
137 page = info.mAllPageInfo.get(3); | |
138 assertEquals(4, page.mPageNum); | |
139 assertEquals("http://www.google.com/test/list.php?start=30", page.mUrl); | |
140 assertTrue(info.mFormula != null); | |
141 assertEquals(10, info.mFormula.mCoefficient); | |
142 assertEquals(-10, info.mFormula.mDelta); | |
143 assertEquals("http://www.google.com/test/list.php?start=10", info.mNextP agingUrl); | |
144 } | |
145 | |
146 public void testInvalidAndVoidLinks() { | |
147 PageParamInfo info = processDocument( | |
148 "1<br>" + | |
149 "<a href=\"javascript:void(0)\">2</a>"); | |
150 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
151 } | |
152 | |
153 public void testDifferentHostLinks() { | |
154 PageParamInfo info = processDocumentWithoutBase( | |
155 "1<br>" + | |
156 "<a href=\"http://www.foo.com/foo/bar/2\">2</a>", | |
157 TEST_URL); | |
158 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
159 } | |
160 | |
161 public void testWhitespaceSibling() { | |
162 PageParamInfo info = processDocument( | |
163 "1<br>" + | |
164 " " + | |
165 "<a href=\"/foo/bar/2\">2</a>"); | |
166 assertEquals(2, info.mAllPageInfo.size()); | |
167 } | |
168 | |
169 public void testPunctuationSibling() { | |
170 PageParamInfo info = processDocument( | |
171 "<a href=\"/foo/bar/1\">1</a>" + | |
172 "," + | |
173 "<a href=\"/foo/bar/2\">2</a>"); | |
174 assertEquals(2, info.mAllPageInfo.size()); | |
175 } | |
176 | |
177 public void testParentSibling0() { | |
wychen
2015/09/21 23:08:03
Should we add tests for things like this to test s
kuan
2015/10/02 15:59:17
Done. fyi, i already had testPuncationSibling() t
| |
178 PageParamInfo info = processDocumentWithoutBase( | |
179 "<div>begin" + | |
180 "<strong>1</strong>" + | |
181 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | |
182 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | |
183 "end</div>", | |
184 TEST_URL); | |
185 assertEquals(3, info.mAllPageInfo.size()); | |
186 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
187 assertEquals(1, page.mPageNum); | |
188 assertEquals(TEST_URL, page.mUrl); | |
189 page = info.mAllPageInfo.get(1); | |
190 assertEquals(2, page.mPageNum); | |
191 assertEquals(TEST_URL + "/2", page.mUrl); | |
192 page = info.mAllPageInfo.get(2); | |
193 assertEquals(3, page.mPageNum); | |
194 assertEquals(TEST_URL + "/3", page.mUrl); | |
195 assertEquals("http://www.test.com/foo/bar/2", info.mNextPagingUrl); | |
196 } | |
197 | |
198 public void testParentSibling1() { | |
199 PageParamInfo info = processDocumentWithoutBase( | |
200 "<div>begin" + | |
201 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | |
202 "<strong>2</strong>" + | |
203 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | |
204 "end</div>", | |
205 "http://www.test.com/foo/bar/2"); | |
206 assertEquals(2, info.mAllPageInfo.size()); | |
207 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
208 assertEquals(1, page.mPageNum); | |
209 assertEquals(TEST_URL, page.mUrl); | |
210 page = info.mAllPageInfo.get(1); | |
211 assertEquals(3, page.mPageNum); | |
212 assertEquals(TEST_URL + "/3", page.mUrl); | |
213 assertEquals("http://www.test.com/foo/bar/3", info.mNextPagingUrl); | |
214 } | |
215 | |
216 public void testParentSibling2() { | |
217 PageParamInfo info = processDocumentWithoutBase( | |
218 "<div>begin" + | |
219 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | |
220 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | |
221 "<strong>3</strong>" + | |
222 "end</div>", | |
223 "http://www.test.com/foo/bar/3"); | |
224 assertEquals(2, info.mAllPageInfo.size()); | |
225 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
226 assertEquals(1, page.mPageNum); | |
227 assertEquals(TEST_URL, page.mUrl); | |
228 page = info.mAllPageInfo.get(1); | |
229 assertEquals(2, page.mPageNum); | |
230 assertEquals(TEST_URL + "/2", page.mUrl); | |
231 assertTrue(info.mNextPagingUrl.isEmpty()); | |
232 } | |
233 | |
234 private PageParamInfo processDocument(String content) { | |
235 // Create and add a <base> element so that all anchors are based off it. | |
236 BaseElement baseTag = Document.get().createBaseElement(); | |
237 baseTag.setHref(BASE_URL); | |
238 mHead.appendChild(baseTag); | |
239 | |
240 // Append content to body. | |
241 mBody.setInnerHTML(content); | |
242 | |
243 PageParamInfo info = PageParameterParser.parse(TEST_URL, null); | |
244 mHead.removeChild(baseTag); | |
245 return info; | |
246 } | |
247 | |
248 private PageParamInfo processDocumentWithoutBase(String content, String orig inalUrl) { | |
249 // Append content to body. | |
250 mBody.setInnerHTML(content); | |
251 return PageParameterParser.parse(originalUrl, null); | |
252 } | |
253 | |
254 } | |
OLD | NEW |