Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 package org.chromium.distiller; | |
| 6 | |
| 7 import com.google.gwt.dom.client.BaseElement; | |
| 8 import com.google.gwt.dom.client.Document; | |
| 9 | |
| 10 public class PageParameterParserTest extends DomDistillerJsTestCase { | |
| 11 private static final String BASE_URL = "http://www.test.com/"; | |
| 12 private static final String TEST_URL = BASE_URL + "foo/bar"; | |
| 13 | |
| 14 public void testBasic() { | |
| 15 PageParamInfo info = processDocument( | |
| 16 "1<br>" + | |
| 17 "<a href=\"/foo/bar/2\">2</a>"); | |
| 18 assertEquals(2, info.mAllPageInfo.size()); | |
| 19 | |
| 20 info = processDocument( | |
| 21 "1<br>" + | |
| 22 "<a href=\"/foo/bar/2\">2</a>" + | |
| 23 "<a href=\"/foo/bar/3\">3</a>"); | |
| 24 assertEquals(3, info.mAllPageInfo.size()); | |
| 25 } | |
| 26 | |
| 27 public void testRejectOnlyPage2LinkWithoutCurrentPageText() { | |
| 28 // Although there is a digital outlink to 2nd page, there is no plain text "1" | |
| 29 // before it, so there is no pagination. | |
| 30 PageParamInfo info = processDocument( | |
| 31 "If there were a '1', pagination should be detected. But there isn't." + | |
| 32 "<a href=\"/foo/bar/2\">2</a>" + | |
| 33 "Main content"); | |
| 34 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
| 35 } | |
| 36 | |
| 37 public void testRejectNonAdjacentOutlinks() { | |
| 38 PageParamInfo info = processDocument( | |
| 39 "1<br>" + | |
| 40 "Unrelated terms<br>" + | |
| 41 "<a href=\"/foo/bar/2\">2</a>" + | |
| 42 "Unrelated terms<br>" + | |
| 43 "<a href=\"/foo/bar/3\">3</a>" + | |
| 44 "<a href=\"/foo/bar/all\">All</a>"); | |
| 45 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
| 46 } | |
| 47 | |
| 48 public void testAcceptAdjacentOutlinks() { | |
| 49 PageParamInfo info = processDocumentWithoutBase( | |
| 50 "Unrelated link: <a href=\"http://www.test.com/other/2\">2</a>" + | |
| 51 "<p>Main content</p>" + | |
| 52 "1<br>" + | |
| 53 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
| 54 "<a href=\"http://www.test.com/foo/bar/3\">3</a>", | |
| 55 TEST_URL); | |
| 56 assertEquals(3, info.mAllPageInfo.size()); | |
| 57 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 58 assertEquals(1, page.mPageNum); | |
| 59 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
| 60 page = info.mAllPageInfo.get(1); | |
| 61 assertEquals(2, page.mPageNum); | |
| 62 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
| 63 page = info.mAllPageInfo.get(2); | |
| 64 assertEquals(3, page.mPageNum); | |
| 65 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
| 66 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
| 67 } | |
| 68 | |
| 69 public void testAcceptDuplicatePatterns() { | |
| 70 PageParamInfo info = processDocument( | |
| 71 "1<br>" + | |
| 72 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
| 73 "<a href=\"http://www.test.com/foo/bar/3\">3</a>" + | |
| 74 "<p>Main content</p>" + | |
| 75 "1<br>" + | |
| 76 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
| 77 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | |
| 78 assertEquals(3, info.mAllPageInfo.size()); | |
| 79 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 80 assertEquals(1, page.mPageNum); | |
| 81 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
| 82 page = info.mAllPageInfo.get(1); | |
| 83 assertEquals(2, page.mPageNum); | |
| 84 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
| 85 page = info.mAllPageInfo.get(2); | |
| 86 assertEquals(3, page.mPageNum); | |
| 87 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
| 88 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
| 89 } | |
| 90 | |
| 91 public void testPreferPageNumber() { | |
| 92 PageParamInfo info = processDocument( | |
| 93 "<a href=\"http://www.test.com/foo/bar/size-25\">25</a>" + | |
| 94 "<a href=\"http://www.test.com/foo/bar/size-50\">50</a>" + | |
| 95 "<a href=\"http://www.test.com/foo/bar/size-100\">100</a>" + | |
| 96 "<p>Main content</p>" + | |
| 97 "1<br>" + | |
| 98 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | |
| 99 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | |
| 100 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | |
| 101 assertEquals(3, info.mAllPageInfo.size()); | |
| 102 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 103 assertEquals(1, page.mPageNum); | |
| 104 assertEquals(BASE_URL + "foo/bar", page.mUrl); | |
| 105 page = info.mAllPageInfo.get(1); | |
| 106 assertEquals(2, page.mPageNum); | |
| 107 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | |
| 108 page = info.mAllPageInfo.get(2); | |
| 109 assertEquals(3, page.mPageNum); | |
| 110 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | |
| 111 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | |
| 112 } | |
| 113 | |
| 114 public void testRejectMultiplePageNumberPatterns() { | |
| 115 PageParamInfo info = processDocumentWithoutBase( | |
| 116 "<a href=\"http://www.google.com/test/list.php?start=10\">2</a>" + | |
| 117 "<a href=\"http://www.google.com/test/list.php?start=20\">3</a>" + | |
| 118 "<a href=\"http://www.google.com/test/list.php?start=30\">4</a>" + | |
| 119 "<p>Main content</p>" + | |
| 120 "<a href=\"http://www.google.com/test/list.php?offset=10\">2</a>" + | |
| 121 "<a href=\"http://www.google.com/test/list.php?offset=20\">3</a>" + | |
| 122 "<a href=\"http://www.google.com/test/list.php?offset=30\">4</a>" + | |
| 123 "<a href=\"http://www.google.com/test/list.php?offset=all\">All</a>", | |
| 124 "http://www.google.com/test/list.php"); | |
| 125 | |
| 126 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | |
| 127 assertEquals(4, info.mAllPageInfo.size()); | |
| 128 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 129 assertEquals(1, page.mPageNum); | |
| 130 assertEquals("http://www.google.com/test/list.php", page.mUrl); | |
| 131 page = info.mAllPageInfo.get(1); | |
| 132 assertEquals(2, page.mPageNum); | |
| 133 assertEquals("http://www.google.com/test/list.php?start=10", page.mUrl); | |
| 134 page = info.mAllPageInfo.get(2); | |
| 135 assertEquals(3, page.mPageNum); | |
| 136 assertEquals("http://www.google.com/test/list.php?start=20", page.mUrl); | |
| 137 page = info.mAllPageInfo.get(3); | |
| 138 assertEquals(4, page.mPageNum); | |
| 139 assertEquals("http://www.google.com/test/list.php?start=30", page.mUrl); | |
| 140 assertTrue(info.mFormula != null); | |
| 141 assertEquals(10, info.mFormula.mCoefficient); | |
| 142 assertEquals(-10, info.mFormula.mDelta); | |
| 143 assertEquals("http://www.google.com/test/list.php?start=10", info.mNextPagingUrl); | |
| 144 } | |
| 145 | |
| 146 public void testInvalidAndVoidLinks() { | |
| 147 PageParamInfo info = processDocument( | |
| 148 "1<br>" + | |
| 149 "<a href=\"javascript:void(0)\">2</a>"); | |
| 150 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
| 151 } | |
| 152 | |
| 153 public void testDifferentHostLinks() { | |
| 154 PageParamInfo info = processDocumentWithoutBase( | |
| 155 "1<br>" + | |
| 156 "<a href=\"http://www.foo.com/foo/bar/2\">2</a>", | |
| 157 TEST_URL); | |
| 158 PageParameterDetectorTest.expectEmptyPageParamInfo(info); | |
| 159 } | |
| 160 | |
| 161 public void testWhitespaceSibling() { | |
| 162 PageParamInfo info = processDocument( | |
| 163 "1<br>" + | |
| 164 " " + | |
| 165 "<a href=\"/foo/bar/2\">2</a>"); | |
| 166 assertEquals(2, info.mAllPageInfo.size()); | |
| 167 } | |
| 168 | |
| 169 public void testPunctuationSibling() { | |
| 170 PageParamInfo info = processDocument( | |
| 171 "<a href=\"/foo/bar/1\">1</a>" + | |
| 172 "," + | |
| 173 "<a href=\"/foo/bar/2\">2</a>"); | |
| 174 assertEquals(2, info.mAllPageInfo.size()); | |
| 175 } | |
| 176 | |
| 177 public void testParentSibling0() { | |
|
wychen
2015/09/21 23:08:03
Should we add tests for things like this to test s
kuan
2015/10/02 15:59:17
Done. fyi, i already had testPunctuationSibling() t
| |
| 178 PageParamInfo info = processDocumentWithoutBase( | |
| 179 "<div>begin" + | |
| 180 "<strong>1</strong>" + | |
| 181 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | |
| 182 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | |
| 183 "end</div>", | |
| 184 TEST_URL); | |
| 185 assertEquals(3, info.mAllPageInfo.size()); | |
| 186 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 187 assertEquals(1, page.mPageNum); | |
| 188 assertEquals(TEST_URL, page.mUrl); | |
| 189 page = info.mAllPageInfo.get(1); | |
| 190 assertEquals(2, page.mPageNum); | |
| 191 assertEquals(TEST_URL + "/2", page.mUrl); | |
| 192 page = info.mAllPageInfo.get(2); | |
| 193 assertEquals(3, page.mPageNum); | |
| 194 assertEquals(TEST_URL + "/3", page.mUrl); | |
| 195 assertEquals("http://www.test.com/foo/bar/2", info.mNextPagingUrl); | |
| 196 } | |
| 197 | |
| 198 public void testParentSibling1() { | |
| 199 PageParamInfo info = processDocumentWithoutBase( | |
| 200 "<div>begin" + | |
| 201 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | |
| 202 "<strong>2</strong>" + | |
| 203 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | |
| 204 "end</div>", | |
| 205 "http://www.test.com/foo/bar/2"); | |
| 206 assertEquals(2, info.mAllPageInfo.size()); | |
| 207 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 208 assertEquals(1, page.mPageNum); | |
| 209 assertEquals(TEST_URL, page.mUrl); | |
| 210 page = info.mAllPageInfo.get(1); | |
| 211 assertEquals(3, page.mPageNum); | |
| 212 assertEquals(TEST_URL + "/3", page.mUrl); | |
| 213 assertEquals("http://www.test.com/foo/bar/3", info.mNextPagingUrl); | |
| 214 } | |
| 215 | |
| 216 public void testParentSibling2() { | |
| 217 PageParamInfo info = processDocumentWithoutBase( | |
| 218 "<div>begin" + | |
| 219 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | |
| 220 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | |
| 221 "<strong>3</strong>" + | |
| 222 "end</div>", | |
| 223 "http://www.test.com/foo/bar/3"); | |
| 224 assertEquals(2, info.mAllPageInfo.size()); | |
| 225 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | |
| 226 assertEquals(1, page.mPageNum); | |
| 227 assertEquals(TEST_URL, page.mUrl); | |
| 228 page = info.mAllPageInfo.get(1); | |
| 229 assertEquals(2, page.mPageNum); | |
| 230 assertEquals(TEST_URL + "/2", page.mUrl); | |
| 231 assertTrue(info.mNextPagingUrl.isEmpty()); | |
| 232 } | |
| 233 | |
| 234 private PageParamInfo processDocument(String content) { | |
| 235 // Create and add a <base> element so that all anchors are based off it. | |
| 236 BaseElement baseTag = Document.get().createBaseElement(); | |
| 237 baseTag.setHref(BASE_URL); | |
| 238 mHead.appendChild(baseTag); | |
| 239 | |
| 240 // Append content to body. | |
| 241 mBody.setInnerHTML(content); | |
| 242 | |
| 243 PageParamInfo info = PageParameterParser.parse(TEST_URL, null); | |
| 244 mHead.removeChild(baseTag); | |
| 245 return info; | |
| 246 } | |
| 247 | |
| 248 private PageParamInfo processDocumentWithoutBase(String content, String originalUrl) { | |
| 249 // Append content to body. | |
| 250 mBody.setInnerHTML(content); | |
| 251 return PageParameterParser.parse(originalUrl, null); | |
| 252 } | |
| 253 | |
| 254 } | |
| OLD | NEW |