Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: javatests/org/chromium/distiller/PageParameterDetectorTest.java

Issue 1029593003: implement validations of pagination URLs (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: rename test Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller; 5 package org.chromium.distiller;
6 6
7 import java.util.ArrayList;
8 import java.util.HashMap;
9 import java.util.List;
10 import java.util.Map;
11
7 public class PageParameterDetectorTest extends DomDistillerJsTestCase { 12 public class PageParameterDetectorTest extends DomDistillerJsTestCase {
8 13
14 public void testIsPagingUrl() {
15 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryB=4&queryC=v3",
16 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
17 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3",
18 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
19 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryB=2&queryC=v3",
20 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
21 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryC=v3",
22 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
23 assertTrue(isPagingUrl("http://www.foo.com/a/b", "http://www.foo.com/a/b?page=[*!]"));
24 assertTrue(isPagingUrl("http://www.foo.com/a/b?page=3",
25 "http://www.foo.com/a/b?page=[*!]"));
26 assertTrue(isPagingUrl("http://www.foo.com/a/b/", "http://www.foo.com/a/b?page=[*!]"));
27 assertTrue(isPagingUrl("http://www.foo.com/a/b.htm", "http://www.foo.com/a/b?page=[*!]"));
28 assertTrue(isPagingUrl("http://www.foo.com/a/b.html", "http://www.foo.com/a/b?page=[*!]"));
29 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3",
30 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
31 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryB=bar&queryC=v3",
32 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
33 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryC=v3&queryB=3",
34 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
35 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1",
36 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
37
38 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
39 "http://www.foo.com/a/abc-[*!].html"));
40 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
41 "http://www.foo.com/a/abc-[*!].html"));
42 assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]"));
43 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]"));
44 assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3"));
45 assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3"));
46 assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3"));
47 assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]"));
48 assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]"));
49 assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]"));
50
51 assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]"));
52 assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/page/[*!]"));
53 assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
54 "http://www.foo.com/a/page/[*!]/abc.html"));
55 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
56 "http://www.foo.com/a/page/[*!]/abc.html"));
57 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
58 "http://www.foo.com/a/[*!]/abc.html"));
59 assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
60 "http://www.foo.com/a/[*!]/abc.html"));
61 assertTrue(isPagingUrl("http://www.foo.com/abc.html",
62 "http://www.foo.com/a/[*!]/abc.html"));
63 assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]"));
64 assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]"));
65 }
66
9 public void testIsLastNumericPathComponentBad() { 67 public void testIsLastNumericPathComponentBad() {
10 // Path component is not numeric i.e. contains non-digits. 68 // Path component is not numeric i.e. contains non-digits.
11 String url = "http://www.foo.com/a2"; 69 String url = "http://www.foo.com/a2";
12 int digitStart = url.indexOf("2"); 70 int digitStart = url.indexOf("2");
13 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18, digitStart, 71 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18, digitStart,
14 digitStart + 1)); 72 digitStart + 1));
15 73
16 // Numeric path component is first. 74 // Numeric path component is first.
17 url = "http://www.foo.com/2"; 75 url = "http://www.foo.com/2";
18 digitStart = url.indexOf("2"); 76 digitStart = url.indexOf("2");
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
50 digitStart + 1)); 108 digitStart + 1));
51 109
52 // .shtml follows digit, previous path component is not a bad page param name, but the one 110 // .shtml follows digit, previous path component is not a bad page param name, but the one
53 // before that is. 111 // before that is.
54 url = "http://www.foo.com/wiki/good/2.shtml"; 112 url = "http://www.foo.com/wiki/good/2.shtml";
55 digitStart = url.indexOf("2"); 113 digitStart = url.indexOf("2");
56 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18, digitStart, 114 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18, digitStart,
57 digitStart + 1)); 115 digitStart + 1));
58 } 116 }
59 117
118 public void testIsPagePatternValid() {
119 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
120 "http://www.google.com/forum-12/page/[*!]"));
121 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
122 "http://www.google.com/forum-12/[*!]"));
123 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
124 "http://www.google.com/forum-12/page-[*!]"));
125 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
126 "http://www.google.com/forum-12?page=[*!]"));
127 assertTrue(isPagePatternValid("http://www.google.com/forum-12?sid=12345",
128 "http://www.google.com/forum-12?page=[*!]&sort=d"));
129 assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
130 "http://www.google.com/forum-12/food/for/bar/[*!]"));
131 assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
132 "http://www.google.com/forum-12-food-[*!]"));
133
134 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
135 "http://www.google.com/forum-12/food/2012/01/[*!]"));
136 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012/01/01",
137 "http://www.google.com/forum-12/food/2012/01/[*!]"));
138 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
139 "http://www.google.com/forum-12?page=[*!]"));
140 }
141
142 public void testArePageNumsAdjacentAndConsecutive() {
143 {
144 final int[] allNums = { 1, 2 };
145 final int[] selectedNums = { 1, 2 };
146 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
147 assertTrue(isAdjacent(result));
148 assertTrue(isConsecutive(result));
149 }
150 {
151 final int[] allNums = { 1, 2, 3 };
152 final int[] selectedNums = { 2, 3 };
153 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
154 assertTrue(isAdjacent(result));
155 assertTrue(isConsecutive(result));
156 }
157 {
158 final int[] allNums = { 1, 5, 6, 7, 10 };
159 final int[] selectedNums = { 1, 5, 7, 10 };
160 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
161 assertTrue(isAdjacent(result));
162 assertTrue(isConsecutive(result));
163 }
164 {
165 final int[] allNums = { 10, 25, 50 };
166 final int[] selectedNums = { 10, 25, 50 }; // No consecutive pairs.
167 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
168 assertTrue(isAdjacent(result));
169 assertFalse(isConsecutive(result));
170 }
171 {
172 final int[] allNums = { 23, 24, 30 };
173 // This list doesn't satisfy consecutive rule. There should be "22" on the left of "23",
174 // or "25" on the right of "24", or "29" on the left of "30".
175 final int[] selectedNums = { 23, 24, 30 };
176 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
177 assertTrue(isAdjacent(result));
178 assertFalse(isConsecutive(result));
179 }
180 {
181 final int[] allNums = { 1, 2, 3, 4, 5 };
182 final int[] selectedNums = { 1, 3, 5 }; // Two gaps.
183 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
184 assertFalse(isAdjacent(result));
185 assertFalse(isConsecutive(result));
186 }
187 {
188 final int[] allNums = { 2, 3, 4, 5 };
189 final int[] selectedNums = { 2, 5 }; // A gap of 2 numbers.
190 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
191 assertFalse(isAdjacent(result));
192 assertFalse(isConsecutive(result));
193 }
194 }
195
196 private static boolean isPagingUrl(String url, String pagePattern) {
197 return PageParameterDetector.isPagingUrl(url, pagePattern);
198 }
199
200 private static boolean isPagePatternValid(String url, String pagePattern) {
201 ParsedUrl parsedUrl = ParsedUrl.create(url);
202 assertTrue(parsedUrl != null);
203 return PageParameterDetector.isPagePatternValid(parsedUrl, pagePattern);
204 }
205
206 private static int arePageNumsAdjacentAndConsecutive(int[] selectedNums, int[] allNums) {
207 List<PageParamInfo.PageInfo> ascendingNumbers = new ArrayList<PageParamInfo.PageInfo>();
208 Map<Integer, Integer> numberToPos = new HashMap<Integer, Integer>();
209
210 for (int i = 0; i < allNums.length; i++) {
211 final int number = allNums[i];
212 numberToPos.put(number, i);
213 ascendingNumbers.add(new PageParamInfo.PageInfo(number, ""));
214 }
215
216 List<PageParameterDetector.LinkInfo> allLinkInfo =
217 new ArrayList<PageParameterDetector.LinkInfo>();
218 for (int i = 0; i < selectedNums.length; i++) {
219 final int number = selectedNums[i];
220 allLinkInfo.add(new PageParameterDetector.LinkInfo(number, number,
221 numberToPos.get(number)));
222 }
223
224 return PageParameterDetector.arePageNumsAdjacentAndConsecutive(allLinkInfo,
225 ascendingNumbers);
226 }
227
228 private static boolean isAdjacent(int result) {
229 return (result & PageParameterDetector.PAGE_NUM_ADJACENT_MASK) ==
230 PageParameterDetector.PAGE_NUM_ADJACENT_MASK;
231 }
232
233 private static boolean isConsecutive(int result) {
234 return (result & PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK) ==
235 PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK;
236 }
237
60 } 238 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698