OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller; | 5 package org.chromium.distiller; |
6 | 6 |
| 7 import java.util.ArrayList; |
| 8 import java.util.HashMap; |
| 9 import java.util.List; |
| 10 import java.util.Map; |
| 11 |
7 public class PageParameterDetectorTest extends DomDistillerJsTestCase { | 12 public class PageParameterDetectorTest extends DomDistillerJsTestCase { |
8 | 13 |
| 14 public void testIsPagingUrl() { |
| 15 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryB=4&queryC
=v3", |
| 16 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&que
ryC=v3")); |
| 17 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3", |
| 18 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&que
ryC=v3")); |
| 19 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryB=2&queryC=v3", |
| 20 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
| 21 assertTrue(isPagingUrl("http://www.foo.com/a/b?queryC=v3", |
| 22 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
| 23 assertTrue(isPagingUrl("http://www.foo.com/a/b", "http://www.foo.com/a/b
?page=[*!]")); |
| 24 assertTrue(isPagingUrl("http://www.foo.com/a/b?page=3", |
| 25 "http://www.foo.com/a/b?page=[*!]")); |
| 26 assertTrue(isPagingUrl("http://www.foo.com/a/b/", "http://www.foo.com/a/
b?page=[*!]")); |
| 27 assertTrue(isPagingUrl("http://www.foo.com/a/b.htm", "http://www.foo.com
/a/b?page=[*!]")); |
| 28 assertTrue(isPagingUrl("http://www.foo.com/a/b.html", "http://www.foo.co
m/a/b?page=[*!]")); |
| 29 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3", |
| 30 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"))
; |
| 31 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryB=bar&queryC=v3", |
| 32 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"))
; |
| 33 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryC=v3&queryB=3", |
| 34 "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"))
; |
| 35 assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1", |
| 36 "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&qu
eryC=v3")); |
| 37 |
| 38 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html", |
| 39 "http://www.foo.com/a/abc-[*!].html")); |
| 40 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 41 "http://www.foo.com/a/abc-[*!].html")); |
| 42 assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a
/abc-[*!]")); |
| 43 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com
/a/abc-[*!]")); |
| 44 assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com
/a/b-[*!]-c-3")); |
| 45 assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a
-[*!]-c-3")); |
| 46 assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.c
om/a-p-[*!]-c-3")); |
| 47 assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo
.com/a/abc-[*!]")); |
| 48 assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/
abc-[*!]")); |
| 49 assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/ab
c-[*!]")); |
| 50 |
| 51 assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.co
m/a/page/[*!]")); |
| 52 assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/pag
e/[*!]")); |
| 53 assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html", |
| 54 "http://www.foo.com/a/page/[*!]/abc.html")); |
| 55 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 56 "http://www.foo.com/a/page/[*!]/abc.html")); |
| 57 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 58 "http://www.foo.com/a/[*!]/abc.html")); |
| 59 assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html", |
| 60 "http://www.foo.com/a/[*!]/abc.html")); |
| 61 assertTrue(isPagingUrl("http://www.foo.com/abc.html", |
| 62 "http://www.foo.com/a/[*!]/abc.html")); |
| 63 assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.c
om/a/page/[*!]")); |
| 64 assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.c
om/p/page/[*!]")); |
| 65 } |
| 66 |
9 public void testIsLastNumericPathComponentBad() { | 67 public void testIsLastNumericPathComponentBad() { |
10 // Path component is not numeric i.e. contains non-digits. | 68 // Path component is not numeric i.e. contains non-digits. |
11 String url = "http://www.foo.com/a2"; | 69 String url = "http://www.foo.com/a2"; |
12 int digitStart = url.indexOf("2"); | 70 int digitStart = url.indexOf("2"); |
13 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18,
digitStart, | 71 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18,
digitStart, |
14 digitStart + 1)); | 72 digitStart + 1)); |
15 | 73 |
16 // Numeric path component is first. | 74 // Numeric path component is first. |
17 url = "http://www.foo.com/2"; | 75 url = "http://www.foo.com/2"; |
18 digitStart = url.indexOf("2"); | 76 digitStart = url.indexOf("2"); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
50 digitStart + 1)); | 108 digitStart + 1)); |
51 | 109 |
52 // .shtml follows digit, previous path component is not a bad page param
name, but the one | 110 // .shtml follows digit, previous path component is not a bad page param
name, but the one |
53 // before that is. | 111 // before that is. |
54 url = "http://www.foo.com/wiki/good/2.shtml"; | 112 url = "http://www.foo.com/wiki/good/2.shtml"; |
55 digitStart = url.indexOf("2"); | 113 digitStart = url.indexOf("2"); |
56 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18,
digitStart, | 114 assertFalse(PageParameterDetector.isLastNumericPathComponentBad(url, 18,
digitStart, |
57 digitStart + 1)); | 115 digitStart + 1)); |
58 } | 116 } |
59 | 117 |
| 118 public void testIsPagePatternValid() { |
| 119 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 120 "http://www.google.com/forum-12/page/[*!]")); |
| 121 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 122 "http://www.google.com/forum-12/[*!]")); |
| 123 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 124 "http://www.google.com/forum-12/page-[*!]")); |
| 125 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 126 "http://www.google.com/forum-12?page=[*!]")); |
| 127 assertTrue(isPagePatternValid("http://www.google.com/forum-12?sid=12345"
, |
| 128 "http://www.google.com/forum-12?page=[*!]&sort=d")); |
| 129 assertTrue(isPagePatternValid("http://www.google.com/forum-12/food", |
| 130 "http://www.google.com/forum-12/food/for/bar/[*!]")); |
| 131 assertTrue(isPagePatternValid("http://www.google.com/forum-12-food", |
| 132 "http://www.google.com/forum-12-food-[*!]")); |
| 133 |
| 134 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food", |
| 135 "http://www.google.com/forum-12/food/2012/01/[*!]")); |
| 136 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012
/01/01", |
| 137 "http://www.google.com/forum-12/food/2012/01/[*!]")); |
| 138 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food", |
| 139 "http://www.google.com/forum-12?page=[*!]")); |
| 140 } |
| 141 |
| 142 public void testArePageNumsAdjacentAndConsecutive() { |
| 143 { |
| 144 final int[] allNums = { 1, 2 }; |
| 145 final int[] selectedNums = { 1, 2 }; |
| 146 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 147 assertTrue(isAdjacent(result)); |
| 148 assertTrue(isConsecutive(result)); |
| 149 } |
| 150 { |
| 151 final int[] allNums = { 1, 2, 3 }; |
| 152 final int[] selectedNums = { 2, 3 }; |
| 153 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 154 assertTrue(isAdjacent(result)); |
| 155 assertTrue(isConsecutive(result)); |
| 156 } |
| 157 { |
| 158 final int[] allNums = { 1, 5, 6, 7, 10 }; |
| 159 final int[] selectedNums = { 1, 5, 7, 10 }; |
| 160 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 161 assertTrue(isAdjacent(result)); |
| 162 assertTrue(isConsecutive(result)); |
| 163 } |
| 164 { |
| 165 final int[] allNums = { 10, 25, 50 }; |
| 166 final int[] selectedNums = { 10, 25, 50 }; // No consecutive pairs. |
| 167 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 168 assertTrue(isAdjacent(result)); |
| 169 assertFalse(isConsecutive(result)); |
| 170 } |
| 171 { |
| 172 final int[] allNums = { 23, 24, 30 }; |
| 173 // This list doesn't satisfy consecutive rule. There should be "22"
on the left of "23", |
| 174 // or "25" on the right of "24", or "29" on the left of "30". |
| 175 final int[] selectedNums = { 23, 24, 30 }; |
| 176 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 177 assertTrue(isAdjacent(result)); |
| 178 assertFalse(isConsecutive(result)); |
| 179 } |
| 180 { |
| 181 final int[] allNums = { 1, 2, 3, 4, 5 }; |
| 182 final int[] selectedNums = { 1, 3, 5 }; // Two gaps. |
| 183 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 184 assertFalse(isAdjacent(result)); |
| 185 assertFalse(isConsecutive(result)); |
| 186 } |
| 187 { |
| 188 final int[] allNums = { 2, 3, 4, 5 }; |
| 189 final int[] selectedNums = { 2, 5 }; // A gap of 2 numbers. |
| 190 int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums
); |
| 191 assertFalse(isAdjacent(result)); |
| 192 assertFalse(isConsecutive(result)); |
| 193 } |
| 194 } |
| 195 |
| 196 private static boolean isPagingUrl(String url, String pagePattern) { |
| 197 return PageParameterDetector.isPagingUrl(url, pagePattern); |
| 198 } |
| 199 |
| 200 private static boolean isPagePatternValid(String url, String pagePattern) { |
| 201 ParsedUrl parsedUrl = ParsedUrl.create(url); |
| 202 assertTrue(parsedUrl != null); |
| 203 return PageParameterDetector.isPagePatternValid(parsedUrl, pagePattern); |
| 204 } |
| 205 |
| 206 private static int arePageNumsAdjacentAndConsecutive(int[] selectedNums, int
[] allNums) { |
| 207 List<PageParamInfo.PageInfo> ascendingNumbers = new ArrayList<PageParamI
nfo.PageInfo>(); |
| 208 Map<Integer, Integer> numberToPos = new HashMap<Integer, Integer>(); |
| 209 |
| 210 for (int i = 0; i < allNums.length; i++) { |
| 211 final int number = allNums[i]; |
| 212 numberToPos.put(number, i); |
| 213 ascendingNumbers.add(new PageParamInfo.PageInfo(number, "")); |
| 214 } |
| 215 |
| 216 List<PageParameterDetector.LinkInfo> allLinkInfo = |
| 217 new ArrayList<PageParameterDetector.LinkInfo>(); |
| 218 for (int i = 0; i < selectedNums.length; i++) { |
| 219 final int number = selectedNums[i]; |
| 220 allLinkInfo.add(new PageParameterDetector.LinkInfo(number, number, |
| 221 numberToPos.get(number))); |
| 222 } |
| 223 |
| 224 return PageParameterDetector.arePageNumsAdjacentAndConsecutive(allLinkIn
fo, |
| 225 ascendingNumbers); |
| 226 } |
| 227 |
| 228 private static boolean isAdjacent(int result) { |
| 229 return (result & PageParameterDetector.PAGE_NUM_ADJACENT_MASK) == |
| 230 PageParameterDetector.PAGE_NUM_ADJACENT_MASK; |
| 231 } |
| 232 |
| 233 private static boolean isConsecutive(int result) { |
| 234 return (result & PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK) == |
| 235 PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK; |
| 236 } |
| 237 |
60 } | 238 } |
OLD | NEW |