Index: javatests/org/chromium/distiller/PageParameterDetectorTest.java |
diff --git a/javatests/org/chromium/distiller/PageParameterDetectorTest.java b/javatests/org/chromium/distiller/PageParameterDetectorTest.java |
index e545ff07fe2080f52d36473c908b9628cac95525..856e20e90351db01d941f2daee68c4cd2e91a52e 100644 |
--- a/javatests/org/chromium/distiller/PageParameterDetectorTest.java |
+++ b/javatests/org/chromium/distiller/PageParameterDetectorTest.java |
@@ -4,8 +4,66 @@ |
package org.chromium.distiller; |
+import java.util.ArrayList; |
+import java.util.HashMap; |
+import java.util.List; |
+import java.util.Map; |
+ |
public class PageParameterDetectorTest extends DomDistillerJsTestCase { |
+ // Checks PageParameterDetector.isPagingUrl() (through the local isPagingUrl() helper) on |
+ // URL / page-pattern pairs. In every pattern "[*!]" sits where the matching URLs carry a |
+ // number. NOTE(review): presumably the detector's page-number placeholder -- confirm. |
+ public void testIsPagingUrl() { |
+ // Placeholder is a query-parameter value. The expected matches include URLs that leave |
+ // out the placeholder's parameter (or the whole query string). |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryB=4&queryC=v3", |
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3", |
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryB=2&queryC=v3", |
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryC=v3", |
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b", "http://www.foo.com/a/b?page=[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?page=3", |
+ "http://www.foo.com/a/b?page=[*!]")); |
+ // URL path ends in "/", ".htm" or ".html" while the pattern path does not. |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b/", "http://www.foo.com/a/b?page=[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b.htm", "http://www.foo.com/a/b?page=[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b.html", "http://www.foo.com/a/b?page=[*!]")); |
+ // Expected mismatches, in order: URL has a parameter (queryA) absent from the pattern; |
+ // placeholder position holds non-numeric "bar"; same parameters in a different order; |
+ // URL lacks the pattern's trailing queryC parameter. |
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3", |
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryB=bar&queryC=v3", |
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryC=v3&queryB=3", |
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3")); |
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1", |
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3")); |
+ |
+ // Placeholder is part of a path component (e.g. "abc-[*!]"). The expected matches include |
+ // URLs that drop the "-<number>" piece. |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html", |
+ "http://www.foo.com/a/abc-[*!].html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
+ "http://www.foo.com/a/abc-[*!].html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3")); |
+ // Expected mismatches: non-numeric suffix "page", a different last component, and a URL |
+ // that stops one path level short of the pattern. |
+ assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]")); |
+ assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]")); |
+ assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]")); |
+ |
+ // Placeholder is a whole path component (e.g. "page/[*!]" or "a/[*!]/abc.html"). |
+ assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/page/[*!]")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html", |
+ "http://www.foo.com/a/page/[*!]/abc.html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
+ "http://www.foo.com/a/page/[*!]/abc.html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
+ "http://www.foo.com/a/[*!]/abc.html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html", |
+ "http://www.foo.com/a/[*!]/abc.html")); |
+ assertTrue(isPagingUrl("http://www.foo.com/abc.html", |
+ "http://www.foo.com/a/[*!]/abc.html")); |
+ // Expected mismatches: non-numeric "b" in the placeholder position, and a different |
+ // leading path component ("m" vs "p"). |
+ assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]")); |
+ assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]")); |
+ } |
+ |
public void testIsLastNumericPathComponentBad() { |
// Path component is not numeric i.e. contains non-digits. |
String url = "http://www.foo.com/a2"; |
@@ -57,4 +115,124 @@ public class PageParameterDetectorTest extends DomDistillerJsTestCase { |
digitStart + 1)); |
} |
+ // Checks PageParameterDetector.isPagePatternValid() (through the local helper, which first |
+ // parses the document URL) on document-URL / page-pattern pairs. |
+ public void testIsPagePatternValid() { |
+ // Expected valid: the pattern starts with the document URL's path and adds the "[*!]" |
+ // token in a further path component, a "-" suffix, or a query parameter. |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
+ "http://www.google.com/forum-12/page/[*!]")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
+ "http://www.google.com/forum-12/[*!]")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
+ "http://www.google.com/forum-12/page-[*!]")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
+ "http://www.google.com/forum-12?page=[*!]")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12?sid=12345", |
+ "http://www.google.com/forum-12?page=[*!]&sort=d")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12/food", |
+ "http://www.google.com/forum-12/food/for/bar/[*!]")); |
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12-food", |
+ "http://www.google.com/forum-12-food-[*!]")); |
+ |
+ // Expected invalid. The first two patterns end in "2012/01/[*!]" (NOTE(review): looks |
+ // like a calendar date rather than a page number -- confirm that is the reason they are |
+ // rejected). In the last pair the pattern's path ("forum-12") differs from the document |
+ // URL's path ("forum-12/food"). |
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food", |
+ "http://www.google.com/forum-12/food/2012/01/[*!]")); |
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012/01/01", |
+ "http://www.google.com/forum-12/food/2012/01/[*!]")); |
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food", |
+ "http://www.google.com/forum-12?page=[*!]")); |
+ } |
+ |
+ // Checks PageParameterDetector.arePageNumsAdjacentAndConsecutive() through the local helper |
+ // of the same name. Each scoped block supplies allNums (every number, with its array index |
+ // used as its position) and selectedNums (the subset turned into LinkInfo entries), then |
+ // decodes the returned int with isAdjacent() / isConsecutive(). |
+ public void testArePageNumsAdjacentAndConsecutive() { |
+ { |
+ // Every number selected; the two numbers differ by one. |
+ final int[] allNums = { 1, 2 }; |
+ final int[] selectedNums = { 1, 2 }; |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertTrue(isAdjacent(result)); |
+ assertTrue(isConsecutive(result)); |
+ } |
+ { |
+ // The first number (1) is left out of the selection. |
+ final int[] allNums = { 1, 2, 3 }; |
+ final int[] selectedNums = { 2, 3 }; |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertTrue(isAdjacent(result)); |
+ assertTrue(isConsecutive(result)); |
+ } |
+ { |
+ // A single number in the middle (6) is left out of the selection; both properties |
+ // are still expected to hold. |
+ final int[] allNums = { 1, 5, 6, 7, 10 }; |
+ final int[] selectedNums = { 1, 5, 7, 10 }; |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertTrue(isAdjacent(result)); |
+ assertTrue(isConsecutive(result)); |
+ } |
+ { |
+ final int[] allNums = { 10, 25, 50 }; |
+ final int[] selectedNums = { 10, 25, 50 }; // No consecutive pairs. |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertTrue(isAdjacent(result)); |
+ assertFalse(isConsecutive(result)); |
+ } |
+ { |
+ final int[] allNums = { 23, 24, 30 }; |
+ // This list doesn't satisfy consecutive rule. There should be "22" on the left of "23", |
+ // or "25" on the right of "24", or "29" on the left of "30". |
+ final int[] selectedNums = { 23, 24, 30 }; |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertTrue(isAdjacent(result)); |
+ assertFalse(isConsecutive(result)); |
+ } |
+ { |
+ final int[] allNums = { 1, 2, 3, 4, 5 }; |
+ final int[] selectedNums = { 1, 3, 5 }; // Two gaps. |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertFalse(isAdjacent(result)); |
+ assertFalse(isConsecutive(result)); |
+ } |
+ { |
+ final int[] allNums = { 2, 3, 4, 5 }; |
+ final int[] selectedNums = { 2, 5 }; // A gap of 2 numbers. |
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums); |
+ assertFalse(isAdjacent(result)); |
+ assertFalse(isConsecutive(result)); |
+ } |
+ } |
+ |
+ private static boolean isPagingUrl(String url, String pagePattern) { |
+ return PageParameterDetector.isPagingUrl(url, pagePattern); |
+ } |
+ |
+ private static boolean isPagePatternValid(String url, String pagePattern) { |
+ ParsedUrl parsedUrl = ParsedUrl.create(url); |
+ assertTrue(parsedUrl != null); |
+ return PageParameterDetector.isPagePatternValid(parsedUrl, pagePattern); |
+ } |
+ |
+ /** |
+  * Builds the detector's inputs from plain int arrays and forwards to |
+  * PageParameterDetector.arePageNumsAdjacentAndConsecutive(). |
+  * |
+  * @param selectedNums numbers to wrap as LinkInfo entries; each must also appear in allNums. |
+  * @param allNums every number, wrapped as a PageInfo with an empty string; a number's array |
+  *     index is recorded as its position. |
+  * @return the detector's int result, unchanged. |
+  * @throws IllegalArgumentException if a selected number is missing from allNums. |
+  */ |
+ private static int arePageNumsAdjacentAndConsecutive(int[] selectedNums, int[] allNums) { |
+     List<PageParamInfo.PageInfo> ascendingNumbers = new ArrayList<PageParamInfo.PageInfo>(); |
+     Map<Integer, Integer> numberToPos = new HashMap<Integer, Integer>(); |
+ |
+     for (int i = 0; i < allNums.length; i++) { |
+         final int number = allNums[i]; |
+         numberToPos.put(number, i); |
+         ascendingNumbers.add(new PageParamInfo.PageInfo(number, "")); |
+     } |
+ |
+     List<PageParameterDetector.LinkInfo> allLinkInfo = |
+             new ArrayList<PageParameterDetector.LinkInfo>(); |
+     for (int i = 0; i < selectedNums.length; i++) { |
+         final int number = selectedNums[i]; |
+         final Integer pos = numberToPos.get(number); |
+         // Map.get() yields null for an unknown key; fail with a readable message here |
+         // rather than letting the null reach the LinkInfo constructor. |
+         if (pos == null) { |
+             throw new IllegalArgumentException( |
+                     "selectedNums contains " + number + ", which is not in allNums"); |
+         } |
+         allLinkInfo.add(new PageParameterDetector.LinkInfo(number, number, pos)); |
+     } |
+ |
+     return PageParameterDetector.arePageNumsAdjacentAndConsecutive(allLinkInfo, |
+             ascendingNumbers); |
+ } |
+ |
+ private static boolean isAdjacent(int result) { |
+ return (result & PageParameterDetector.PAGE_NUM_ADJACENT_MASK) == |
+ PageParameterDetector.PAGE_NUM_ADJACENT_MASK; |
+ } |
+ |
+ private static boolean isConsecutive(int result) { |
+ return (result & PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK) == |
+ PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK; |
+ } |
+ |
} |