| Index: javatests/org/chromium/distiller/PageParameterDetectorTest.java
|
| diff --git a/javatests/org/chromium/distiller/PageParameterDetectorTest.java b/javatests/org/chromium/distiller/PageParameterDetectorTest.java
|
| index e545ff07fe2080f52d36473c908b9628cac95525..856e20e90351db01d941f2daee68c4cd2e91a52e 100644
|
| --- a/javatests/org/chromium/distiller/PageParameterDetectorTest.java
|
| +++ b/javatests/org/chromium/distiller/PageParameterDetectorTest.java
|
| @@ -4,8 +4,66 @@
|
|
|
| package org.chromium.distiller;
|
|
|
| +import java.util.ArrayList;
|
| +import java.util.HashMap;
|
| +import java.util.List;
|
| +import java.util.Map;
|
| +
|
| public class PageParameterDetectorTest extends DomDistillerJsTestCase {
|
|
|
| +    /**
|
| +     * Exercises isPagingUrl() with page patterns whose "[*!]" token is a query value, a part
|
| +     * of a path component, or a whole path component.  Every case goes through
|
| +     * expectPagingUrl() so that a failure names the url and pattern involved.
|
| +     */
|
| +    public void testIsPagingUrl() {
|
| +        // "[*!]" is the value of a query parameter.
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b?queryA=v1&queryB=4&queryC=v3",
|
| +                "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b?queryA=v1&queryC=v3",
|
| +                "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b?queryB=2&queryC=v3",
|
| +                "http://www.foo.com/a/b?queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b?queryC=v3",
|
| +                "http://www.foo.com/a/b?queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b", "http://www.foo.com/a/b?page=[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b?page=3",
|
| +                "http://www.foo.com/a/b?page=[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b/", "http://www.foo.com/a/b?page=[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b.htm", "http://www.foo.com/a/b?page=[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b.html", "http://www.foo.com/a/b?page=[*!]");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/b?queryA=v1&queryC=v3",
|
| +                "http://www.foo.com/a/b?queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/b?queryB=bar&queryC=v3",
|
| +                "http://www.foo.com/a/b?queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/b?queryC=v3&queryB=3",
|
| +                "http://www.foo.com/a/b?queryB=[*!]&queryC=v3");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/b?queryA=v1",
|
| +                "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3");
|
| +
|
| +        // "[*!]" is part of a path component.
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc-2.html",
|
| +                "http://www.foo.com/a/abc-[*!].html");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc.html",
|
| +                "http://www.foo.com/a/abc-[*!].html");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3");
|
| +        expectPagingUrl(true, "http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]");
|
| +        expectPagingUrl(false, "http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]");
|
| +
|
| +        // "[*!]" is a path component of its own.
|
| +        expectPagingUrl(true, "http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a", "http://www.foo.com/a/page/[*!]");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/page/2/abc.html",
|
| +                "http://www.foo.com/a/page/[*!]/abc.html");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc.html",
|
| +                "http://www.foo.com/a/page/[*!]/abc.html");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/abc.html",
|
| +                "http://www.foo.com/a/[*!]/abc.html");
|
| +        expectPagingUrl(true, "http://www.foo.com/a/2/abc.html",
|
| +                "http://www.foo.com/a/[*!]/abc.html");
|
| +        expectPagingUrl(true, "http://www.foo.com/abc.html",
|
| +                "http://www.foo.com/a/[*!]/abc.html");
|
| +        expectPagingUrl(false, "http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]");
|
| +        expectPagingUrl(false, "http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]");
|
| +    }
|
| +
|
| +    /**
|
| +     * Asserts that isPagingUrl(url, pagePattern) returns |expected|; the failure message quotes
|
| +     * both arguments.
|
| +     */
|
| +    private static void expectPagingUrl(boolean expected, String url, String pagePattern) {
|
| +        assertEquals("isPagingUrl(\"" + url + "\", \"" + pagePattern + "\")", expected,
|
| +                isPagingUrl(url, pagePattern));
|
| +    }
|
| +
|
| public void testIsLastNumericPathComponentBad() {
|
| // Path component is not numeric i.e. contains non-digits.
|
| String url = "http://www.foo.com/a2";
|
| @@ -57,4 +115,124 @@ public class PageParameterDetectorTest extends DomDistillerJsTestCase {
|
| digitStart + 1));
|
| }
|
|
|
| +    /**
|
| +     * Exercises isPagePatternValid() with document urls and candidate page patterns.  Every
|
| +     * case goes through expectPagePatternValid() so that a failure names the url and pattern
|
| +     * involved.
|
| +     */
|
| +    public void testIsPagePatternValid() {
|
| +        // Patterns expected to be accepted for the given document url.
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12",
|
| +                "http://www.google.com/forum-12/page/[*!]");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12",
|
| +                "http://www.google.com/forum-12/[*!]");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12",
|
| +                "http://www.google.com/forum-12/page-[*!]");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12",
|
| +                "http://www.google.com/forum-12?page=[*!]");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12?sid=12345",
|
| +                "http://www.google.com/forum-12?page=[*!]&sort=d");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12/food",
|
| +                "http://www.google.com/forum-12/food/for/bar/[*!]");
|
| +        expectPagePatternValid(true, "http://www.google.com/forum-12-food",
|
| +                "http://www.google.com/forum-12-food-[*!]");
|
| +
|
| +        // Patterns expected to be rejected for the given document url.
|
| +        expectPagePatternValid(false, "http://www.google.com/forum-12/food",
|
| +                "http://www.google.com/forum-12/food/2012/01/[*!]");
|
| +        expectPagePatternValid(false, "http://www.google.com/forum-12/food/2012/01/01",
|
| +                "http://www.google.com/forum-12/food/2012/01/[*!]");
|
| +        expectPagePatternValid(false, "http://www.google.com/forum-12/food",
|
| +                "http://www.google.com/forum-12?page=[*!]");
|
| +    }
|
| +
|
| +    /**
|
| +     * Asserts that isPagePatternValid(url, pagePattern) returns |expected|; the failure message
|
| +     * quotes both arguments.
|
| +     */
|
| +    private static void expectPagePatternValid(boolean expected, String url,
|
| +            String pagePattern) {
|
| +        assertEquals("isPagePatternValid(\"" + url + "\", \"" + pagePattern + "\")", expected,
|
| +                isPagePatternValid(url, pagePattern));
|
| +    }
|
| +
|
| +    /**
|
| +     * Exercises arePageNumsAdjacentAndConsecutive() with several selections of page numbers and
|
| +     * checks both flags decoded from the result.  Every case goes through
|
| +     * expectAdjacentAndConsecutive() so that a failure reports the numbers involved.
|
| +     */
|
| +    public void testArePageNumsAdjacentAndConsecutive() {
|
| +        // Arguments are: selected numbers, all numbers, expected adjacent, expected consecutive.
|
| +        expectAdjacentAndConsecutive(new int[] { 1, 2 }, new int[] { 1, 2 }, true, true);
|
| +        expectAdjacentAndConsecutive(new int[] { 2, 3 }, new int[] { 1, 2, 3 }, true, true);
|
| +        expectAdjacentAndConsecutive(new int[] { 1, 5, 7, 10 }, new int[] { 1, 5, 6, 7, 10 },
|
| +                true, true);
|
| +
|
| +        // No consecutive pairs.
|
| +        expectAdjacentAndConsecutive(new int[] { 10, 25, 50 }, new int[] { 10, 25, 50 },
|
| +                true, false);
|
| +
|
| +        // This list doesn't satisfy consecutive rule. There should be "22" on the left of "23",
|
| +        // or "25" on the right of "24", or "29" on the left of "30".
|
| +        expectAdjacentAndConsecutive(new int[] { 23, 24, 30 }, new int[] { 23, 24, 30 },
|
| +                true, false);
|
| +
|
| +        // Two gaps.
|
| +        expectAdjacentAndConsecutive(new int[] { 1, 3, 5 }, new int[] { 1, 2, 3, 4, 5 },
|
| +                false, false);
|
| +
|
| +        // A gap of 2 numbers.
|
| +        expectAdjacentAndConsecutive(new int[] { 2, 5 }, new int[] { 2, 3, 4, 5 },
|
| +                false, false);
|
| +    }
|
| +
|
| +    /**
|
| +     * Runs arePageNumsAdjacentAndConsecutive(selectedNums, allNums) and asserts, in this order,
|
| +     * that isAdjacent() and isConsecutive() of the result equal |adjacent| and |consecutive|.
|
| +     * Failure messages list both arrays.
|
| +     */
|
| +    private static void expectAdjacentAndConsecutive(int[] selectedNums, int[] allNums,
|
| +            boolean adjacent, boolean consecutive) {
|
| +        // Fully qualified so that the file's import list does not have to change.
|
| +        final String label = "selected " + java.util.Arrays.toString(selectedNums) + " of "
|
| +                + java.util.Arrays.toString(allNums);
|
| +        final int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
|
| +        assertEquals(label + ": adjacent", adjacent, isAdjacent(result));
|
| +        assertEquals(label + ": consecutive", consecutive, isConsecutive(result));
|
| +    }
|
| +
|
| + private static boolean isPagingUrl(String url, String pagePattern) {
|
| + return PageParameterDetector.isPagingUrl(url, pagePattern);
|
| + }
|
| +
|
| +    /**
|
| +     * Parses |url| with ParsedUrl.create() and hands the result, together with |pagePattern|,
|
| +     * to PageParameterDetector.isPagePatternValid().  Fails the calling test when the url
|
| +     * cannot be parsed.
|
| +     *
|
| +     * @return whatever the detector returns for the parsed url and |pagePattern|.
|
| +     */
|
| +    private static boolean isPagePatternValid(String url, String pagePattern) {
|
| +        ParsedUrl parsedUrl = ParsedUrl.create(url);
|
| +        // Name the offending url; a bare assertTrue() would fail without any message.
|
| +        assertNotNull("ParsedUrl.create() returned null for \"" + url + "\"", parsedUrl);
|
| +        return PageParameterDetector.isPagePatternValid(parsedUrl, pagePattern);
|
| +    }
|
| +
|
| +    /**
|
| +     * Builds the list arguments of PageParameterDetector.arePageNumsAdjacentAndConsecutive()
|
| +     * from plain int arrays and returns the detector's result.
|
| +     *
|
| +     * @param selectedNums numbers to turn into LinkInfo entries; each one must also occur in
|
| +     *     |allNums|, otherwise the calling test fails.
|
| +     * @param allNums numbers to turn into PageInfo entries (with an empty string as second
|
| +     *     constructor argument), kept in the given order.
|
| +     * @return the detector's int result; see isAdjacent() and isConsecutive() for decoding.
|
| +     */
|
| +    private static int arePageNumsAdjacentAndConsecutive(int[] selectedNums, int[] allNums) {
|
| +        List<PageParamInfo.PageInfo> ascendingNumbers = new ArrayList<PageParamInfo.PageInfo>();
|
| +        // Maps each number to its index within |allNums|.
|
| +        Map<Integer, Integer> numberToPos = new HashMap<Integer, Integer>();
|
| +
|
| +        for (int pos = 0; pos < allNums.length; pos++) {
|
| +            numberToPos.put(allNums[pos], pos);
|
| +            ascendingNumbers.add(new PageParamInfo.PageInfo(allNums[pos], ""));
|
| +        }
|
| +
|
| +        List<PageParameterDetector.LinkInfo> allLinkInfo =
|
| +                new ArrayList<PageParameterDetector.LinkInfo>();
|
| +        for (int number : selectedNums) {
|
| +            final Integer pos = numberToPos.get(number);
|
| +            // A number missing from |allNums| would otherwise reach LinkInfo as a null position.
|
| +            assertNotNull("selected number " + number + " does not occur in allNums", pos);
|
| +            allLinkInfo.add(new PageParameterDetector.LinkInfo(number, number, pos));
|
| +        }
|
| +
|
| +        return PageParameterDetector.arePageNumsAdjacentAndConsecutive(allLinkInfo,
|
| +                ascendingNumbers);
|
| +    }
|
| +
|
| + private static boolean isAdjacent(int result) {
|
| + return (result & PageParameterDetector.PAGE_NUM_ADJACENT_MASK) ==
|
| + PageParameterDetector.PAGE_NUM_ADJACENT_MASK;
|
| + }
|
| +
|
| + private static boolean isConsecutive(int result) {
|
| + return (result & PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK) ==
|
| + PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK;
|
| + }
|
| +
|
| }
|
|
|