Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: javatests/org/chromium/distiller/PageParameterDetectorTest.java

Issue 1029593003: implement validations of pagination URLs (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: rename test Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: javatests/org/chromium/distiller/PageParameterDetectorTest.java
diff --git a/javatests/org/chromium/distiller/PageParameterDetectorTest.java b/javatests/org/chromium/distiller/PageParameterDetectorTest.java
index e545ff07fe2080f52d36473c908b9628cac95525..856e20e90351db01d941f2daee68c4cd2e91a52e 100644
--- a/javatests/org/chromium/distiller/PageParameterDetectorTest.java
+++ b/javatests/org/chromium/distiller/PageParameterDetectorTest.java
@@ -4,8 +4,66 @@
package org.chromium.distiller;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
public class PageParameterDetectorTest extends DomDistillerJsTestCase {
+ public void testIsPagingUrl() { // Checks isPagingUrl(url, pagePattern); first group: "[*!]" is a query value.
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryB=4&queryC=v3",
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3",
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryB=2&queryC=v3",
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?queryC=v3",
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b", "http://www.foo.com/a/b?page=[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b?page=3",
+ "http://www.foo.com/a/b?page=[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b/", "http://www.foo.com/a/b?page=[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b.htm", "http://www.foo.com/a/b?page=[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b.html", "http://www.foo.com/a/b?page=[*!]"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1&queryC=v3", // Expected non-matches start here.
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryB=bar&queryC=v3",
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryC=v3&queryB=3",
+ "http://www.foo.com/a/b?queryB=[*!]&queryC=v3"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/b?queryA=v1",
+ "http://www.foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3"));
+ // Second group: "[*!]" follows a '-' inside a path component.
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
+ "http://www.foo.com/a/abc-[*!].html"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+ "http://www.foo.com/a/abc-[*!].html"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3"));
+ assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]"));
+ assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]"));
+ // Third group: "[*!]" is an entire path component.
+ assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/page/[*!]"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
+ "http://www.foo.com/a/page/[*!]/abc.html"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+ "http://www.foo.com/a/page/[*!]/abc.html"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+ "http://www.foo.com/a/[*!]/abc.html"));
+ assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
+ "http://www.foo.com/a/[*!]/abc.html"));
+ assertTrue(isPagingUrl("http://www.foo.com/abc.html",
+ "http://www.foo.com/a/[*!]/abc.html"));
+ assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]"));
+ assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]"));
+ }
+
public void testIsLastNumericPathComponentBad() {
// Path component is not numeric i.e. contains non-digits.
String url = "http://www.foo.com/a2";
@@ -57,4 +115,124 @@ public class PageParameterDetectorTest extends DomDistillerJsTestCase {
digitStart + 1));
}
+ public void testIsPagePatternValid() { // Checks isPagePatternValid(documentUrl, pagePattern); accepted pairs come first.
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+ "http://www.google.com/forum-12/page/[*!]"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+ "http://www.google.com/forum-12/[*!]"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+ "http://www.google.com/forum-12/page-[*!]"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+ "http://www.google.com/forum-12?page=[*!]"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12?sid=12345",
+ "http://www.google.com/forum-12?page=[*!]&sort=d"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
+ "http://www.google.com/forum-12/food/for/bar/[*!]"));
+ assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
+ "http://www.google.com/forum-12-food-[*!]"));
+ // Pairs below are expected to be rejected for the given document URL.
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
+ "http://www.google.com/forum-12/food/2012/01/[*!]"));
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012/01/01",
+ "http://www.google.com/forum-12/food/2012/01/[*!]"));
+ assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
+ "http://www.google.com/forum-12?page=[*!]"));
+ }
+
+ public void testArePageNumsAdjacentAndConsecutive() { // Each case checks both flags of the result for one selection.
+ { // Every number in allNums is selected.
+ final int[] allNums = { 1, 2 };
+ final int[] selectedNums = { 1, 2 };
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertTrue(isAdjacent(result));
+ assertTrue(isConsecutive(result));
+ }
+ { // The selection is the last two entries of allNums.
+ final int[] allNums = { 1, 2, 3 };
+ final int[] selectedNums = { 2, 3 };
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertTrue(isAdjacent(result));
+ assertTrue(isConsecutive(result));
+ }
+ { // A single number (6) is left out of the selection; both flags are still expected.
+ final int[] allNums = { 1, 5, 6, 7, 10 };
+ final int[] selectedNums = { 1, 5, 7, 10 };
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertTrue(isAdjacent(result));
+ assertTrue(isConsecutive(result));
+ }
+ {
+ final int[] allNums = { 10, 25, 50 };
+ final int[] selectedNums = { 10, 25, 50 }; // No consecutive pairs.
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertTrue(isAdjacent(result));
+ assertFalse(isConsecutive(result));
+ }
+ {
+ final int[] allNums = { 23, 24, 30 };
+ // This list doesn't satisfy consecutive rule. There should be "22" on the left of "23",
+ // or "25" on the right of "24", or "29" on the left of "30".
+ final int[] selectedNums = { 23, 24, 30 };
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertTrue(isAdjacent(result));
+ assertFalse(isConsecutive(result));
+ }
+ {
+ final int[] allNums = { 1, 2, 3, 4, 5 };
+ final int[] selectedNums = { 1, 3, 5 }; // Two gaps.
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertFalse(isAdjacent(result));
+ assertFalse(isConsecutive(result));
+ }
+ {
+ final int[] allNums = { 2, 3, 4, 5 };
+ final int[] selectedNums = { 2, 5 }; // A gap of 2 numbers.
+ int result = arePageNumsAdjacentAndConsecutive(selectedNums, allNums);
+ assertFalse(isAdjacent(result));
+ assertFalse(isConsecutive(result));
+ }
+ }
+
+ private static boolean isPagingUrl(String url, String pagePattern) { // Shorthand so the tests above stay compact.
+ return PageParameterDetector.isPagingUrl(url, pagePattern); // Pure delegation; arguments pass through unchanged.
+ }
+
+ private static boolean isPagePatternValid(String url, String pagePattern) { // Parses url, then delegates.
+ ParsedUrl parsedUrl = ParsedUrl.create(url);
+ assertTrue(parsedUrl != null); // Presumably create() yields null for an unparsable URL - TODO confirm.
+ return PageParameterDetector.isPagePatternValid(parsedUrl, pagePattern);
+ }
+
+ private static int arePageNumsAdjacentAndConsecutive(int[] selectedNums, int[] allNums) { // Builds fixtures from ints.
+ List<PageParamInfo.PageInfo> ascendingNumbers = new ArrayList<PageParamInfo.PageInfo>();
+ Map<Integer, Integer> numberToPos = new HashMap<Integer, Integer>(); // number -> its index in allNums.
+
+ for (int i = 0; i < allNums.length; i++) {
+ final int number = allNums[i];
+ numberToPos.put(number, i);
+ ascendingNumbers.add(new PageParamInfo.PageInfo(number, "")); // List order follows allNums; callers pass it ascending.
+ }
+
+ List<PageParameterDetector.LinkInfo> allLinkInfo =
+ new ArrayList<PageParameterDetector.LinkInfo>();
+ for (int i = 0; i < selectedNums.length; i++) {
+ final int number = selectedNums[i];
+ allLinkInfo.add(new PageParameterDetector.LinkInfo(number, number, // The number is passed for both leading arguments.
+ numberToPos.get(number))); // get() returns null if number is absent from allNums, so selections must be subsets.
+ }
+
+ return PageParameterDetector.arePageNumsAdjacentAndConsecutive(allLinkInfo, // Result is decoded by isAdjacent/isConsecutive.
+ ascendingNumbers);
+ }
+
+ private static boolean isAdjacent(int result) { // True when every bit of PAGE_NUM_ADJACENT_MASK is set in result.
+ return (result & PageParameterDetector.PAGE_NUM_ADJACENT_MASK) ==
+ PageParameterDetector.PAGE_NUM_ADJACENT_MASK;
+ }
+
+ private static boolean isConsecutive(int result) { // True when every bit of PAGE_NUM_CONSECUTIVE_MASK is set in result.
+ return (result & PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK) ==
+ PageParameterDetector.PAGE_NUM_CONSECUTIVE_MASK;
+ }
+
}

Powered by Google App Engine
This is Rietveld 408576698