| Index: javatests/org/chromium/distiller/PathComponentPagePatternTest.java
|
| diff --git a/javatests/org/chromium/distiller/PathComponentPagePatternTest.java b/javatests/org/chromium/distiller/PathComponentPagePatternTest.java
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f8426677ec018baf0b146c0dc2bb89322d144ac3
|
| --- /dev/null
|
| +++ b/javatests/org/chromium/distiller/PathComponentPagePatternTest.java
|
| @@ -0,0 +1,151 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +package org.chromium.distiller;
|
| +
|
| +import com.google.gwt.regexp.shared.MatchResult;
|
| +import com.google.gwt.regexp.shared.RegExp;
|
| +
|
| +public class PathComponentPagePatternTest extends DomDistillerJsTestCase {
|
| + private static final String PAGE_PARAM_VALUE = "8";
|
| + private static final RegExp sDigitsRegExp = RegExp.compile("(\\d+)", "gi");
|
| +
|
| + public void testIsPagingUrl() {
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
|
| + "http://www.foo.com/a/abc-[*!].html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
|
| + "http://www.foo.com/a/abc-[*!].html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a/abc.html",
|
| + "http://www.foo.com/a/abc[*!].html"));
|
| +
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/page/[*!]"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
|
| + "http://www.foo.com/a/page/[*!]/abc.html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
|
| + "http://www.foo.com/a/page/[*!]/abc.html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
|
| + "http://www.foo.com/a/[*!]/abc.html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
|
| + "http://www.foo.com/a/[*!]/abc.html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/abc.html",
|
| + "http://www.foo.com/a/[*!]/abc.html"));
|
| + assertTrue(isPagingUrl("http://www.foo.com/a/page/2page",
|
| + "http://www.foo.com/a/page/[*!]page"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a/page/2",
|
| + "http://www.foo.com/a/page/[*!]page"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]"));
|
| + assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]"));
|
| + }
|
| +
|
| + public void testIsPagePatternValid() {
|
| + assertTrue(isPagePatternValid("http://www.google.com/forum-12",
|
| + "http://www.google.com/forum-12/page/[*!]"));
|
| + assertTrue(isPagePatternValid("http://www.google.com/forum-12",
|
| + "http://www.google.com/forum-12/[*!]"));
|
| + assertTrue(isPagePatternValid("http://www.google.com/forum-12",
|
| + "http://www.google.com/forum-12/page-[*!]"));
|
| +
|
| + assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
|
| + "http://www.google.com/forum-12/food/for/bar/[*!]"));
|
| + assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
|
| + "http://www.google.com/forum-12-food-[*!]"));
|
| +
|
| + assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
|
| + "http://www.google.com/forum-12/food/2012/01/[*!]"));
|
| + assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012/01/01",
|
| + "http://www.google.com/forum-12/food/2012/01/[*!]"));
|
| +
|
| + assertTrue(isPagePatternValid("http://www.google.com/thread/12",
|
| + "http://www.google.com/thread/12/page/[*!]"));
|
| + assertFalse(isPagePatternValid("http://www.google.com/thread/12/foo",
|
| + "http://www.google.com/thread/12/page/[*!]/foo"));
|
| + assertTrue(isPagePatternValid("http://www.google.com/thread/12/foo",
|
| + "http://www.google.com/thread/12/[*!]/foo"));
|
| + }
|
| +
|
| + public void testIsLastNumericPathComponentBad() {
|
| + // Path component is not numeric i.e. contains non-digits.
|
| + String url = "http://www.foo.com/a2";
|
| + int digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // Numeric path component is first.
|
| + url = "http://www.foo.com/2";
|
| + digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // Numeric path component follows a path component that is not a bad page param name.
|
| + url = "http://www.foo.com/good/2";
|
| + digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // Numeric path component follows a path component that is a bad page param name.
|
| + url = "http://www.foo.com/wiki/2";
|
| + digitStart = url.indexOf("2");
|
| + assertTrue(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // (s)htm(l) extension doesn't follow digit.
|
| + url = "http://www.foo.com/2a";
|
| + digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // .htm follows digit, previous path component is not a bad page param name.
|
| + url = "http://www.foo.com/good/2.htm";
|
| + digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // .html follows digit, previous path component is a bad page param name.
|
| + url = "http://www.foo.com/wiki/2.html";
|
| + digitStart = url.indexOf("2");
|
| + assertTrue(isLastNumericPathComponentBad(url, digitStart));
|
| +
|
| + // .shtml follows digit, previous path component is not a bad page param name, but the one
|
| + // before that is.
|
| + url = "http://www.foo.com/wiki/good/2.shtml";
|
| + digitStart = url.indexOf("2");
|
| + assertFalse(isLastNumericPathComponentBad(url, digitStart));
|
| + }
|
| +
|
| + private static boolean isPagingUrl(String urlStr, String patternStr) {
|
| + ParsedUrl url = ParsedUrl.create(urlStr);
|
| + PageParameterDetector.PagePattern pattern = createPagePattern(patternStr);
|
| + assertTrue(pattern != null);
|
| + return pattern.isPagingUrl(urlStr);
|
| + }
|
| +
|
| + private static boolean isPagePatternValid(String urlStr, String patternStr) {
|
| + ParsedUrl url = ParsedUrl.create(urlStr);
|
| + assertTrue(url != null);
|
| + PageParameterDetector.PagePattern pattern = createPagePattern(patternStr);
|
| + assertTrue(pattern != null);
|
| + return pattern.isValidFor(url);
|
| + }
|
| +
|
| + private static boolean isLastNumericPathComponentBad(String url, int digitStart) {
|
| + return PathComponentPagePattern.isLastNumericPathComponentBad(url, 18, digitStart,
|
| + digitStart + 1);
|
| + }
|
| +
|
| + private static PageParameterDetector.PagePattern createPagePattern(String patternStr) {
|
| + int pathStart = patternStr.indexOf('/');
|
| + int digitStart = patternStr.indexOf(PageParameterDetector.PAGE_PARAM_PLACEHOLDER);
|
| + sDigitsRegExp.setLastIndex(digitStart);
|
| + String oriUrlStr = patternStr.replace(PageParameterDetector.PAGE_PARAM_PLACEHOLDER,
|
| + PAGE_PARAM_VALUE);
|
| + MatchResult match = sDigitsRegExp.exec(oriUrlStr);
|
| + if (match == null) return null;
|
| + return PathComponentPagePattern.create(ParsedUrl.create(oriUrlStr), pathStart, digitStart,
|
| + sDigitsRegExp.getLastIndex());
|
| + }
|
| +
|
| +}
|
|
|