Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2748)

Unified Diff: javatests/org/chromium/distiller/PathComponentPagePatternTest.java

Issue 1029593003: implement validations of pagination URLs (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: addr chris's comments Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: javatests/org/chromium/distiller/PathComponentPagePatternTest.java
diff --git a/javatests/org/chromium/distiller/PathComponentPagePatternTest.java b/javatests/org/chromium/distiller/PathComponentPagePatternTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..f8426677ec018baf0b146c0dc2bb89322d144ac3
--- /dev/null
+++ b/javatests/org/chromium/distiller/PathComponentPagePatternTest.java
@@ -0,0 +1,151 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.distiller;
+
+import com.google.gwt.regexp.shared.MatchResult;
+import com.google.gwt.regexp.shared.RegExp;
+
+/**
+ * Tests for {@link PathComponentPagePattern}.  Covers {@code isPagingUrl}, {@code isValidFor}
+ * and the static {@code isLastNumericPathComponentBad} check.
+ */
+public class PathComponentPagePatternTest extends DomDistillerJsTestCase {
+    // Page number substituted for the placeholder when a pattern string is turned into a URL.
+    private static final String PAGE_PARAM_VALUE = "8";
+    // Shared, stateful regex: createPagePattern() sets and reads its lastIndex around exec().
+    private static final RegExp sDigitsRegExp = RegExp.compile("(\\d+)", "gi");
+
+    /** Checks {@code isPagingUrl} for a range of URL / pattern combinations. */
+    public void testIsPagingUrl() {
+        // Placeholder sits inside a path component.
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
+                "http://www.foo.com/a/abc-[*!].html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+                "http://www.foo.com/a/abc-[*!].html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a/abc-[*!]"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com/a/abc-[*!]"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com/a/b-[*!]-c-3"));
+        assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a-[*!]-c-3"));
+        assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.com/a-p-[*!]-c-3"));
+        assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo.com/a/abc-[*!]"));
+        assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/abc-[*!]"));
+        assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/abc-[*!]"));
+        assertFalse(isPagingUrl("http://www.foo.com/a/abc.html",
+                "http://www.foo.com/a/abc[*!].html"));
+
+        // Placeholder is at the start of a path component.
+        assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.com/a/page/[*!]"));
+        assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/page/[*!]"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
+                "http://www.foo.com/a/page/[*!]/abc.html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+                "http://www.foo.com/a/page/[*!]/abc.html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
+                "http://www.foo.com/a/[*!]/abc.html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
+                "http://www.foo.com/a/[*!]/abc.html"));
+        assertTrue(isPagingUrl("http://www.foo.com/abc.html",
+                "http://www.foo.com/a/[*!]/abc.html"));
+        assertTrue(isPagingUrl("http://www.foo.com/a/page/2page",
+                "http://www.foo.com/a/page/[*!]page"));
+        assertFalse(isPagingUrl("http://www.foo.com/a/page/2",
+                "http://www.foo.com/a/page/[*!]page"));
+        assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.com/a/page/[*!]"));
+        assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.com/p/page/[*!]"));
+    }
+
+    /** Checks {@code isValidFor} for a range of document URL / pattern combinations. */
+    public void testIsPagePatternValid() {
+        assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+                "http://www.google.com/forum-12/page/[*!]"));
+        assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+                "http://www.google.com/forum-12/[*!]"));
+        assertTrue(isPagePatternValid("http://www.google.com/forum-12",
+                "http://www.google.com/forum-12/page-[*!]"));
+
+        assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
+                "http://www.google.com/forum-12/food/for/bar/[*!]"));
+        assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
+                "http://www.google.com/forum-12-food-[*!]"));
+
+        assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
+                "http://www.google.com/forum-12/food/2012/01/[*!]"));
+        assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012/01/01",
+                "http://www.google.com/forum-12/food/2012/01/[*!]"));
+
+        assertTrue(isPagePatternValid("http://www.google.com/thread/12",
+                "http://www.google.com/thread/12/page/[*!]"));
+        assertFalse(isPagePatternValid("http://www.google.com/thread/12/foo",
+                "http://www.google.com/thread/12/page/[*!]/foo"));
+        assertTrue(isPagePatternValid("http://www.google.com/thread/12/foo",
+                "http://www.google.com/thread/12/[*!]/foo"));
+    }
+
+    /** Checks {@code isLastNumericPathComponentBad} with the digit "2" as the numeric match. */
+    public void testIsLastNumericPathComponentBad() {
+        // Path component is not numeric i.e. contains non-digits.
+        String url = "http://www.foo.com/a2";
+        int digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+
+        // Numeric path component is first.
+        url = "http://www.foo.com/2";
+        digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+
+        // Numeric path component follows a path component that is not a bad page param name.
+        url = "http://www.foo.com/good/2";
+        digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+
+        // Numeric path component follows a path component that is a bad page param name.
+        url = "http://www.foo.com/wiki/2";
+        digitStart = url.indexOf("2");
+        assertTrue(isLastNumericPathComponentBad(url, digitStart));
+
+        // (s)htm(l) extension doesn't follow digit.
+        url = "http://www.foo.com/2a";
+        digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+
+        // .htm follows digit, previous path component is not a bad page param name.
+        url = "http://www.foo.com/good/2.htm";
+        digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+
+        // .html follows digit, previous path component is a bad page param name.
+        url = "http://www.foo.com/wiki/2.html";
+        digitStart = url.indexOf("2");
+        assertTrue(isLastNumericPathComponentBad(url, digitStart));
+
+        // .shtml follows digit, previous path component is not a bad page param name, but the one
+        // before that is.
+        url = "http://www.foo.com/wiki/good/2.shtml";
+        digitStart = url.indexOf("2");
+        assertFalse(isLastNumericPathComponentBad(url, digitStart));
+    }
+
+    /**
+     * Returns the result of {@code isPagingUrl(urlStr)} on the pattern built from
+     * {@code patternStr}; fails the test if the pattern cannot be created.
+     */
+    private static boolean isPagingUrl(String urlStr, String patternStr) {
+        PageParameterDetector.PagePattern pattern = createPagePattern(patternStr);
+        assertTrue(pattern != null);
+        return pattern.isPagingUrl(urlStr);
+    }
+
+    /**
+     * Returns the result of {@code isValidFor} on the pattern built from {@code patternStr},
+     * given the parsed {@code urlStr}; fails the test if either cannot be created.
+     */
+    private static boolean isPagePatternValid(String urlStr, String patternStr) {
+        ParsedUrl url = ParsedUrl.create(urlStr);
+        assertTrue(url != null);
+        PageParameterDetector.PagePattern pattern = createPagePattern(patternStr);
+        assertTrue(pattern != null);
+        return pattern.isValidFor(url);
+    }
+
+    /**
+     * Calls {@link PathComponentPagePattern#isLastNumericPathComponentBad} with the single
+     * character at {@code digitStart} as the digit range, i.e. [digitStart, digitStart + 1).
+     */
+    private static boolean isLastNumericPathComponentBad(String url, int digitStart) {
+        // Index of the first '/' after "scheme://", derived per URL instead of being hard-coded;
+        // it is 18 for the "http://www.foo.com/..." URLs used in this file.
+        // NOTE(review): assumes the callee's second argument is the path-start index, like the
+        // pathStart passed to PathComponentPagePattern.create() -- confirm.
+        int pathStart = url.indexOf('/', url.indexOf("://") + 3);
+        return PathComponentPagePattern.isLastNumericPathComponentBad(url, pathStart, digitStart,
+                digitStart + 1);
+    }
+
+    /**
+     * Builds a page pattern from {@code patternStr}, a URL string containing
+     * {@link PageParameterDetector#PAGE_PARAM_PLACEHOLDER}.  The placeholder is replaced with
+     * {@link #PAGE_PARAM_VALUE} and the digits found from the placeholder position onwards are
+     * passed to {@link PathComponentPagePattern#create} as the page number range.
+     *
+     * @return the result of {@code PathComponentPagePattern.create}, or null if the digits regex
+     *     finds no match.
+     */
+    private static PageParameterDetector.PagePattern createPagePattern(String patternStr) {
+        // NOTE(review): this is the first '/' in the string, which for the "http://..." patterns
+        // used here is the one inside "http://" (index 5), not the '/' that begins the path --
+        // confirm this is the intended pathStart.
+        int pathStart = patternStr.indexOf('/');
+        int digitStart = patternStr.indexOf(PageParameterDetector.PAGE_PARAM_PLACEHOLDER);
+        // Must be set before exec() so the search begins where the placeholder was.
+        sDigitsRegExp.setLastIndex(digitStart);
+        String oriUrlStr = patternStr.replace(PageParameterDetector.PAGE_PARAM_PLACEHOLDER,
+                PAGE_PARAM_VALUE);
+        MatchResult match = sDigitsRegExp.exec(oriUrlStr);
+        if (match == null) return null;
+        // After a successful exec(), lastIndex is read back as the end of the digit range.
+        return PathComponentPagePattern.create(ParsedUrl.create(oriUrlStr), pathStart, digitStart,
+                sDigitsRegExp.getLastIndex());
+    }
+}
« no previous file with comments | « javatests/org/chromium/distiller/ParsedUrlTest.java ('k') | javatests/org/chromium/distiller/QueryParamPagePatternTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698