Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(562)

Side by Side Diff: javatests/org/chromium/distiller/PathComponentPagePatternTest.java

Issue 1029593003: implement validations of pagination URLs (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: addr chris's comments Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package org.chromium.distiller;
6
7 import com.google.gwt.regexp.shared.MatchResult;
8 import com.google.gwt.regexp.shared.RegExp;
9
10 public class PathComponentPagePatternTest extends DomDistillerJsTestCase {
11 private static final String PAGE_PARAM_VALUE = "8";
12 private static final RegExp sDigitsRegExp = RegExp.compile("(\\d+)", "gi");
13
14 public void testIsPagingUrl() {
15 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
16 "http://www.foo.com/a/abc-[*!].html"));
17 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
18 "http://www.foo.com/a/abc-[*!].html"));
19 assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a /abc-[*!]"));
20 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com /a/abc-[*!]"));
21 assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com /a/b-[*!]-c-3"));
22 assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a -[*!]-c-3"));
23 assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.c om/a-p-[*!]-c-3"));
24 assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo .com/a/abc-[*!]"));
25 assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/ abc-[*!]"));
26 assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/ab c-[*!]"));
27 assertFalse(isPagingUrl("http://www.foo.com/a/abc.html",
28 "http://www.foo.com/a/abc[*!].html"));
29
30 assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.co m/a/page/[*!]"));
31 assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/pag e/[*!]"));
32 assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
33 "http://www.foo.com/a/page/[*!]/abc.html"));
34 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
35 "http://www.foo.com/a/page/[*!]/abc.html"));
36 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
37 "http://www.foo.com/a/[*!]/abc.html"));
38 assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
39 "http://www.foo.com/a/[*!]/abc.html"));
40 assertTrue(isPagingUrl("http://www.foo.com/abc.html",
41 "http://www.foo.com/a/[*!]/abc.html"));
42 assertTrue(isPagingUrl("http://www.foo.com/a/page/2page",
43 "http://www.foo.com/a/page/[*!]page"));
44 assertFalse(isPagingUrl("http://www.foo.com/a/page/2",
45 "http://www.foo.com/a/page/[*!]page"));
46 assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.c om/a/page/[*!]"));
47 assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.c om/p/page/[*!]"));
48 }
49
50 public void testIsPagePatternValid() {
51 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
52 "http://www.google.com/forum-12/page/[*!]"));
53 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
54 "http://www.google.com/forum-12/[*!]"));
55 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
56 "http://www.google.com/forum-12/page-[*!]"));
57
58 assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
59 "http://www.google.com/forum-12/food/for/bar/[*!]"));
60 assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
61 "http://www.google.com/forum-12-food-[*!]"));
62
63 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
64 "http://www.google.com/forum-12/food/2012/01/[*!]"));
65 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012 /01/01",
66 "http://www.google.com/forum-12/food/2012/01/[*!]"));
67
68 assertTrue(isPagePatternValid("http://www.google.com/thread/12",
69 "http://www.google.com/thread/12/page/[*!]"));
70 assertFalse(isPagePatternValid("http://www.google.com/thread/12/foo",
71 "http://www.google.com/thread/12/page/[*!]/foo"));
72 assertTrue(isPagePatternValid("http://www.google.com/thread/12/foo",
73 "http://www.google.com/thread/12/[*!]/foo"));
74 }
75
76 public void testIsLastNumericPathComponentBad() {
77 // Path component is not numeric i.e. contains non-digits.
78 String url = "http://www.foo.com/a2";
79 int digitStart = url.indexOf("2");
80 assertFalse(isLastNumericPathComponentBad(url, digitStart));
81
82 // Numeric path component is first.
83 url = "http://www.foo.com/2";
84 digitStart = url.indexOf("2");
85 assertFalse(isLastNumericPathComponentBad(url, digitStart));
86
87 // Numeric path component follows a path component that is not a bad pag e param name.
88 url = "http://www.foo.com/good/2";
89 digitStart = url.indexOf("2");
90 assertFalse(isLastNumericPathComponentBad(url, digitStart));
91
92 // Numeric path component follows a path component that is a bad page pa ram name.
93 url = "http://www.foo.com/wiki/2";
94 digitStart = url.indexOf("2");
95 assertTrue(isLastNumericPathComponentBad(url, digitStart));
96
97 // (s)htm(l) extension doesn't follow digit.
98 url = "http://www.foo.com/2a";
99 digitStart = url.indexOf("2");
100 assertFalse(isLastNumericPathComponentBad(url, digitStart));
101
102 // .htm follows digit, previous path component is not a bad page param n ame.
103 url = "http://www.foo.com/good/2.htm";
104 digitStart = url.indexOf("2");
105 assertFalse(isLastNumericPathComponentBad(url, digitStart));
106
107 // .html follows digit, previous path component is a bad page param name .
108 url = "http://www.foo.com/wiki/2.html";
109 digitStart = url.indexOf("2");
110 assertTrue(isLastNumericPathComponentBad(url, digitStart));
111
112 // .shtml follows digit, previous path component is not a bad page param name, but the one
113 // before that is.
114 url = "http://www.foo.com/wiki/good/2.shtml";
115 digitStart = url.indexOf("2");
116 assertFalse(isLastNumericPathComponentBad(url, digitStart));
117 }
118
119 private static boolean isPagingUrl(String urlStr, String patternStr) {
120 ParsedUrl url = ParsedUrl.create(urlStr);
121 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr );
122 assertTrue(pattern != null);
123 return pattern.isPagingUrl(urlStr);
124 }
125
126 private static boolean isPagePatternValid(String urlStr, String patternStr) {
127 ParsedUrl url = ParsedUrl.create(urlStr);
128 assertTrue(url != null);
129 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr );
130 assertTrue(pattern != null);
131 return pattern.isValidFor(url);
132 }
133
134 private static boolean isLastNumericPathComponentBad(String url, int digitSt art) {
135 return PathComponentPagePattern.isLastNumericPathComponentBad(url, 18, d igitStart,
136 digitStart + 1);
137 }
138
139 private static PageParameterDetector.PagePattern createPagePattern(String pa tternStr) {
140 int pathStart = patternStr.indexOf('/');
141 int digitStart = patternStr.indexOf(PageParameterDetector.PAGE_PARAM_PLA CEHOLDER);
142 sDigitsRegExp.setLastIndex(digitStart);
143 String oriUrlStr = patternStr.replace(PageParameterDetector.PAGE_PARAM_P LACEHOLDER,
144 PAGE_PARAM_VALUE);
145 MatchResult match = sDigitsRegExp.exec(oriUrlStr);
146 if (match == null) return null;
147 return PathComponentPagePattern.create(ParsedUrl.create(oriUrlStr), path Start, digitStart,
148 sDigitsRegExp.getLastIndex());
149 }
150
151 }
OLDNEW
« no previous file with comments | « javatests/org/chromium/distiller/ParsedUrlTest.java ('k') | javatests/org/chromium/distiller/QueryParamPagePatternTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698