Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(716)

Side by Side Diff: javatests/org/chromium/distiller/PathComponentPagePatternTest.java

Issue 1029593003: implement validations of pagination URLs (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: copyright 2016 -> 2015 Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package org.chromium.distiller;
6
7 import com.google.gwt.regexp.shared.MatchResult;
8 import com.google.gwt.regexp.shared.RegExp;
9
10 public class PathComponentPagePatternTest extends DomDistillerJsTestCase {
11 private static final String PAGE_PARAM_VALUE = "8";
12 private static final RegExp sDigitsRegExp = RegExp.compile("(\\d+)", "gi");
13
14 public void testIsPagingUrl() {
15 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html",
16 "http://www.foo.com/a/abc-[*!].html"));
17 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
18 "http://www.foo.com/a/abc-[*!].html"));
19 assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a /abc-[*!]"));
20 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com /a/abc-[*!]"));
21 assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com /a/b-[*!]-c-3"));
22 assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a -[*!]-c-3"));
23 assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.c om/a-p-[*!]-c-3"));
24 assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo .com/a/abc-[*!]"));
25 assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/ abc-[*!]"));
26 assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/ab c-[*!]"));
27 assertFalse(isPagingUrl("http://www.foo.com/a/abc.html",
28 "http://www.foo.com/a/abc[*!].html"));
29
30 assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.co m/a/page/[*!]"));
31 assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/pag e/[*!]"));
32 assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html",
33 "http://www.foo.com/a/page/[*!]/abc.html"));
34 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
35 "http://www.foo.com/a/page/[*!]/abc.html"));
36 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html",
37 "http://www.foo.com/a/[*!]/abc.html"));
38 assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html",
39 "http://www.foo.com/a/[*!]/abc.html"));
40 assertTrue(isPagingUrl("http://www.foo.com/abc.html",
41 "http://www.foo.com/a/[*!]/abc.html"));
42 assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.c om/a/page/[*!]"));
43 assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.c om/p/page/[*!]"));
44 }
45
46 public void testIsPagePatternValid() {
47 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
48 "http://www.google.com/forum-12/page/[*!]"));
49 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
50 "http://www.google.com/forum-12/[*!]"));
51 assertTrue(isPagePatternValid("http://www.google.com/forum-12",
52 "http://www.google.com/forum-12/page-[*!]"));
53
54 assertTrue(isPagePatternValid("http://www.google.com/forum-12/food",
55 "http://www.google.com/forum-12/food/for/bar/[*!]"));
56 assertTrue(isPagePatternValid("http://www.google.com/forum-12-food",
57 "http://www.google.com/forum-12-food-[*!]"));
58
59 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food",
60 "http://www.google.com/forum-12/food/2012/01/[*!]"));
61 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012 /01/01",
62 "http://www.google.com/forum-12/food/2012/01/[*!]"));
63
64 assertTrue(isPagePatternValid("http://www.google.com/thread/12",
65 "http://www.google.com/thread/12/page/[*!]"));
66 assertFalse(isPagePatternValid("http://www.google.com/thread/12/foo",
67 "http://www.google.com/thread/12/page/[*!]/foo"));
68 assertTrue(isPagePatternValid("http://www.google.com/thread/12/foo",
69 "http://www.google.com/thread/12/[*!]/foo"));
70 }
71
72 public void testIsLastNumericPathComponentBad() {
73 // Path component is not numeric i.e. contains non-digits.
74 String url = "http://www.foo.com/a2";
75 int digitStart = url.indexOf("2");
76 assertFalse(isLastNumericPathComponentBad(url, digitStart));
77
78 // Numeric path component is first.
79 url = "http://www.foo.com/2";
80 digitStart = url.indexOf("2");
81 assertFalse(isLastNumericPathComponentBad(url, digitStart));
82
83 // Numeric path component follows a path component that is not a bad pag e param name.
84 url = "http://www.foo.com/good/2";
85 digitStart = url.indexOf("2");
86 assertFalse(isLastNumericPathComponentBad(url, digitStart));
87
88 // Numeric path component follows a path component that is a bad page pa ram name.
89 url = "http://www.foo.com/wiki/2";
90 digitStart = url.indexOf("2");
91 assertTrue(isLastNumericPathComponentBad(url, digitStart));
92
93 // (s)htm(l) extension doesn't follow digit.
94 url = "http://www.foo.com/2a";
95 digitStart = url.indexOf("2");
96 assertFalse(isLastNumericPathComponentBad(url, digitStart));
97
98 // .htm follows digit, previous path component is not a bad page param n ame.
99 url = "http://www.foo.com/good/2.htm";
100 digitStart = url.indexOf("2");
101 assertFalse(isLastNumericPathComponentBad(url, digitStart));
102
103 // .html follows digit, previous path component is a bad page param name .
104 url = "http://www.foo.com/wiki/2.html";
105 digitStart = url.indexOf("2");
106 assertTrue(isLastNumericPathComponentBad(url, digitStart));
107
108 // .shtml follows digit, previous path component is not a bad page param name, but the one
109 // before that is.
110 url = "http://www.foo.com/wiki/good/2.shtml";
111 digitStart = url.indexOf("2");
112 assertFalse(isLastNumericPathComponentBad(url, digitStart));
113 }
114
115 private static boolean isPagingUrl(String urlStr, String patternStr) {
116 ParsedUrl url = ParsedUrl.create(urlStr);
117 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr );
118 assertTrue(pattern != null);
119 return pattern.isPagingUrl(urlStr);
120 }
121
122 private static boolean isPagePatternValid(String urlStr, String patternStr) {
123 ParsedUrl url = ParsedUrl.create(urlStr);
124 assertTrue(url != null);
125 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr );
126 assertTrue(pattern != null);
127 return pattern.isValidFor(url);
128 }
129
130 private static boolean isLastNumericPathComponentBad(String url, int digitSt art) {
131 return PathComponentPagePattern.isLastNumericPathComponentBad(url, 18, d igitStart,
132 digitStart + 1);
133 }
134
135 private static PageParameterDetector.PagePattern createPagePattern(String pa tternStr) {
136 int pathStart = patternStr.indexOf('/');
137 int digitStart = patternStr.indexOf(PageParameterDetector.PAGE_PARAM_PLA CEHOLDER);
138 sDigitsRegExp.setLastIndex(digitStart);
139 String oriUrlStr = patternStr.replace(PageParameterDetector.PAGE_PARAM_P LACEHOLDER,
140 PAGE_PARAM_VALUE);
141 MatchResult match = sDigitsRegExp.exec(oriUrlStr);
142 if (match == null) return null;
143 return PathComponentPagePattern.create(ParsedUrl.create(oriUrlStr), path Start, digitStart,
144 sDigitsRegExp.getLastIndex());
145 }
146
147 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698