OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 package org.chromium.distiller; |
| 6 |
| 7 import com.google.gwt.regexp.shared.MatchResult; |
| 8 import com.google.gwt.regexp.shared.RegExp; |
| 9 |
| 10 public class PathComponentPagePatternTest extends DomDistillerJsTestCase { |
| 11 private static final String PAGE_PARAM_VALUE = "8"; |
| 12 private static final RegExp sDigitsRegExp = RegExp.compile("(\\d+)", "gi"); |
| 13 |
| 14 public void testIsPagingUrl() { |
| 15 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2.html", |
| 16 "http://www.foo.com/a/abc-[*!].html")); |
| 17 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 18 "http://www.foo.com/a/abc-[*!].html")); |
| 19 assertTrue(isPagingUrl("http://www.foo.com/a/abc", "http://www.foo.com/a
/abc-[*!]")); |
| 20 assertTrue(isPagingUrl("http://www.foo.com/a/abc-2", "http://www.foo.com
/a/abc-[*!]")); |
| 21 assertTrue(isPagingUrl("http://www.foo.com/a/b-c-3", "http://www.foo.com
/a/b-[*!]-c-3")); |
| 22 assertTrue(isPagingUrl("http://www.foo.com/a-c-3", "http://www.foo.com/a
-[*!]-c-3")); |
| 23 assertTrue(isPagingUrl("http://www.foo.com/a-p-1-c-3", "http://www.foo.c
om/a-p-[*!]-c-3")); |
| 24 assertFalse(isPagingUrl("http://www.foo.com/a/abc-page", "http://www.foo
.com/a/abc-[*!]")); |
| 25 assertFalse(isPagingUrl("http://www.foo.com/a/2", "http://www.foo.com/a/
abc-[*!]")); |
| 26 assertFalse(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/ab
c-[*!]")); |
| 27 assertFalse(isPagingUrl("http://www.foo.com/a/abc.html", |
| 28 "http://www.foo.com/a/abc[*!].html")); |
| 29 |
| 30 assertTrue(isPagingUrl("http://www.foo.com/a/page/2", "http://www.foo.co
m/a/page/[*!]")); |
| 31 assertTrue(isPagingUrl("http://www.foo.com/a", "http://www.foo.com/a/pag
e/[*!]")); |
| 32 assertTrue(isPagingUrl("http://www.foo.com/a/page/2/abc.html", |
| 33 "http://www.foo.com/a/page/[*!]/abc.html")); |
| 34 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 35 "http://www.foo.com/a/page/[*!]/abc.html")); |
| 36 assertTrue(isPagingUrl("http://www.foo.com/a/abc.html", |
| 37 "http://www.foo.com/a/[*!]/abc.html")); |
| 38 assertTrue(isPagingUrl("http://www.foo.com/a/2/abc.html", |
| 39 "http://www.foo.com/a/[*!]/abc.html")); |
| 40 assertTrue(isPagingUrl("http://www.foo.com/abc.html", |
| 41 "http://www.foo.com/a/[*!]/abc.html")); |
| 42 assertTrue(isPagingUrl("http://www.foo.com/a/page/2page", |
| 43 "http://www.foo.com/a/page/[*!]page")); |
| 44 assertFalse(isPagingUrl("http://www.foo.com/a/page/2", |
| 45 "http://www.foo.com/a/page/[*!]page")); |
| 46 assertFalse(isPagingUrl("http://www.foo.com/a/page/b", "http://www.foo.c
om/a/page/[*!]")); |
| 47 assertFalse(isPagingUrl("http://www.foo.com/m/page/2", "http://www.foo.c
om/p/page/[*!]")); |
| 48 } |
| 49 |
| 50 public void testIsPagePatternValid() { |
| 51 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 52 "http://www.google.com/forum-12/page/[*!]")); |
| 53 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 54 "http://www.google.com/forum-12/[*!]")); |
| 55 assertTrue(isPagePatternValid("http://www.google.com/forum-12", |
| 56 "http://www.google.com/forum-12/page-[*!]")); |
| 57 |
| 58 assertTrue(isPagePatternValid("http://www.google.com/forum-12/food", |
| 59 "http://www.google.com/forum-12/food/for/bar/[*!]")); |
| 60 assertTrue(isPagePatternValid("http://www.google.com/forum-12-food", |
| 61 "http://www.google.com/forum-12-food-[*!]")); |
| 62 |
| 63 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food", |
| 64 "http://www.google.com/forum-12/food/2012/01/[*!]")); |
| 65 assertFalse(isPagePatternValid("http://www.google.com/forum-12/food/2012
/01/01", |
| 66 "http://www.google.com/forum-12/food/2012/01/[*!]")); |
| 67 |
| 68 assertTrue(isPagePatternValid("http://www.google.com/thread/12", |
| 69 "http://www.google.com/thread/12/page/[*!]")); |
| 70 assertFalse(isPagePatternValid("http://www.google.com/thread/12/foo", |
| 71 "http://www.google.com/thread/12/page/[*!]/foo")); |
| 72 assertTrue(isPagePatternValid("http://www.google.com/thread/12/foo", |
| 73 "http://www.google.com/thread/12/[*!]/foo")); |
| 74 } |
| 75 |
| 76 public void testIsLastNumericPathComponentBad() { |
| 77 // Path component is not numeric i.e. contains non-digits. |
| 78 String url = "http://www.foo.com/a2"; |
| 79 int digitStart = url.indexOf("2"); |
| 80 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 81 |
| 82 // Numeric path component is first. |
| 83 url = "http://www.foo.com/2"; |
| 84 digitStart = url.indexOf("2"); |
| 85 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 86 |
| 87 // Numeric path component follows a path component that is not a bad pag
e param name. |
| 88 url = "http://www.foo.com/good/2"; |
| 89 digitStart = url.indexOf("2"); |
| 90 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 91 |
| 92 // Numeric path component follows a path component that is a bad page pa
ram name. |
| 93 url = "http://www.foo.com/wiki/2"; |
| 94 digitStart = url.indexOf("2"); |
| 95 assertTrue(isLastNumericPathComponentBad(url, digitStart)); |
| 96 |
| 97 // (s)htm(l) extension doesn't follow digit. |
| 98 url = "http://www.foo.com/2a"; |
| 99 digitStart = url.indexOf("2"); |
| 100 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 101 |
| 102 // .htm follows digit, previous path component is not a bad page param n
ame. |
| 103 url = "http://www.foo.com/good/2.htm"; |
| 104 digitStart = url.indexOf("2"); |
| 105 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 106 |
| 107 // .html follows digit, previous path component is a bad page param name
. |
| 108 url = "http://www.foo.com/wiki/2.html"; |
| 109 digitStart = url.indexOf("2"); |
| 110 assertTrue(isLastNumericPathComponentBad(url, digitStart)); |
| 111 |
| 112 // .shtml follows digit, previous path component is not a bad page param
name, but the one |
| 113 // before that is. |
| 114 url = "http://www.foo.com/wiki/good/2.shtml"; |
| 115 digitStart = url.indexOf("2"); |
| 116 assertFalse(isLastNumericPathComponentBad(url, digitStart)); |
| 117 } |
| 118 |
| 119 private static boolean isPagingUrl(String urlStr, String patternStr) { |
| 120 ParsedUrl url = ParsedUrl.create(urlStr); |
| 121 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr
); |
| 122 assertTrue(pattern != null); |
| 123 return pattern.isPagingUrl(urlStr); |
| 124 } |
| 125 |
| 126 private static boolean isPagePatternValid(String urlStr, String patternStr)
{ |
| 127 ParsedUrl url = ParsedUrl.create(urlStr); |
| 128 assertTrue(url != null); |
| 129 PageParameterDetector.PagePattern pattern = createPagePattern(patternStr
); |
| 130 assertTrue(pattern != null); |
| 131 return pattern.isValidFor(url); |
| 132 } |
| 133 |
| 134 private static boolean isLastNumericPathComponentBad(String url, int digitSt
art) { |
| 135 return PathComponentPagePattern.isLastNumericPathComponentBad(url, 18, d
igitStart, |
| 136 digitStart + 1); |
| 137 } |
| 138 |
| 139 private static PageParameterDetector.PagePattern createPagePattern(String pa
tternStr) { |
| 140 int pathStart = patternStr.indexOf('/'); |
| 141 int digitStart = patternStr.indexOf(PageParameterDetector.PAGE_PARAM_PLA
CEHOLDER); |
| 142 sDigitsRegExp.setLastIndex(digitStart); |
| 143 String oriUrlStr = patternStr.replace(PageParameterDetector.PAGE_PARAM_P
LACEHOLDER, |
| 144 PAGE_PARAM_VALUE); |
| 145 MatchResult match = sDigitsRegExp.exec(oriUrlStr); |
| 146 if (match == null) return null; |
| 147 return PathComponentPagePattern.create(ParsedUrl.create(oriUrlStr), path
Start, digitStart, |
| 148 sDigitsRegExp.getLastIndex()); |
| 149 } |
| 150 |
| 151 } |
OLD | NEW |