OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 package org.chromium.distiller; |
| 6 |
| 7 import com.google.gwt.regexp.shared.RegExp; |
| 8 |
| 9 /** |
| 10 * This class detects the page parameter in the query of a potential pagination
URL. If detected, |
| 11 * it replaces the page param value with PageParameterDetector.PAGE_PARAM_PLACEH
OLDER, then creates |
| 12 * and returns a new object. This object can then be called via PageParameterDe
tector.PagePattern |
| 13 * interface to: |
| 14 * - validate the generated URL page pattern against the document URL |
| 15 * - determine if a URL is a paging URL based on the page pattern. |
| 16 * Example: if the original url is "http://www.foo.com/a/b/?page=2&query=a", the
page pattern is |
| 17 * "http://www.foo.com/a/b?page=[*!]&query=a". (See comments at top of PageParam
eterDetector.java). |
| 18 */ |
| 19 public class QueryParamPagePattern implements PageParameterDetector.PagePattern
{ |
| 20 private final ParsedUrl mUrl; |
| 21 private final int mPageNumber; |
| 22 private final int mPlaceholderStart; |
| 23 private final String mUrlStr; |
| 24 private final int mQueryStart; |
| 25 // Start position of query param containing placeholder. |
| 26 private int mPlaceholderSegmentStart; |
| 27 private final String mPrefix; // The part of the page pattern before the pl
aceholder. |
| 28 private String mSuffix = ""; // The part of the page pattern after the plac
eholder. |
| 29 // This is not mSuffix.length(), see their initializations in constructor. |
| 30 private final int mSuffixLen; |
| 31 |
| 32 /** |
| 33 * Returns a new QueryParamPagePattern if url is valid and page param is in
the query. |
| 34 */ |
| 35 static PageParameterDetector.PagePattern create(ParsedUrl url, String queryN
ame, |
| 36 String queryValue) { |
| 37 try { |
| 38 return new QueryParamPagePattern(url, queryName, queryValue); |
| 39 } catch (IllegalArgumentException e) { |
| 40 return null; |
| 41 } |
| 42 } |
| 43 |
| 44 @Override |
| 45 public String toString() { |
| 46 return mUrlStr; |
| 47 } |
| 48 |
| 49 @Override |
| 50 public int getPageNumber() { |
| 51 return mPageNumber; |
| 52 } |
| 53 |
| 54 /** |
| 55 * Returns true if page pattern and URL have the same path components. |
| 56 * |
| 57 * @param docUrl the current document URL |
| 58 */ |
| 59 @Override |
| 60 public boolean isValidFor(ParsedUrl docUrl) { |
| 61 return docUrl.getTrimmedPath().equalsIgnoreCase(mUrl.getTrimmedPath()); |
| 62 } |
| 63 |
| 64 private static RegExp sSlashOrHtmExtRegExp = null; // Match either '/' or "
.htm(l)". |
| 65 |
| 66 /** |
| 67 * Returns true if a URL matches this page pattern based on a pipeline of ru
les: |
| 68 * - suffix (part of pattern after page param placeholder) must be same, and |
| 69 * - scheme, host, and path must be same, and |
| 70 * - query params, except that for page number, must be same in order and va
lue, and |
| 71 * - query value must be a plain number. |
| 72 * |
| 73 * @param url the URL to evalutate |
| 74 */ |
| 75 @Override |
| 76 public boolean isPagingUrl(String url) { |
| 77 // Both url and pattern must have the same suffix, if available. |
| 78 if (mSuffixLen != 0 && !url.endsWith(mSuffix)) return false; |
| 79 |
| 80 final int suffixStart = url.length() - mSuffixLen; |
| 81 |
| 82 // The url matches the pattern only when: |
| 83 // 1. has same prefix (scheme, host, path) |
| 84 // 2. has same query params with same value (except page number query)
in the same |
| 85 // order. |
| 86 // Examples: |
| 87 // If page pattern is http://foo.com/a/b?queryA=v1&queryB=[*!]&queryC=v3 |
| 88 // Returns true for: |
| 89 // - http://foo.com/a/b/?queryA=v1&queryC=v3 |
| 90 // - http://foo.com/a/b/?queryA=v1&queyrB=4&queryC=v3 |
| 91 // Otherwise, returns false. |
| 92 // |
| 93 // If page pattern is http://foo.com/a/b?page=[*!]&query=a |
| 94 // Returns true for: |
| 95 // - http://foo.com/a/b?query=a |
| 96 // - http://foo.com/a/b?page=2&query=a |
| 97 // Otherwise, returns false. |
| 98 // |
| 99 // If page pattern is http://foo.com/a/b?page=[*!] |
| 100 // Returns true for: |
| 101 // - http://foo.com/a/b/ |
| 102 // - http://foo.com/a/b.html |
| 103 // - http://foo.com/a/b.htm |
| 104 // - http://foo.com/a/b?page=2 |
| 105 // Otherwise, returns false. |
| 106 |
| 107 // Both url and pattern must have the same prefix. |
| 108 if (!url.startsWith(mPrefix)) return false; |
| 109 |
| 110 // If the url doesn't have page number query, it is fine. |
| 111 if (mPlaceholderSegmentStart == suffixStart) return true; |
| 112 |
| 113 // If the only difference in the page param between url and pattern is "
/", ".htm" or |
| 114 // ".html", it is fine. |
| 115 String diffPart = url.substring(mPlaceholderSegmentStart, suffixStart).t
oLowerCase(); |
| 116 if (sSlashOrHtmExtRegExp == null) { |
| 117 sSlashOrHtmExtRegExp = RegExp.compile("^\\/|(.html?)$", "i"); |
| 118 } |
| 119 if (sSlashOrHtmExtRegExp.test(diffPart)) return true; |
| 120 |
| 121 // Both url and pattern must have the same query name. |
| 122 if (!url.regionMatches(mPlaceholderSegmentStart, mUrlStr, mPlaceholderSe
gmentStart, |
| 123 mPlaceholderStart - mPlaceholderSegmentStart)) { |
| 124 return false; |
| 125 } |
| 126 |
| 127 return PageParameterDetector.isPlainNumber(url.substring(mPlaceholderSta
rt, suffixStart)); |
| 128 } |
| 129 |
| 130 private QueryParamPagePattern(ParsedUrl url, String queryName, String queryV
alue) |
| 131 throws IllegalArgumentException { |
| 132 if (queryName.isEmpty()) throw new IllegalArgumentException("Empty query
name"); |
| 133 if (queryValue.isEmpty()) throw new IllegalArgumentException("Empty quer
y value"); |
| 134 if (!StringUtil.isStringAllDigits(queryValue)) { |
| 135 throw new IllegalArgumentException("Query value has non-digits: " +
queryValue); |
| 136 } |
| 137 if (PageParameterDetector.isPageParamNameBad(queryName)) { |
| 138 throw new IllegalArgumentException("Query name is bad page param nam
e: " + queryName); |
| 139 } |
| 140 |
| 141 int value = StringUtil.toNumber(queryValue); |
| 142 if (value < 0) { |
| 143 throw new IllegalArgumentException("Query value is an invalid number
: " + queryValue); |
| 144 } |
| 145 |
| 146 String pattern = url.replaceQueryValue(queryName, queryValue, |
| 147 PageParameterDetector.PAGE_PARAM_PLACEHOLDER); |
| 148 mUrl = ParsedUrl.create(pattern); |
| 149 if (mUrl == null) throw new IllegalArgumentException("Invalid URL: " + p
attern); |
| 150 mUrlStr = pattern; |
| 151 mPageNumber = value; |
| 152 mPlaceholderStart = pattern.indexOf(PageParameterDetector.PAGE_PARAM_PLA
CEHOLDER); |
| 153 mQueryStart = mUrlStr.lastIndexOf('?', mPlaceholderStart - 1); |
| 154 mPlaceholderSegmentStart = mUrlStr.lastIndexOf('&', mPlaceholderStart -
1); |
| 155 if (mPlaceholderSegmentStart == -1) { // Page param is the first query. |
| 156 mPlaceholderSegmentStart = mQueryStart; |
| 157 } |
| 158 mPrefix = mUrlStr.substring(0, mPlaceholderSegmentStart); |
| 159 // Determine suffix, if available. |
| 160 final int urlLen = mUrlStr.length(); |
| 161 mSuffixLen = urlLen - mPlaceholderStart - PageParameterDetector.PAGE_PAR
AM_PLACEHOLDER_LEN; |
| 162 if (mSuffixLen != 0) { |
| 163 mSuffix = mUrlStr.substring(urlLen - mSuffixLen + 1); // +1 to exclu
de '&' or '?'. |
| 164 } |
| 165 } |
| 166 |
| 167 } |
OLD | NEW |