Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "components/link_header_util/link_header_util.h" | |
| 6 | |
| 7 #include "base/strings/string_util.h" | |
| 8 #include "net/http/http_util.h" | |
| 9 | |
| 10 namespace link_header_util { | |
| 11 | |
| 12 namespace { | |
| 13 | |
| 14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. | |
| 15 // Takes the parsing of StringTokenizer and adds support for quoted strings that | |
| 16 // are quoted by matching <> (and does not support escaping in those strings). | |
| 17 // Also has the behavior of ValuesIterator where it strips whitespace from all | |
| 18 // values and only outputs non-empty values. | |
| 19 // Only supports ',' as separator and supports '' "" and <> as quote chars. | |
| 20 class ValueTokenizer { | |
| 21 public: | |
| 22 ValueTokenizer(std::string::const_iterator begin, | |
| 23 std::string::const_iterator end) | |
| 24 : token_begin_(begin), token_end_(begin), end_(end) {} | |
| 25 | |
| 26 std::string::const_iterator token_begin() const { return token_begin_; } | |
| 27 std::string::const_iterator token_end() const { return token_end_; } | |
| 28 | |
| 29 bool GetNext() { | |
| 30 while (GetNextInternal()) { | |
| 31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_); | |
| 32 | |
| 33 // Only return non-empty values. | |
| 34 if (token_begin_ != token_end_) | |
| 35 return true; | |
| 36 } | |
| 37 return false; | |
| 38 } | |
| 39 | |
| 40 private: | |
| 41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next | |
| 42 // token. Returns false if end-of-string was reached first. | |
| 43 bool GetNextInternal() { | |
| 44 // First time this is called token_end_ points to the first character in the | |
| 45 // input. Every other time token_end_ points to the delimiter at the end of | |
| 46 // the last returned token (which could be the end of the string). | |
| 47 | |
| 48 // End of string, return false. | |
| 49 if (token_end_ == end_) | |
| 50 return false; | |
| 51 | |
| 52 // Skip past the delimiter. | |
| 53 if (*token_end_ == ',') | |
| 54 ++token_end_; | |
| 55 | |
| 56 // Make token_begin_ point to the beginning of the next token, and search | |
| 57 // for the end of the token in token_end_. | |
| 58 token_begin_ = token_end_; | |
| 59 | |
| 60 // Set to true if we're currently inside a quoted string. | |
| 61 bool in_quote = false; | |
| 62 // Set to true if we're currently inside a quoted string, and have just | |
| 63 // encountered an escape character. In this case a closing quote will be | |
| 64 // ignored. | |
| 65 bool in_escape = false; | |
| 66 // If currently in a quoted string, this is the character that (when not | |
| 67 // escaped) indicates the end of the string. | |
| 68 char quote_close_char = '\0'; | |
| 69 // If currently in a quoted string, this is set to true if it is possible to | |
| 70 // escape the closing quote using '\'. | |
| 71 bool quote_allows_escape = false; | |
| 72 | |
| 73 while (token_end_ != end_) { | |
| 74 char c = *token_end_; | |
| 75 if (in_quote) { | |
| 76 if (in_escape) { | |
| 77 in_escape = false; | |
| 78 } else if (quote_allows_escape && c == '\\') { | |
| 79 in_escape = true; | |
| 80 } else if (c == quote_close_char) { | |
| 81 in_quote = false; | |
| 82 } | |
| 83 } else { | |
| 84 if (c == ',') | |
| 85 break; | |
| 86 if (c == '\'' || c == '"' || c == '<') { | |
| 87 in_quote = true; | |
| 88 quote_close_char = (c == '<' ? '>' : c); | |
| 89 quote_allows_escape = (c != '<'); | |
| 90 } | |
| 91 } | |
| 92 ++token_end_; | |
| 93 } | |
| 94 return true; | |
| 95 } | |
| 96 | |
| 97 std::string::const_iterator token_begin_; | |
| 98 std::string::const_iterator token_end_; | |
| 99 std::string::const_iterator end_; | |
| 100 }; | |
| 101 | |
| 102 // TODO(mek): Figure out if it makes sense to move this function to | |
| 103 // net::HttpUtil, as it implements the "parmname" rule from | |
| 104 // https://tools.ietf.org/html/rfc5987#section-3.2.1 | |
| 105 bool IsParameterName(std::string::const_iterator begin, | |
| 106 std::string::const_iterator end) { | |
| 107 if (!net::HttpUtil::IsToken(begin, end)) | |
| 108 return false; | |
| 109 for (auto it = begin; it != end; ++it) { | |
| 110 if (*it == '*' || *it == '\'' || *it == '%') | |
| 111 return false; | |
| 112 } | |
| 113 return true; | |
| 114 } | |
| 115 | |
| 116 // Parses the URL part of a Link header. When successful |url_begin| points | |
| 117 // to the beginning of the url, |url_end| points to the end of the url and | |
| 118 // |params_begin| points to the first character after the '>' character at the | |
| 119 // end of the url. | |
| 120 bool ExtractURL(std::string::const_iterator begin, | |
| 121 std::string::const_iterator end, | |
| 122 std::string::const_iterator* url_begin, | |
| 123 std::string::const_iterator* url_end, | |
| 124 std::string::const_iterator* params_begin) { | |
| 125 // Extract the URL part (everything between '<' and first '>' character). | |
| 126 if (*begin != '<') | |
| 127 return false; | |
| 128 | |
| 129 ++begin; | |
| 130 *url_begin = begin; | |
| 131 *url_end = std::find(begin, end, '>'); | |
| 132 | |
| 133 // Fail if we did not find a '>'. | |
| 134 if (*url_end == end) | |
| 135 return false; | |
| 136 | |
| 137 *params_begin = *url_end; | |
| 138 // Skip the '>' at the end of the URL. | |
| 139 ++*params_begin; | |
| 140 | |
| 141 // Trim whitespace from the URL. | |
| 142 net::HttpUtil::TrimLWS(url_begin, url_end); | |
| 143 return true; | |
| 144 } | |
| 145 | |
| 146 } // namespace | |
| 147 | |
| 148 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { | |
| 149 std::vector<StringIteratorPair> values; | |
| 150 ValueTokenizer tokenizer(header.begin(), header.end()); | |
| 151 while (tokenizer.GetNext()) { | |
| 152 values.push_back( | |
| 153 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); | |
| 154 } | |
| 155 return values; | |
| 156 } | |
| 157 | |
| 158 // Parses one link in a link header into its url and parameters. | |
| 159 // A link is of the form "<some-url>; param1=value1; param2=value2". | |
| 160 // Returns false if parsing the link failed, returns true on success. This | |
| 161 // method is more lenient than the RFC. It doesn't fail on things like invalid | |
| 162 // characters in the URL, and also doesn't verify that certain parameters should | |
| 163 // or shouldn't be quoted strings. | |
| 164 // If a parameter occurs more than once in the link, only the first value is | |
| 165 // returned in params as this is the required behavior for all attributes chrome | |
| 166 // currently cares about in link headers. | |
| 167 bool ParseLinkHeaderValue( | |
| 168 std::string::const_iterator begin, | |
| 169 std::string::const_iterator end, | |
| 170 std::string* url, | |
| 171 std::unordered_map<std::string, base::Optional<std::string>>* params) { | |
| 172 // Can't parse an empty string. | |
| 173 if (begin == end) | |
| 174 return false; | |
| 175 | |
| 176 // Extract the URL part (everything between '<' and first '>' character). | |
| 177 std::string::const_iterator url_begin; | |
| 178 std::string::const_iterator url_end; | |
| 179 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin)) | |
| 180 return false; | |
| 181 *url = std::string(url_begin, url_end); | |
| 182 | |
| 183 // Trim any remaining whitespace, and make sure there is a ';' separating | |
| 184 // parameters from the URL. | |
| 185 net::HttpUtil::TrimLWS(&begin, &end); | |
| 186 if (begin != end && *begin != ';') | |
| 187 return false; | |
| 188 | |
| 189 // Parse all the parameters. | |
| 190 net::HttpUtil::NameValuePairsIterator params_iterator( | |
| 191 begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL, | |
| 192 net::HttpUtil::NameValuePairsIterator::STRICT_QUOTES); | |
| 193 while (params_iterator.GetNext()) { | |
| 194 if (!IsParameterName(params_iterator.name_begin(), | |
| 195 params_iterator.name_end())) | |
| 196 return false; | |
| 197 std::string name = base::ToLowerASCII(base::StringPiece( | |
| 198 params_iterator.name_begin(), params_iterator.name_end())); | |
| 199 if (!params_iterator.value_is_quoted() && | |
| 200 params_iterator.value_begin() == params_iterator.value_end()) | |
| 201 params->insert(std::make_pair(name, base::nullopt_t(0))); | |
|
dcheng
2016/04/20 04:41:06
Just write base::nullopt here.
Marijn Kruisselbrink
2016/04/20 21:07:48
Done. I initially wrote this against an older vers
| |
| 202 else | |
| 203 params->insert(std::make_pair(name, params_iterator.value())); | |
| 204 } | |
| 205 return params_iterator.valid(); | |
| 206 } | |
| 207 | |
| 208 } // namespace link_header_util | |
| OLD | NEW |