Chromium Code Reviews| Index: components/link_header_util/link_header_util.cc |
| diff --git a/components/link_header_util/link_header_util.cc b/components/link_header_util/link_header_util.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..235d8e1e7b6200418be841a01ac1d07650429221 |
| --- /dev/null |
| +++ b/components/link_header_util/link_header_util.cc |
| @@ -0,0 +1,186 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/link_header_util/link_header_util.h" |
| + |
| +#include <algorithm> |
| + |
| +#include "base/strings/string_util.h" |
| +#include "net/http/http_util.h" |
| + |
| +namespace link_header_util { |
| + |
| +namespace { |
| + |
| +// A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. |
| +// Takes the parsing of StringTokenizer and adds support for quoted strings that |
| +// are quoted by matching <> (and does not support escaping in those strings). |
| +// Also has the behavior of ValuesIterator where it strips whitespace from all |
| +// values and only outputs non-empty values. |
| +// Only supports ',' as separator and supports '' "" and <> as quote chars. |
| +// TODO(mek): Figure out if there is a way to share this with the parsing code |
| +// in blink::LinkHeader. |
|
Yoav Weiss
2016/03/30 07:38:52
That TODO is no longer relevant, right?
Marijn Kruisselbrink
2016/04/20 01:45:03
Good point, removed.
|
| +class ValueTokenizer { |
|
Yoav Weiss
2016/03/30 07:38:52
Ideally, I'd love to see this goes back to StringT
Marijn Kruisselbrink
2016/04/20 01:45:03
I initially started out by trying to add this to S
|
| + public: |
| +  // Creates a tokenizer over [begin, end). No token is available until |
| +  // GetNext() has returned true. |
| +  ValueTokenizer(std::string::const_iterator begin, |
| +                 std::string::const_iterator end) |
| +      : token_begin_(begin), token_end_(begin), end_(end) {} |
| + |
| +  // Bounds of the token found by the most recent successful GetNext(). |
| +  std::string::const_iterator token_begin() const { return token_begin_; } |
| +  std::string::const_iterator token_end() const { return token_end_; } |
| + |
| +  // Moves to the next token that is still non-empty after being passed |
| +  // through net::HttpUtil::TrimLWS. Returns false once no such token is left. |
| +  bool GetNext() { |
| +    for (;;) { |
| +      if (!GetNextInternal()) |
| +        return false; |
| +      net::HttpUtil::TrimLWS(&token_begin_, &token_end_); |
| +      if (token_begin_ == token_end_) |
| +        continue;  // Nothing left after trimming; keep scanning. |
| +      return true; |
| +    } |
| +  } |
| + |
| + private: |
| +  // Scans forward from token_end_ and sets token_begin_/token_end_ to the |
| +  // next (possibly empty) token. Returns false if token_end_ is already at |
| +  // the end of the input. |
| +  bool GetNextInternal() { |
| +    // token_end_ is the start of the input on the first call; afterwards it |
| +    // is where the previous scan stopped (a ',' or the end of the input), |
| +    // possibly moved back over trailing whitespace by GetNext(). |
| +    if (token_end_ == end_) |
| +      return false; |
| + |
| +    // Step over the delimiter that ended the previous token, if any. |
| +    if (*token_end_ == ',') |
| +      ++token_end_; |
| +    token_begin_ = token_end_; |
| + |
| +    // True while the scan is inside a quoted section. |
| +    bool quoted = false; |
| +    // True when the previous character was a '\' that escapes this one. |
| +    bool escaped = false; |
| +    // Whether '\' acts as an escape inside the current quoted section. |
| +    bool backslash_escapes = false; |
| +    // Character that ends the current quoted section. |
| +    char closer = '\0'; |
| + |
| +    for (; token_end_ != end_; ++token_end_) { |
| +      const char ch = *token_end_; |
| +      if (!quoted) { |
| +        // A ',' outside of any quoted section ends the token. |
| +        if (ch == ',') |
| +          break; |
| +        switch (ch) { |
| +          case '<': |
| +            // A <...> section runs to the first '>'; '\' is not special. |
| +            quoted = true; |
| +            closer = '>'; |
| +            backslash_escapes = false; |
| +            break; |
| +          case '\'': |
| +          case '"': |
| +            // Closed by the same character that opened it, unless escaped. |
| +            quoted = true; |
| +            closer = ch; |
| +            backslash_escapes = true; |
| +            break; |
| +          default: |
| +            break; |
| +        } |
| +        continue; |
| +      } |
| +      if (escaped) { |
| +        escaped = false; |
| +      } else if (backslash_escapes && ch == '\\') { |
| +        escaped = true; |
| +      } else if (ch == closer) { |
| +        quoted = false; |
| +      } |
| +    } |
| +    return true; |
| +  } |
| + |
| +  std::string::const_iterator token_begin_; |
| +  std::string::const_iterator token_end_; |
| +  std::string::const_iterator end_; |
| +}; |
| + |
| +// Returns true if [begin, end) is accepted by net::HttpUtil::IsToken and |
| +// contains none of the characters '*', '\'' and '%'. |
| +// TODO(mek): Figure out if it makes sense to move this function to |
| +// net::HttpUtil, as it implements the "parmname" rule from |
| +// https://tools.ietf.org/html/rfc5987#section-3.2.1 |
| +bool IsParameterName(std::string::const_iterator begin, |
| +                     std::string::const_iterator end) { |
| +  bool acceptable = net::HttpUtil::IsToken(begin, end); |
| +  for (auto pos = begin; acceptable && pos != end; ++pos) { |
|
Yoav Weiss
2016/03/29 22:05:45
Can we change that so that we'd iterate over the p
Marijn Kruisselbrink
2016/04/20 01:45:03
I agree that that would be ideal (that's also why
|
| +    switch (*pos) { |
| +      case '*': |
| +      case '\'': |
| +      case '%': |
| +        acceptable = false; |
| +        break; |
| +      default: |
| +        break; |
| +    } |
| +  } |
| +  return acceptable; |
| +} |
| + |
| +} // namespace |
| + |
| +// Splits |header| into its comma separated values using ValueTokenizer. |
| +// Each returned pair holds the begin and end iterators, pointing into |
| +// |header|, of one non-empty value. |
| +std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { |
| +  std::vector<StringIteratorPair> values; |
|
Yoav Weiss
2016/03/30 07:38:52
s/values/linkHeaders/ (or something similar)? I th
Marijn Kruisselbrink
2016/04/20 01:45:03
I used values because that's the terminology the s
|
| +  for (ValueTokenizer tokenizer(header.begin(), header.end()); |
| +       tokenizer.GetNext();) { |
| +    const StringIteratorPair value(tokenizer.token_begin(), |
| +                                   tokenizer.token_end()); |
| +    values.push_back(value); |
| +  } |
| +  return values; |
| +} |
| + |
| +namespace { |
| + |
| +// Extracts the URL of a link: the text between a leading '<' and the first |
| +// '>' after it, trimmed with net::HttpUtil::TrimLWS. On success stores the |
| +// URL in |url|, advances |*begin| to just past that '>' and returns true. |
| +// Returns false, leaving |*begin| and |url| untouched, if [*begin, end) is |
| +// empty, does not start with '<', or contains no '>'. |
| +bool ExtractURL(std::string::const_iterator* begin, |
| +                std::string::const_iterator end, |
| +                std::string* url) { |
| +  if (*begin == end || **begin != '<') |
| +    return false; |
| +  std::string::const_iterator url_begin = *begin + 1; |
| +  std::string::const_iterator url_end = std::find(url_begin, end, '>'); |
| +  // Fail if we did not find a '>'. |
| +  if (url_end == end) |
| +    return false; |
| +  // Remember where the parameters start before trimming moves |url_end|. |
| +  *begin = url_end + 1; |
| +  net::HttpUtil::TrimLWS(&url_begin, &url_end); |
| +  url->assign(url_begin, url_end); |
| +  return true; |
| +} |
| + |
| +}  // namespace |
| + |
| +// Parses one link in a link header into its url and parameters. |
| +// A link is of the form "<some-url>; param1=value1; param2=value2". |
| +// Returns false if parsing the link failed, returns true on success. This |
| +// method is more lenient than the RFC. It doesn't fail on things like invalid |
| +// characters in the URL, and also doesn't verify that certain parameters should |
| +// or shouldn't be quoted strings. |
| +// Parameter names are lower-cased before being stored in |params|. A parameter |
| +// without a value (and without quotes) is stored as base::nullopt. |
| +// If a parameter occurs more than once in the link, only the first value is |
| +// returned in params as this is the required behavior for all attributes chrome |
| +// currently cares about in link headers. |
| +// |url| and |params| are not reset on entry; when false is returned they may |
| +// already hold the URL and the parameters that were parsed before the failure. |
| +bool ParseLinkHeaderValue( |
| +    std::string::const_iterator begin, |
| +    std::string::const_iterator end, |
| +    std::string* url, |
| +    std::unordered_map<std::string, base::Optional<std::string>>* params) { |
| +  // Extract the URL part (everything between '<' and first '>' character). |
|
Yoav Weiss
2016/03/30 07:38:52
Could you perhaps split the URL parsing part into
Marijn Kruisselbrink
2016/04/20 01:45:03
Done
|
| +  // This also rejects an empty input. |
| +  if (!ExtractURL(&begin, end, url)) |
| +    return false; |
| + |
| +  // |begin| is now just past the '>'. Trim any remaining whitespace, and make |
| +  // sure what follows is a ';' to indicate the start of parameters. |
| +  net::HttpUtil::TrimLWS(&begin, &end); |
| +  if (begin != end && *begin != ';') |
| +    return false; |
| + |
| +  // Parse all the parameters. |
| +  net::HttpUtil::NameValuePairsIterator params_iterator( |
| +      begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL); |
| +  while (params_iterator.GetNext()) { |
| +    if (!IsParameterName(params_iterator.name_begin(), |
| +                         params_iterator.name_end())) |
| +      return false; |
| +    std::string name = base::ToLowerASCII(base::StringPiece( |
| +        params_iterator.name_begin(), params_iterator.name_end())); |
| +    // An unquoted, empty value means the parameter had no value at all; a |
| +    // quoted empty string ("") is kept as a real, empty value. |
| +    const bool has_value = |
| +        params_iterator.value_is_quoted() || |
| +        params_iterator.value_begin() != params_iterator.value_end(); |
| +    // insert() leaves an existing entry alone, so the first occurrence of a |
| +    // parameter name wins. |
| +    if (has_value) |
| +      params->insert(std::make_pair(name, params_iterator.value())); |
| +    else |
| +      params->insert(std::make_pair(name, base::nullopt)); |
| +  } |
| +  return params_iterator.valid(); |
| +} |
| + |
| +} // namespace link_header_util |