Index: components/link_header_util/link_header_util.cc |
diff --git a/components/link_header_util/link_header_util.cc b/components/link_header_util/link_header_util.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ab6ba8232f00e4227dc5df20c099dbba9adcd281 |
--- /dev/null |
+++ b/components/link_header_util/link_header_util.cc |
@@ -0,0 +1,208 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "components/link_header_util/link_header_util.h" |
+ |
+#include "base/strings/string_util.h" |
+#include "net/http/http_util.h" |
+ |
+namespace link_header_util { |
+ |
+namespace { |
+ |
+// A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. |
+// Takes the parsing of StringTokenizer and adds support for quoted strings that |
+// are quoted by matching <> (and does not support escaping in those strings). |
+// Also has the behavior of ValuesIterator where it strips whitespace from all |
+// values and only outputs non-empty values. |
+// Only supports ',' as separator and supports '' "" and <> as quote chars. |
+class ValueTokenizer { |
+ public: |
+ ValueTokenizer(std::string::const_iterator begin, |
+ std::string::const_iterator end) |
+ : token_begin_(begin), token_end_(begin), end_(end) {} |
+ |
+ std::string::const_iterator token_begin() const { return token_begin_; } |
+ std::string::const_iterator token_end() const { return token_end_; } |
+ |
+ bool GetNext() { |
+ while (GetNextInternal()) { |
+ net::HttpUtil::TrimLWS(&token_begin_, &token_end_); |
+ |
+ // Only return non-empty values. |
+ if (token_begin_ != token_end_) |
+ return true; |
+ } |
+ return false; |
+ } |
+ |
+ private: |
+ // Updates token_begin_ and token_end_ to point to the (possibly empty) next |
+ // token. Returns false if end-of-string was reached first. |
+ bool GetNextInternal() { |
+ // First time this is called token_end_ points to the first character in the |
+ // input. Every other time token_end_ points to the delimiter at the end of |
+ // the last returned token (which could be the end of the string). |
+ |
+ // End of string, return false. |
+ if (token_end_ == end_) |
+ return false; |
+ |
+ // Skip past the delimiter. |
+ if (*token_end_ == ',') |
+ ++token_end_; |
+ |
+ // Make token_begin_ point to the beginning of the next token, and search |
+ // for the end of the token in token_end_. |
+ token_begin_ = token_end_; |
+ |
+ // Set to true if we're currently inside a quoted string. |
+ bool in_quote = false; |
+ // Set to true if we're currently inside a quoted string, and have just |
+ // encountered an escape character. In this case a closing quote will be |
+ // ignored. |
+ bool in_escape = false; |
+ // If currently in a quoted string, this is the character that (when not |
+ // escaped) indicates the end of the string. |
+ char quote_close_char = '\0'; |
+ // If currently in a quoted string, this is set to true if it is possible to |
+ // escape the closing quote using '\'. |
+ bool quote_allows_escape = false; |
+ |
+ while (token_end_ != end_) { |
+ char c = *token_end_; |
+ if (in_quote) { |
+ if (in_escape) { |
+ in_escape = false; |
+ } else if (quote_allows_escape && c == '\\') { |
+ in_escape = true; |
+ } else if (c == quote_close_char) { |
+ in_quote = false; |
+ } |
+ } else { |
+ if (c == ',') |
+ break; |
+ if (c == '\'' || c == '"' || c == '<') { |
+ in_quote = true; |
+ quote_close_char = (c == '<' ? '>' : c); |
+ quote_allows_escape = (c != '<'); |
+ } |
+ } |
+ ++token_end_; |
+ } |
+ return true; |
+ } |
+ |
+ std::string::const_iterator token_begin_; |
+ std::string::const_iterator token_end_; |
+ std::string::const_iterator end_; |
+}; |
+ |
+// TODO(mek): Figure out if it makes sense to move this function to |
+// net::HttpUtil, as it implements the "parmname" rule from |
+// https://tools.ietf.org/html/rfc5987#section-3.2.1 |
+bool IsParameterName(std::string::const_iterator begin, |
+ std::string::const_iterator end) { |
+ if (!net::HttpUtil::IsToken(begin, end)) |
+ return false; |
+ for (auto it = begin; it != end; ++it) { |
+ if (*it == '*' || *it == '\'' || *it == '%') |
+ return false; |
+ } |
+ return true; |
+} |
+ |
+// Parses the URL part of a Link header. When successful |url_begin| points |
+// to the beginning of the url, |url_end| points to the end of the url and |
+// |params_begin| points to the first character after the '>' character at the |
+// end of the url. |
+bool ExtractURL(std::string::const_iterator begin, |
+ std::string::const_iterator end, |
+ std::string::const_iterator* url_begin, |
+ std::string::const_iterator* url_end, |
+ std::string::const_iterator* params_begin) { |
+ // Extract the URL part (everything between '<' and first '>' character). |
+ if (*begin != '<') |
+ return false; |
+ |
+ ++begin; |
+ *url_begin = begin; |
+ *url_end = std::find(begin, end, '>'); |
+ |
+ // Fail if we did not find a '>'. |
+ if (*url_end == end) |
+ return false; |
+ |
+ *params_begin = *url_end; |
+ // Skip the '>' at the end of the URL. |
+ ++*params_begin; |
+ |
+ // Trim whitespace from the URL. |
+ net::HttpUtil::TrimLWS(url_begin, url_end); |
+ return true; |
+} |
+ |
+} // namespace |
+ |
+std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { |
+ std::vector<StringIteratorPair> values; |
+ ValueTokenizer tokenizer(header.begin(), header.end()); |
+ while (tokenizer.GetNext()) { |
+ values.push_back( |
+ StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); |
+ } |
+ return values; |
+} |
+ |
+// Parses one link in a link header into its url and parameters. |
+// A link is of the form "<some-url>; param1=value1; param2=value2". |
+// Returns false if parsing the link failed, returns true on success. This |
+// method is more lenient than the RFC. It doesn't fail on things like invalid |
+// characters in the URL, and also doesn't verify that certain parameters should |
+// or shouldn't be quoted strings. |
+// If a parameter occurs more than once in the link, only the first value is |
+// returned in params as this is the required behavior for all attributes chrome |
+// currently cares about in link headers. |
+bool ParseLinkHeaderValue( |
+ std::string::const_iterator begin, |
+ std::string::const_iterator end, |
+ std::string* url, |
+ std::unordered_map<std::string, base::Optional<std::string>>* params) { |
+ // Can't parse an empty string. |
+ if (begin == end) |
+ return false; |
+ |
+ // Extract the URL part (everything between '<' and first '>' character). |
+ std::string::const_iterator url_begin; |
+ std::string::const_iterator url_end; |
+ if (!ExtractURL(begin, end, &url_begin, &url_end, &begin)) |
+ return false; |
+ *url = std::string(url_begin, url_end); |
+ |
+ // Trim any remaining whitespace, and make sure there is a ';' separating |
+ // parameters from the URL. |
+ net::HttpUtil::TrimLWS(&begin, &end); |
+ if (begin != end && *begin != ';') |
+ return false; |
+ |
+ // Parse all the parameters. |
+ net::HttpUtil::NameValuePairsIterator params_iterator( |
+ begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL, |
+ net::HttpUtil::NameValuePairsIterator::STRICT_QUOTES); |
+ while (params_iterator.GetNext()) { |
+ if (!IsParameterName(params_iterator.name_begin(), |
+ params_iterator.name_end())) |
+ return false; |
+ std::string name = base::ToLowerASCII(base::StringPiece( |
+ params_iterator.name_begin(), params_iterator.name_end())); |
+ if (!params_iterator.value_is_quoted() && |
+ params_iterator.value_begin() == params_iterator.value_end()) |
+ params->insert(std::make_pair(name, base::nullopt_t(0))); |
dcheng
2016/04/20 04:41:06
Just write base::nullopt here.
Marijn Kruisselbrink
2016/04/20 21:07:48
Done. I initially wrote this against an older vers
|
+ else |
+ params->insert(std::make_pair(name, params_iterator.value())); |
+ } |
+ return params_iterator.valid(); |
+} |
+ |
+} // namespace link_header_util |