components/link_header_util/link_header_util.cc - Issue 1811163002: Share link header parsing code between blink and content.

Side by Side Diff: components/link_header_util/link_header_util.cc

Issue 1811163002: Share link header parsing code between blink and content. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@base-optional

Patch Set: rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « components/link_header_util/link_header_util.h ('k') | components/link_header_util/link_header_util.gyp » ('j') | components/link_header_util/link_header_util_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2016 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "components/link_header_util/link_header_util.h"

	6

	7 #include "base/strings/string_util.h"

	8 #include "net/http/http_util.h"

	9

	10 namespace link_header_util {

	11

	12 namespace {

	13

	14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator.

	15 // Takes the parsing of StringTokenizer and adds support for quoted strings that

	16 // are quoted by matching <> (and does not support escaping in those strings).

	17 // Also has the behavior of ValuesIterator where it strips whitespace from all

	18 // values and only outputs non-empty values.

	19 // Only supports ',' as separator and supports '' "" and <> as quote chars.

	20 class ValueTokenizer {

	21 public:

	22 ValueTokenizer(std::string::const_iterator begin,

	23 std::string::const_iterator end)

	24 : token_begin_(begin), token_end_(begin), end_(end) {}

	25

	26 std::string::const_iterator token_begin() const { return token_begin_; }

	27 std::string::const_iterator token_end() const { return token_end_; }

	28

	29 bool GetNext() {

	30 while (GetNextInternal()) {

	31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_);

	32

	33 // Only return non-empty values.

	34 if (token_begin_ != token_end_)

	35 return true;

	36 }

	37 return false;

	38 }

	39

	40 private:

	41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next

	42 // token. Returns false if end-of-string was reached first.

	43 bool GetNextInternal() {

	44 // First time this is called token_end_ points to the first character in the

	45 // input. Every other time token_end_ points to the delimiter at the end of

	46 // the last returned token (which could be the end of the string).

	47

	48 // End of string, return false.

	49 if (token_end_ == end_)

	50 return false;

	51

	52 // Skip past the delimiter.

	53 if (*token_end_ == ',')

	54 ++token_end_;

	55

	56 // Make token_begin_ point to the beginning of the next token, and search

	57 // for the end of the token in token_end_.

	58 token_begin_ = token_end_;

	59

	60 // Set to true if we're currently inside a quoted string.

	61 bool in_quote = false;

	62 // Set to true if we're currently inside a quoted string, and have just

	63 // encountered an escape character. In this case a closing quote will be

	64 // ignored.

	65 bool in_escape = false;

	66 // If currently in a quoted string, this is the character that (when not

	67 // escaped) indicates the end of the string.

	68 char quote_close_char = '\0';

	69 // If currently in a quoted string, this is set to true if it is possible to

	70 // escape the closing quote using '\'.

	71 bool quote_allows_escape = false;

	72

	73 while (token_end_ != end_) {

	74 char c = *token_end_;

	75 if (in_quote) {

	76 if (in_escape) {

	77 in_escape = false;

	78 } else if (quote_allows_escape && c == '\\') {

	79 in_escape = true;

	80 } else if (c == quote_close_char) {

	81 in_quote = false;

	82 }

	83 } else {

	84 if (c == ',')

	85 break;

	86 if (c == '\'' \|\| c == '"' \|\| c == '<') {

	87 in_quote = true;

	88 quote_close_char = (c == '<' ? '>' : c);

	89 quote_allows_escape = (c != '<');

	90 }

	91 }

	92 ++token_end_;

	93 }

	94 return true;

	95 }

	96

	97 std::string::const_iterator token_begin_;

	98 std::string::const_iterator token_end_;

	99 std::string::const_iterator end_;

	100 };

	101

	102 // TODO(mek): Figure out if it makes sense to move this function to

	103 // net::HttpUtil, as it implements the "parmname" rule from

	104 // https://tools.ietf.org/html/rfc5987#section-3.2.1

	105 bool IsParameterName(std::string::const_iterator begin,

	106 std::string::const_iterator end) {

	107 if (!net::HttpUtil::IsToken(begin, end))

	108 return false;

	109 for (auto it = begin; it != end; ++it) {

	110 if (it == '' \|\| it == '\'' \|\| it == '%')

	111 return false;

	112 }

	113 return true;

	114 }

	115

	116 // Parses the URL part of a Link header. When successful \|url_begin\| points

	117 // to the beginning of the url, \|url_end\| points to the end of the url and

	118 // \|params_begin\| points to the first character after the '>' character at the

	119 // end of the url.

	120 bool ExtractURL(std::string::const_iterator begin,

	121 std::string::const_iterator end,

	122 std::string::const_iterator* url_begin,

	123 std::string::const_iterator* url_end,

	124 std::string::const_iterator* params_begin) {

	125 // Extract the URL part (everything between '<' and first '>' character).

	126 if (*begin != '<')

	127 return false;

	128

	129 ++begin;

	130 *url_begin = begin;

	131 *url_end = std::find(begin, end, '>');

	132

	133 // Fail if we did not find a '>'.

	134 if (*url_end == end)

	135 return false;

	136

	137 params_begin = url_end;

	138 // Skip the '>' at the end of the URL.

	139 ++*params_begin;

	140

	141 // Trim whitespace from the URL.

	142 net::HttpUtil::TrimLWS(url_begin, url_end);

	143 return true;

	144 }

	145

	146 } // namespace

	147

	148 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) {

	149 std::vector<StringIteratorPair> values;

	150 ValueTokenizer tokenizer(header.begin(), header.end());

	151 while (tokenizer.GetNext()) {

	152 values.push_back(

	153 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end()));

	154 }

	155 return values;

	156 }

	157

	158 // Parses one link in a link header into its url and parameters.

	159 // A link is of the form "<some-url>; param1=value1; param2=value2".

	160 // Returns false if parsing the link failed, returns true on success. This

	161 // method is more lenient than the RFC. It doesn't fail on things like invalid

	162 // characters in the URL, and also doesn't verify that certain parameters should

	163 // or shouldn't be quoted strings.

	164 // If a parameter occurs more than once in the link, only the first value is

	165 // returned in params as this is the required behavior for all attributes chrome

	166 // currently cares about in link headers.

	167 bool ParseLinkHeaderValue(

	168 std::string::const_iterator begin,

	169 std::string::const_iterator end,

	170 std::string* url,

	171 std::unordered_map<std::string, base::Optional<std::string>>* params) {

	172 // Can't parse an empty string.

	173 if (begin == end)

	174 return false;

	175

	176 // Extract the URL part (everything between '<' and first '>' character).

	177 std::string::const_iterator url_begin;

	178 std::string::const_iterator url_end;

	179 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin))

	180 return false;

	181 *url = std::string(url_begin, url_end);

	182

	183 // Trim any remaining whitespace, and make sure there is a ';' separating

	184 // parameters from the URL.

	185 net::HttpUtil::TrimLWS(&begin, &end);

	186 if (begin != end && *begin != ';')

	187 return false;

	188

	189 // Parse all the parameters.

	190 net::HttpUtil::NameValuePairsIterator params_iterator(

	191 begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL,

	192 net::HttpUtil::NameValuePairsIterator::STRICT_QUOTES);

	193 while (params_iterator.GetNext()) {

	194 if (!IsParameterName(params_iterator.name_begin(),

	195 params_iterator.name_end()))

	196 return false;

	197 std::string name = base::ToLowerASCII(base::StringPiece(

	198 params_iterator.name_begin(), params_iterator.name_end()));

	199 if (!params_iterator.value_is_quoted() &&

	200 params_iterator.value_begin() == params_iterator.value_end())

	201 params->insert(std::make_pair(name, base::nullopt_t(0)));
	dcheng 2016/04/20 04:41:06 Just write base::nullopt here. Just write base::nullopt here. Marijn Kruisselbrink 2016/04/20 21:07:48 Done. I initially wrote this against an older vers Show quoted text On 2016/04/20 at 04:41:06, dcheng wrote: > Just write base::nullopt here. Done. I initially wrote this against an older version of the base::optional patch where base::nullopt didn't compile. But now that works of course.
	202 else

	203 params->insert(std::make_pair(name, params_iterator.value()));

	204 }

	205 return params_iterator.valid();

	206 }

	207

	208 } // namespace link_header_util

OLD	NEW