components/link_header_util/link_header_util.cc - Issue 1811163002: Share link header parsing code between blink and content.

Side by Side Diff: components/link_header_util/link_header_util.cc

Issue 1811163002: Share link header parsing code between blink and content. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@base-optional

Patch Set: address more comments Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2016 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "components/link_header_util/link_header_util.h"

	6

	7 #include "base/strings/string_util.h"

	8 #include "net/http/http_util.h"

	9

	10 namespace link_header_util {

	11

	12 namespace {

	13

	14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator.

	15 // Takes the parsing of StringTokenizer and adds support for quoted strings that

	16 // are quoted by matching <> (and does not support escaping in those strings).

	17 // Also has the behavior of ValuesIterator where it strips whitespace from all

	18 // values and only outputs non-empty values.

	19 // Only supports ',' as separator and supports '' "" and <> as quote chars.

	20 class ValueTokenizer {

	21 public:

	22 ValueTokenizer(std::string::const_iterator begin,

	23 std::string::const_iterator end)

	24 : token_begin_(begin), token_end_(begin), end_(end) {}

	25

	26 std::string::const_iterator token_begin() const { return token_begin_; }

	27 std::string::const_iterator token_end() const { return token_end_; }

	28

	29 bool GetNext() {

	30 while (GetNextInternal()) {

	31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_);

	32

	33 // Only return non-empty values.

	34 if (token_begin_ != token_end_)

	35 return true;

	36 }

	37 return false;

	38 }

	39

	40 private:

	41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next

	42 // token. Returns false if end-of-string was reached first.

	43 bool GetNextInternal() {

	44 // First time this is called token_end_ points to the first character in the

	45 // input. Every other time token_end_ points to the delimiter at the end of

	46 // the last returned token (which could be the end of the string).

	47

	48 // End of string, return false.

	49 if (token_end_ == end_)

	50 return false;

	51

	52 // Skip past the delimiter.

	53 if (*token_end_ == ',')

	54 ++token_end_;

	55

	56 // Make token_begin_ point to the beginning of the next token, and search

	57 // for the end of the token in token_end_.

	58 token_begin_ = token_end_;

	59

	60 // Set to true if we're currently inside a quoted string.

	61 bool in_quote = false;

	62 // Set to true if we're currently inside a quoted string, and have just

	63 // encountered an escape character. In this case a closing quote will be

	64 // ignored.

	65 bool in_escape = false;

	66 // If currently in a quoted string, this is the character that (when not

	67 // escaped) indicates the end of the string.

	68 char quote_close_char = '\0';

	69 // If currently in a quoted string, this is set to true if it is possible to

	70 // escape the closing quote using '\'.

	71 bool quote_allows_escape = false;

	72

	73 while (token_end_ != end_) {

	74 char c = *token_end_;

	75 if (in_quote) {

	76 if (in_escape) {

	77 in_escape = false;

	78 } else if (quote_allows_escape && c == '\\') {

	79 in_escape = true;

	80 } else if (c == quote_close_char) {

	81 in_quote = false;

	82 }

	83 } else {

	84 if (c == ',')

	85 break;

	86 if (c == '\'' \|\| c == '"' \|\| c == '<') {

	87 in_quote = true;

	88 quote_close_char = (c == '<' ? '>' : c);

	89 quote_allows_escape = (c != '<');

	90 }

	91 }

	92 ++token_end_;

	93 }

	94 return true;

	95 }

	96

	97 std::string::const_iterator token_begin_;

	98 std::string::const_iterator token_end_;

	99 std::string::const_iterator end_;

	100 };
	jochen (gone - plz use gerrit) 2016/04/27 13:14:33 disallow copy/assign
	101

	102 // Parses the URL part of a Link header. When successful \|url_begin\| points

	103 // to the beginning of the url, \|url_end\| points to the end of the url and

	104 // \|params_begin\| points to the first character after the '>' character at the

	105 // end of the url.

	106 bool ExtractURL(std::string::const_iterator begin,

	107 std::string::const_iterator end,

	108 std::string::const_iterator* url_begin,

	109 std::string::const_iterator* url_end,

	110 std::string::const_iterator* params_begin) {

	111 // Extract the URL part (everything between '<' and first '>' character).

	112 if (*begin != '<')

	113 return false;

	114

	115 ++begin;

	116 *url_begin = begin;

	117 *url_end = std::find(begin, end, '>');

	118

	119 // Fail if we did not find a '>'.

	120 if (*url_end == end)

	121 return false;

	122

	123 params_begin = url_end;

	124 // Skip the '>' at the end of the URL.

	125 ++*params_begin;

	126

	127 // Trim whitespace from the URL.

	128 net::HttpUtil::TrimLWS(url_begin, url_end);

	129 return true;

	130 }

	131

	132 } // namespace

	133

	134 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) {

	135 std::vector<StringIteratorPair> values;

	136 ValueTokenizer tokenizer(header.begin(), header.end());

	137 while (tokenizer.GetNext()) {

	138 values.push_back(

	139 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end()));

	140 }

	141 return values;

	142 }

	143

	144 // Parses one link in a link header into its url and parameters.

	145 // A link is of the form "<some-url>; param1=value1; param2=value2".

	146 // Returns false if parsing the link failed, returns true on success. This

	147 // method is more lenient than the RFC. It doesn't fail on things like invalid

	148 // characters in the URL, and also doesn't verify that certain parameters should

	149 // or shouldn't be quoted strings.

	150 // If a parameter occurs more than once in the link, only the first value is

	151 // returned in params as this is the required behavior for all attributes chrome

	152 // currently cares about in link headers.

	153 bool ParseLinkHeaderValue(

	154 std::string::const_iterator begin,

	155 std::string::const_iterator end,

	156 std::string* url,

	157 std::unordered_map<std::string, base::Optional<std::string>>* params) {

	158 // Can't parse an empty string.

	159 if (begin == end)

	160 return false;

	161

	162 // Extract the URL part (everything between '<' and first '>' character).

	163 std::string::const_iterator url_begin;

	164 std::string::const_iterator url_end;

	165 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin))

	166 return false;

	167 *url = std::string(url_begin, url_end);

	168

	169 // Trim any remaining whitespace, and make sure there is a ';' separating

	170 // parameters from the URL.

	171 net::HttpUtil::TrimLWS(&begin, &end);

	172 if (begin != end && *begin != ';')

	173 return false;

	174

	175 // Parse all the parameters.

	176 net::HttpUtil::NameValuePairsIterator params_iterator(

	177 begin, end, ';', net::HttpUtil::NameValuePairsIterator::Values::OPTIONAL,

	178 net::HttpUtil::NameValuePairsIterator::Quotes::STRICT);

	179 while (params_iterator.GetNext()) {

	180 if (!net::HttpUtil::IsParmName(params_iterator.name_begin(),

	181 params_iterator.name_end()))

	182 return false;

	183 std::string name = base::ToLowerASCII(base::StringPiece(

	184 params_iterator.name_begin(), params_iterator.name_end()));

	185 if (!params_iterator.value_is_quoted() &&

	186 params_iterator.value_begin() == params_iterator.value_end())

	187 params->insert(std::make_pair(name, base::nullopt));

	188 else

	189 params->insert(std::make_pair(name, params_iterator.value()));

	190 }

	191 return params_iterator.valid();

	192 }

	193

	194 } // namespace link_header_util

OLD	NEW

« no previous file with comments | « components/link_header_util/link_header_util.h ('k') | components/link_header_util/link_header_util.gyp » ('j') | no next file with comments »