Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: components/link_header_util/link_header_util.cc

Issue 1811163002: Share link header parsing code between blink and content. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@base-optional
Patch Set: rebase Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/link_header_util/link_header_util.h"
6
7 #include "base/strings/string_util.h"
8 #include "net/http/http_util.h"
9
10 namespace link_header_util {
11
12 namespace {
13
14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator.
15 // Takes the parsing of StringTokenizer and adds support for quoted strings that
16 // are quoted by matching <> (and does not support escaping in those strings).
17 // Also has the behavior of ValuesIterator where it strips whitespace from all
18 // values and only outputs non-empty values.
19 // Only supports ',' as separator and supports '' "" and <> as quote chars.
20 class ValueTokenizer {
21 public:
22 ValueTokenizer(std::string::const_iterator begin,
23 std::string::const_iterator end)
24 : token_begin_(begin), token_end_(begin), end_(end) {}
25
26 std::string::const_iterator token_begin() const { return token_begin_; }
27 std::string::const_iterator token_end() const { return token_end_; }
28
29 bool GetNext() {
30 while (GetNextInternal()) {
31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_);
32
33 // Only return non-empty values.
34 if (token_begin_ != token_end_)
35 return true;
36 }
37 return false;
38 }
39
40 private:
41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next
42 // token. Returns false if end-of-string was reached first.
43 bool GetNextInternal() {
44 // First time this is called token_end_ points to the first character in the
45 // input. Every other time token_end_ points to the delimiter at the end of
46 // the last returned token (which could be the end of the string).
47
48 // End of string, return false.
49 if (token_end_ == end_)
50 return false;
51
52 // Skip past the delimiter.
53 if (*token_end_ == ',')
54 ++token_end_;
55
56 // Make token_begin_ point to the beginning of the next token, and search
57 // for the end of the token in token_end_.
58 token_begin_ = token_end_;
59
60 // Set to true if we're currently inside a quoted string.
61 bool in_quote = false;
62 // Set to true if we're currently inside a quoted string, and have just
63 // encountered an escape character. In this case a closing quote will be
64 // ignored.
65 bool in_escape = false;
66 // If currently in a quoted string, this is the character that (when not
67 // escaped) indicates the end of the string.
68 char quote_close_char = '\0';
69 // If currently in a quoted string, this is set to true if it is possible to
70 // escape the closing quote using '\'.
71 bool quote_allows_escape = false;
72
73 while (token_end_ != end_) {
74 char c = *token_end_;
75 if (in_quote) {
76 if (in_escape) {
77 in_escape = false;
78 } else if (quote_allows_escape && c == '\\') {
79 in_escape = true;
80 } else if (c == quote_close_char) {
81 in_quote = false;
82 }
83 } else {
84 if (c == ',')
85 break;
86 if (c == '\'' || c == '"' || c == '<') {
87 in_quote = true;
88 quote_close_char = (c == '<' ? '>' : c);
89 quote_allows_escape = (c != '<');
90 }
91 }
92 ++token_end_;
93 }
94 return true;
95 }
96
97 std::string::const_iterator token_begin_;
98 std::string::const_iterator token_end_;
99 std::string::const_iterator end_;
100 };
101
102 // TODO(mek): Figure out if it makes sense to move this function to
103 // net::HttpUtil, as it implements the "parmname" rule from
104 // https://tools.ietf.org/html/rfc5987#section-3.2.1
105 bool IsParameterName(std::string::const_iterator begin,
106 std::string::const_iterator end) {
107 if (!net::HttpUtil::IsToken(begin, end))
108 return false;
109 for (auto it = begin; it != end; ++it) {
110 if (*it == '*' || *it == '\'' || *it == '%')
111 return false;
112 }
113 return true;
114 }
115
116 // Parses the URL part of a Link header. When successful |url_begin| points
117 // to the beginning of the url, |url_end| points to the end of the url and
118 // |params_begin| points to the first character after the '>' character at the
119 // end of the url.
120 bool ExtractURL(std::string::const_iterator begin,
121 std::string::const_iterator end,
122 std::string::const_iterator* url_begin,
123 std::string::const_iterator* url_end,
124 std::string::const_iterator* params_begin) {
125 // Extract the URL part (everything between '<' and first '>' character).
126 if (*begin != '<')
127 return false;
128
129 ++begin;
130 *url_begin = begin;
131 *url_end = std::find(begin, end, '>');
132
133 // Fail if we did not find a '>'.
134 if (*url_end == end)
135 return false;
136
137 *params_begin = *url_end;
138 // Skip the '>' at the end of the URL.
139 ++*params_begin;
140
141 // Trim whitespace from the URL.
142 net::HttpUtil::TrimLWS(url_begin, url_end);
143 return true;
144 }
145
146 } // namespace
147
148 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) {
149 std::vector<StringIteratorPair> values;
150 ValueTokenizer tokenizer(header.begin(), header.end());
151 while (tokenizer.GetNext()) {
152 values.push_back(
153 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end()));
154 }
155 return values;
156 }
157
158 // Parses one link in a link header into its url and parameters.
159 // A link is of the form "<some-url>; param1=value1; param2=value2".
160 // Returns false if parsing the link failed, returns true on success. This
161 // method is more lenient than the RFC. It doesn't fail on things like invalid
162 // characters in the URL, and also doesn't verify that certain parameters should
163 // or shouldn't be quoted strings.
164 // If a parameter occurs more than once in the link, only the first value is
165 // returned in params as this is the required behavior for all attributes chrome
166 // currently cares about in link headers.
167 bool ParseLinkHeaderValue(
168 std::string::const_iterator begin,
169 std::string::const_iterator end,
170 std::string* url,
171 std::unordered_map<std::string, base::Optional<std::string>>* params) {
172 // Can't parse an empty string.
173 if (begin == end)
174 return false;
175
176 // Extract the URL part (everything between '<' and first '>' character).
177 std::string::const_iterator url_begin;
178 std::string::const_iterator url_end;
179 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin))
180 return false;
181 *url = std::string(url_begin, url_end);
182
183 // Trim any remaining whitespace, and make sure there is a ';' separating
184 // parameters from the URL.
185 net::HttpUtil::TrimLWS(&begin, &end);
186 if (begin != end && *begin != ';')
187 return false;
188
189 // Parse all the parameters.
190 net::HttpUtil::NameValuePairsIterator params_iterator(
191 begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL,
192 net::HttpUtil::NameValuePairsIterator::STRICT_QUOTES);
193 while (params_iterator.GetNext()) {
194 if (!IsParameterName(params_iterator.name_begin(),
195 params_iterator.name_end()))
196 return false;
197 std::string name = base::ToLowerASCII(base::StringPiece(
198 params_iterator.name_begin(), params_iterator.name_end()));
199 if (!params_iterator.value_is_quoted() &&
200 params_iterator.value_begin() == params_iterator.value_end())
201 params->insert(std::make_pair(name, base::nullopt_t(0)));
dcheng 2016/04/20 04:41:06 Just write base::nullopt here.
Marijn Kruisselbrink 2016/04/20 21:07:48 Done. I initially wrote this against an older vers
202 else
203 params->insert(std::make_pair(name, params_iterator.value()));
204 }
205 return params_iterator.valid();
206 }
207
208 } // namespace link_header_util
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698