OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/link_header_util/link_header_util.h" | |
6 | |
7 #include "base/strings/string_util.h" | |
8 #include "net/http/http_util.h" | |
9 | |
10 namespace link_header_util { | |
11 | |
12 namespace { | |
13 | |
14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. | |
15 // Takes the parsing of StringTokenizer and adds support for quoted strings that | |
16 // are quoted by matching <> (and does not support escaping in those strings). | |
17 // Also has the behavior of ValuesIterator where it strips whitespace from all | |
18 // values and only outputs non-empty values. | |
19 // Only supports ',' as separator and supports '' "" and <> as quote chars. | |
20 class ValueTokenizer { | |
21 public: | |
22 ValueTokenizer(std::string::const_iterator begin, | |
23 std::string::const_iterator end) | |
24 : token_begin_(begin), token_end_(begin), end_(end) {} | |
25 | |
26 std::string::const_iterator token_begin() const { return token_begin_; } | |
27 std::string::const_iterator token_end() const { return token_end_; } | |
28 | |
29 bool GetNext() { | |
30 while (GetNextInternal()) { | |
31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_); | |
32 | |
33 // Only return non-empty values. | |
34 if (token_begin_ != token_end_) | |
35 return true; | |
36 } | |
37 return false; | |
38 } | |
39 | |
40 private: | |
41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next | |
42 // token. Returns false if end-of-string was reached first. | |
43 bool GetNextInternal() { | |
44 // First time this is called token_end_ points to the first character in the | |
45 // input. Every other time token_end_ points to the delimiter at the end of | |
46 // the last returned token (which could be the end of the string). | |
47 | |
48 // End of string, return false. | |
49 if (token_end_ == end_) | |
50 return false; | |
51 | |
52 // Skip past the delimiter. | |
53 if (*token_end_ == ',') | |
54 ++token_end_; | |
55 | |
56 // Make token_begin_ point to the beginning of the next token, and search | |
57 // for the end of the token in token_end_. | |
58 token_begin_ = token_end_; | |
59 | |
60 // Set to true if we're currently inside a quoted string. | |
61 bool in_quote = false; | |
62 // Set to true if we're currently inside a quoted string, and have just | |
63 // encountered an escape character. In this case a closing quote will be | |
64 // ignored. | |
65 bool in_escape = false; | |
66 // If currently in a quoted string, this is the character that (when not | |
67 // escaped) indicates the end of the string. | |
68 char quote_close_char = '\0'; | |
69 // If currently in a quoted string, this is set to true if it is possible to | |
70 // escape the closing quote using '\'. | |
71 bool quote_allows_escape = false; | |
72 | |
73 while (token_end_ != end_) { | |
74 char c = *token_end_; | |
75 if (in_quote) { | |
76 if (in_escape) { | |
77 in_escape = false; | |
78 } else if (quote_allows_escape && c == '\\') { | |
79 in_escape = true; | |
80 } else if (c == quote_close_char) { | |
81 in_quote = false; | |
82 } | |
83 } else { | |
84 if (c == ',') | |
85 break; | |
86 if (c == '\'' || c == '"' || c == '<') { | |
87 in_quote = true; | |
88 quote_close_char = (c == '<' ? '>' : c); | |
89 quote_allows_escape = (c != '<'); | |
90 } | |
91 } | |
92 ++token_end_; | |
93 } | |
94 return true; | |
95 } | |
96 | |
97 std::string::const_iterator token_begin_; | |
98 std::string::const_iterator token_end_; | |
99 std::string::const_iterator end_; | |
100 }; | |
101 | |
102 // TODO(mek): Figure out if it makes sense to move this function to | |
103 // net::HttpUtil, as it implements the "parmname" rule from | |
104 // https://tools.ietf.org/html/rfc5987#section-3.2.1 | |
105 bool IsParameterName(std::string::const_iterator begin, | |
106 std::string::const_iterator end) { | |
107 if (!net::HttpUtil::IsToken(begin, end)) | |
108 return false; | |
109 for (auto it = begin; it != end; ++it) { | |
110 if (*it == '*' || *it == '\'' || *it == '%') | |
111 return false; | |
112 } | |
113 return true; | |
114 } | |
115 | |
116 // Parses the URL part of a Link header. When successful |url_begin| points | |
117 // to the beginning of the url, |url_end| points to the end of the url and | |
118 // |params_begin| points to the first character after the '>' character at the | |
119 // end of the url. | |
120 bool ExtractURL(std::string::const_iterator begin, | |
121 std::string::const_iterator end, | |
122 std::string::const_iterator* url_begin, | |
123 std::string::const_iterator* url_end, | |
124 std::string::const_iterator* params_begin) { | |
125 // Extract the URL part (everything between '<' and first '>' character). | |
126 if (*begin != '<') | |
127 return false; | |
128 | |
129 ++begin; | |
130 *url_begin = begin; | |
131 *url_end = std::find(begin, end, '>'); | |
132 | |
133 // Fail if we did not find a '>'. | |
134 if (*url_end == end) | |
135 return false; | |
136 | |
137 *params_begin = *url_end; | |
138 // Skip the '>' at the end of the URL. | |
139 ++*params_begin; | |
140 | |
141 // Trim whitespace from the URL. | |
142 net::HttpUtil::TrimLWS(url_begin, url_end); | |
143 return true; | |
144 } | |
145 | |
146 } // namespace | |
147 | |
148 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { | |
149 std::vector<StringIteratorPair> values; | |
150 ValueTokenizer tokenizer(header.begin(), header.end()); | |
151 while (tokenizer.GetNext()) { | |
152 values.push_back( | |
153 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); | |
154 } | |
155 return values; | |
156 } | |
157 | |
158 // Parses one link in a link header into its url and parameters. | |
159 // A link is of the form "<some-url>; param1=value1; param2=value2". | |
160 // Returns false if parsing the link failed, returns true on success. This | |
161 // method is more lenient than the RFC. It doesn't fail on things like invalid | |
162 // characters in the URL, and also doesn't verify that certain parameters should | |
163 // or shouldn't be quoted strings. | |
164 // If a parameter occurs more than once in the link, only the first value is | |
165 // returned in params as this is the required behavior for all attributes chrome | |
166 // currently cares about in link headers. | |
167 bool ParseLinkHeaderValue( | |
168 std::string::const_iterator begin, | |
169 std::string::const_iterator end, | |
170 std::string* url, | |
171 std::unordered_map<std::string, base::Optional<std::string>>* params) { | |
172 // Can't parse an empty string. | |
173 if (begin == end) | |
174 return false; | |
175 | |
176 // Extract the URL part (everything between '<' and first '>' character). | |
177 std::string::const_iterator url_begin; | |
178 std::string::const_iterator url_end; | |
179 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin)) | |
180 return false; | |
181 *url = std::string(url_begin, url_end); | |
182 | |
183 // Trim any remaining whitespace, and make sure there is a ';' separating | |
184 // parameters from the URL. | |
185 net::HttpUtil::TrimLWS(&begin, &end); | |
186 if (begin != end && *begin != ';') | |
187 return false; | |
188 | |
189 // Parse all the parameters. | |
190 net::HttpUtil::NameValuePairsIterator params_iterator( | |
191 begin, end, ';', net::HttpUtil::NameValuePairsIterator::VALUES_OPTIONAL, | |
192 net::HttpUtil::NameValuePairsIterator::STRICT_QUOTES); | |
193 while (params_iterator.GetNext()) { | |
194 if (!IsParameterName(params_iterator.name_begin(), | |
195 params_iterator.name_end())) | |
196 return false; | |
197 std::string name = base::ToLowerASCII(base::StringPiece( | |
198 params_iterator.name_begin(), params_iterator.name_end())); | |
199 if (!params_iterator.value_is_quoted() && | |
200 params_iterator.value_begin() == params_iterator.value_end()) | |
201 params->insert(std::make_pair(name, base::nullopt_t(0))); | |
dcheng
2016/04/20 04:41:06
Just write base::nullopt here.
Marijn Kruisselbrink
2016/04/20 21:07:48
Done. I initially wrote this against an older vers
| |
202 else | |
203 params->insert(std::make_pair(name, params_iterator.value())); | |
204 } | |
205 return params_iterator.valid(); | |
206 } | |
207 | |
208 } // namespace link_header_util | |
OLD | NEW |