OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/link_header_util/link_header_util.h" | |
6 | |
7 #include "base/strings/string_util.h" | |
8 #include "net/http/http_util.h" | |
9 | |
10 namespace link_header_util { | |
11 | |
12 namespace { | |
13 | |
14 // A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. | |
15 // Takes the parsing of StringTokenizer and adds support for quoted strings that | |
16 // are quoted by matching <> (and does not support escaping in those strings). | |
17 // Also has the behavior of ValuesIterator where it strips whitespace from all | |
18 // values and only outputs non-empty values. | |
19 // Only supports ',' as separator and supports '' "" and <> as quote chars. | |
20 class ValueTokenizer { | |
21 public: | |
22 ValueTokenizer(std::string::const_iterator begin, | |
23 std::string::const_iterator end) | |
24 : token_begin_(begin), token_end_(begin), end_(end) {} | |
25 | |
26 std::string::const_iterator token_begin() const { return token_begin_; } | |
27 std::string::const_iterator token_end() const { return token_end_; } | |
28 | |
29 bool GetNext() { | |
30 while (GetNextInternal()) { | |
31 net::HttpUtil::TrimLWS(&token_begin_, &token_end_); | |
32 | |
33 // Only return non-empty values. | |
34 if (token_begin_ != token_end_) | |
35 return true; | |
36 } | |
37 return false; | |
38 } | |
39 | |
40 private: | |
41 // Updates token_begin_ and token_end_ to point to the (possibly empty) next | |
42 // token. Returns false if end-of-string was reached first. | |
43 bool GetNextInternal() { | |
44 // First time this is called token_end_ points to the first character in the | |
45 // input. Every other time token_end_ points to the delimiter at the end of | |
46 // the last returned token (which could be the end of the string). | |
47 | |
48 // End of string, return false. | |
49 if (token_end_ == end_) | |
50 return false; | |
51 | |
52 // Skip past the delimiter. | |
53 if (*token_end_ == ',') | |
54 ++token_end_; | |
55 | |
56 // Make token_begin_ point to the beginning of the next token, and search | |
57 // for the end of the token in token_end_. | |
58 token_begin_ = token_end_; | |
59 | |
60 // Set to true if we're currently inside a quoted string. | |
61 bool in_quote = false; | |
62 // Set to true if we're currently inside a quoted string, and have just | |
63 // encountered an escape character. In this case a closing quote will be | |
64 // ignored. | |
65 bool in_escape = false; | |
66 // If currently in a quoted string, this is the character that (when not | |
67 // escaped) indicates the end of the string. | |
68 char quote_close_char = '\0'; | |
69 // If currently in a quoted string, this is set to true if it is possible to | |
70 // escape the closing quote using '\'. | |
71 bool quote_allows_escape = false; | |
72 | |
73 while (token_end_ != end_) { | |
74 char c = *token_end_; | |
75 if (in_quote) { | |
76 if (in_escape) { | |
77 in_escape = false; | |
78 } else if (quote_allows_escape && c == '\\') { | |
79 in_escape = true; | |
80 } else if (c == quote_close_char) { | |
81 in_quote = false; | |
82 } | |
83 } else { | |
84 if (c == ',') | |
85 break; | |
86 if (c == '\'' || c == '"' || c == '<') { | |
87 in_quote = true; | |
88 quote_close_char = (c == '<' ? '>' : c); | |
89 quote_allows_escape = (c != '<'); | |
90 } | |
91 } | |
92 ++token_end_; | |
93 } | |
94 return true; | |
95 } | |
96 | |
97 std::string::const_iterator token_begin_; | |
98 std::string::const_iterator token_end_; | |
99 std::string::const_iterator end_; | |
100 }; | |
jochen (gone - plz use gerrit)
2016/04/27 13:14:33
disallow copy/assign
| |
101 | |
102 // Parses the URL part of a Link header. When successful |url_begin| points | |
103 // to the beginning of the url, |url_end| points to the end of the url and | |
104 // |params_begin| points to the first character after the '>' character at the | |
105 // end of the url. | |
106 bool ExtractURL(std::string::const_iterator begin, | |
107 std::string::const_iterator end, | |
108 std::string::const_iterator* url_begin, | |
109 std::string::const_iterator* url_end, | |
110 std::string::const_iterator* params_begin) { | |
111 // Extract the URL part (everything between '<' and first '>' character). | |
112 if (*begin != '<') | |
113 return false; | |
114 | |
115 ++begin; | |
116 *url_begin = begin; | |
117 *url_end = std::find(begin, end, '>'); | |
118 | |
119 // Fail if we did not find a '>'. | |
120 if (*url_end == end) | |
121 return false; | |
122 | |
123 *params_begin = *url_end; | |
124 // Skip the '>' at the end of the URL. | |
125 ++*params_begin; | |
126 | |
127 // Trim whitespace from the URL. | |
128 net::HttpUtil::TrimLWS(url_begin, url_end); | |
129 return true; | |
130 } | |
131 | |
132 } // namespace | |
133 | |
134 std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { | |
135 std::vector<StringIteratorPair> values; | |
136 ValueTokenizer tokenizer(header.begin(), header.end()); | |
137 while (tokenizer.GetNext()) { | |
138 values.push_back( | |
139 StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); | |
140 } | |
141 return values; | |
142 } | |
143 | |
144 // Parses one link in a link header into its url and parameters. | |
145 // A link is of the form "<some-url>; param1=value1; param2=value2". | |
146 // Returns false if parsing the link failed, returns true on success. This | |
147 // method is more lenient than the RFC. It doesn't fail on things like invalid | |
148 // characters in the URL, and also doesn't verify that certain parameters should | |
149 // or shouldn't be quoted strings. | |
150 // If a parameter occurs more than once in the link, only the first value is | |
151 // returned in params as this is the required behavior for all attributes chrome | |
152 // currently cares about in link headers. | |
153 bool ParseLinkHeaderValue( | |
154 std::string::const_iterator begin, | |
155 std::string::const_iterator end, | |
156 std::string* url, | |
157 std::unordered_map<std::string, base::Optional<std::string>>* params) { | |
158 // Can't parse an empty string. | |
159 if (begin == end) | |
160 return false; | |
161 | |
162 // Extract the URL part (everything between '<' and first '>' character). | |
163 std::string::const_iterator url_begin; | |
164 std::string::const_iterator url_end; | |
165 if (!ExtractURL(begin, end, &url_begin, &url_end, &begin)) | |
166 return false; | |
167 *url = std::string(url_begin, url_end); | |
168 | |
169 // Trim any remaining whitespace, and make sure there is a ';' separating | |
170 // parameters from the URL. | |
171 net::HttpUtil::TrimLWS(&begin, &end); | |
172 if (begin != end && *begin != ';') | |
173 return false; | |
174 | |
175 // Parse all the parameters. | |
176 net::HttpUtil::NameValuePairsIterator params_iterator( | |
177 begin, end, ';', net::HttpUtil::NameValuePairsIterator::Values::OPTIONAL, | |
178 net::HttpUtil::NameValuePairsIterator::Quotes::STRICT); | |
179 while (params_iterator.GetNext()) { | |
180 if (!net::HttpUtil::IsParmName(params_iterator.name_begin(), | |
181 params_iterator.name_end())) | |
182 return false; | |
183 std::string name = base::ToLowerASCII(base::StringPiece( | |
184 params_iterator.name_begin(), params_iterator.name_end())); | |
185 if (!params_iterator.value_is_quoted() && | |
186 params_iterator.value_begin() == params_iterator.value_end()) | |
187 params->insert(std::make_pair(name, base::nullopt)); | |
188 else | |
189 params->insert(std::make_pair(name, params_iterator.value())); | |
190 } | |
191 return params_iterator.valid(); | |
192 } | |
193 | |
194 } // namespace link_header_util | |
OLD | NEW |