OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/extensions/api/web_request/post_data_parser.h" | |
6 | |
7 #include "base/values.h" | |
8 #include "net/base/escape.h" | |
9 #include "net/base/upload_data.h" | |
10 #include "net/url_request/url_request.h" | |
11 | |
12 namespace { | |
13 const char kContentDisposition[] = "Content-Disposition:"; | |
14 | |
15 // Takes |dictionary| of <string, list of strings> pairs, and gets the list | |
16 // for |key|, creating it if necessary. | |
17 ListValue* GetOrCreateList(DictionaryValue* dictionary, | |
18 const std::string& key) { | |
19 ListValue* list = NULL; | |
20 if (!dictionary->GetList(key, &list)) { | |
21 list = new ListValue(); | |
22 dictionary->Set(key, list); | |
23 } | |
24 return list; | |
25 } | |
26 } | |
wtc
2012/08/03 00:57:19
Nit: add a "// namespace" comment, like this:
}  // namespace
vabr (Chromium)
2012/08/05 18:54:46
Done.
| |
27 | |
28 namespace extensions { | |
29 | |
30 // Implementation of PostDataParser and PostDataParser::Result. | |
31 | |
32 PostDataParser::Result::Result() {} | |
33 PostDataParser::Result::~Result() {} | |
34 | |
35 void PostDataParser::Result::Reset() { | |
36 key_.erase(); | |
37 val_.erase(); | |
38 } | |
39 | |
40 void PostDataParser::Result::SetKey(const base::StringPiece& str) { | |
41 key_.replace(0, std::string::npos, str.data(), str.size()); | |
42 } | |
43 | |
44 void PostDataParser::Result::SetVal(const base::StringPiece& str) { | |
45 val_.replace(0, std::string::npos, str.data(), str.size()); | |
wtc
2012/08/03 00:57:19
I think you can use base::CopyToString (declared i
vabr (Chromium)
2012/08/05 18:54:46
Thanks, that's much more readable!
| |
46 } | |
47 | |
48 void PostDataParser::Result::SetKey(const std::string& str) { | |
49 key_ = str; | |
50 } | |
51 | |
52 void PostDataParser::Result::SetVal(const std::string& str) { | |
53 val_ = str; | |
54 } | |
wtc
2012/08/03 00:57:19
I think these four methods should be named set_key
vabr (Chromium)
2012/08/05 18:54:46
Done.
(For the record, if somebody asks me later:
Matt Perry
2012/08/06 21:06:45
Interesting, I didn't know that! Thanks!
| |
55 | |
56 PostDataParser::~PostDataParser() {} | |
57 | |
58 // static | |
59 scoped_ptr<PostDataParser> PostDataParser::CreatePostDataParser( | |
60 const net::URLRequest* request) { | |
61 std::string value; | |
62 const bool found = request->extra_request_headers().GetHeader( | |
63 net::HttpRequestHeaders::kContentType, &value); | |
64 return CreatePostDataParser(found ? &value : NULL); | |
65 } | |
66 | |
67 // static | |
68 scoped_ptr<PostDataParser> PostDataParser::CreatePostDataParser( | |
69 const std::string* content_type_header) { | |
wtc
2012/08/03 00:57:19
Nit: this argument is named |content_type| in the
vabr (Chromium)
2012/08/05 18:54:46
Done (corrected the header).
| |
70 enum ParserChoice {kUrlEncoded, kMultipart, kError}; | |
71 ParserChoice choice = kError; | |
72 std::string boundary; | |
73 | |
74 if (content_type_header == NULL) { | |
75 choice = kUrlEncoded; | |
76 } else { | |
77 const std::string content_type( | |
78 content_type_header->substr(0, content_type_header->find(';'))); | |
79 if (content_type == "application/x-www-form-urlencoded") { | |
80 choice = kUrlEncoded; | |
81 } else if (content_type == "multipart/form-data") { | |
wtc
2012/08/03 00:57:19
Should we do case-insensitive string comparison in
vabr (Chromium)
2012/08/05 18:54:46
That's a good point. RFC 2388 does not specify cas
| |
82 const char kBoundaryString[] = "boundary="; | |
wtc
2012/08/03 00:57:19
Nit: add 'static'
vabr (Chromium)
2012/08/05 18:54:46
Done.
| |
83 size_t offset = content_type_header->find(kBoundaryString); | |
84 if (offset == std::string::npos) { | |
85 // Malformed header. | |
86 return scoped_ptr<PostDataParser>(); | |
87 } | |
88 offset += strlen(kBoundaryString); | |
89 boundary = content_type_header->substr( | |
90 offset, content_type_header->find(';', offset)); | |
wtc
2012/08/03 00:57:19
I think we should also return scoped_ptr<PostDataP
vabr (Chromium)
2012/08/05 18:54:46
Indeed, thanks for catching this.
| |
91 choice = kMultipart; | |
92 } | |
93 } | |
94 // Other cases are unparseable, including when |content_type| is "text/plain". | |
95 | |
96 switch (choice) { | |
97 case kUrlEncoded: | |
98 return scoped_ptr<PostDataParser>(new PostDataParserUrlEncoded()); | |
99 case kMultipart: | |
100 return scoped_ptr<PostDataParser>(new PostDataParserMultipart(boundary)); | |
101 default: // In other words, case kError: | |
102 return scoped_ptr<PostDataParser>(); | |
103 } | |
104 } | |
105 | |
106 // static | |
107 scoped_ptr<base::DictionaryValue> PostDataParser::ParseURLRequestData( | |
108 const net::URLRequest* request) { | |
109 if (request->method() != "POST") | |
110 return scoped_ptr<base::DictionaryValue>(); | |
111 const std::vector<net::UploadData::Element>* elements = | |
112 request->get_upload()->elements(); | |
113 scoped_ptr<PostDataParser> parser = CreatePostDataParser(request); | |
114 if (parser.get() == NULL) { | |
wtc
2012/08/03 00:57:19
Nit: just do
if (!parser) {
vabr (Chromium)
2012/08/05 18:54:46
Function deleted in the meantime.
| |
115 // No parser means most probably unsupported form encoding. | |
116 return scoped_ptr<base::DictionaryValue>(); | |
117 } | |
118 scoped_ptr<base::DictionaryValue> form_data(new base::DictionaryValue); | |
119 std::vector<net::UploadData::Element>::const_iterator element; | |
120 bool data_valid = true; | |
121 for (element = elements->begin(); | |
122 data_valid && element != elements->end(); ++element) { | |
123 if (element->type() != net::UploadData::TYPE_BYTES) { | |
124 // We do not handle data including blobs or chunks. | |
125 if (element->type() != net::UploadData::TYPE_FILE) | |
126 data_valid = false; | |
wtc
2012/08/03 00:57:19
If we simply return scoped_ptr<base::DictionaryVal
vabr (Chromium)
2012/08/05 18:54:46
Function, and |data_valid| deleted in the meantime
| |
127 continue; | |
128 } | |
129 if (!parser->SetSource(&(element->bytes()))) | |
130 continue; | |
131 Result result; | |
132 while (parser->GetNextPair(&result)) { | |
133 GetOrCreateList(form_data.get(), result.key())->Append( | |
134 new StringValue(result.val())); | |
135 } | |
136 } | |
137 if (data_valid && parser->AllDataReadOK()) | |
138 return form_data.Pass(); | |
139 else | |
140 return scoped_ptr<base::DictionaryValue>(); | |
141 } | |
142 | |
143 // Implementation of PostDataParserUrlEncoded. | |
144 | |
145 PostDataParserUrlEncoded::PostDataParserUrlEncoded() : source_(NULL) {} | |
146 | |
147 PostDataParserUrlEncoded::~PostDataParserUrlEncoded() {} | |
148 | |
149 bool PostDataParserUrlEncoded::AllDataReadOK() { | |
150 return source_ != NULL && offset_ == source_->end(); | |
151 } | |
152 | |
153 bool PostDataParserUrlEncoded::GetNextPair(Result* result) { | |
154 result->Reset(); | |
155 if (source_ == NULL) | |
156 return false; | |
157 if (offset_ == source_->end()) | |
158 return false; | |
159 std::vector<char>::const_iterator seek = offset_; | |
160 // (*) Now we have |seek| >= |offset_| until the end of this function: | |
161 while (seek != source_->end() && *seek != '=') | |
162 ++seek; | |
163 if (seek == source_->end()) { | |
164 // This means the data is malformed. | |
165 offset_ = seek; | |
wtc
2012/08/03 00:57:19
BUG: if you set offset_ to seek here, it'll cause
vabr (Chromium)
2012/08/05 18:54:46
Indeed, very nicely spotted, thanks!
However, thi
| |
166 return false; | |
167 } | |
168 std::string encoded_key(&(*offset_), seek - offset_); // Safe, see (*). | |
wtc
2012/08/03 00:57:19
Is it necessary to do &(*offset_) ? I guess offse
vabr (Chromium)
2012/08/05 18:54:46
I'm afraid it is necessary. offset_ is a iterator,
| |
169 const net::UnescapeRule::Type unescape_rules = | |
170 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
171 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
172 result->SetKey(net::UnescapeURLComponent(encoded_key, unescape_rules)); | |
173 offset_ = ++seek; | |
174 while (seek != source_->end() && *seek != '&') | |
175 ++seek; | |
176 std::string encoded_val(&(*offset_), seek - offset_); // Safe, see (*). | |
177 result->SetVal(net::UnescapeURLComponent(encoded_val, unescape_rules)); | |
178 offset_ = (seek == source_->end()) ? seek : seek + 1; | |
179 return true; | |
180 } | |
181 | |
182 bool PostDataParserUrlEncoded::SetSource(const std::vector<char>* source) { | |
183 if (source_ != NULL) | |
184 return false; | |
wtc
2012/08/03 00:57:19
This allows SetSource() to be called only once. W
vabr (Chromium)
2012/08/05 18:54:46
I don't think Chrome/WebKit splits URLEncoded form
wtc
2012/08/09 22:02:50
Thank you for the explanation.
I was wondering if
vabr (Chromium)
2012/08/10 17:12:55
I can confirm that the each instance of PostDataPa
| |
185 source_ = source; | |
186 offset_ = source_->begin(); | |
187 return true; | |
188 } | |
189 | |
190 // Implementation of PostDataParserMultipart. | |
191 | |
192 PostDataParserMultipart::PostDataParserMultipart( | |
193 const std::string& boundary_separator) | |
194 : source_(NULL), | |
195 length_(0), // Dummy value. | |
196 line_start_(0), // Dummy value. | |
197 line_end_(0), // Dummy value. | |
198 next_line_(0), // Dummy value. | |
199 boundary_("--" + boundary_separator), | |
200 final_boundary_(boundary_ + "--"), | |
201 state_(kInit), | |
202 line_type_(kEmpty) // Dummy value. | |
203 {} | |
wtc
2012/08/03 00:57:19
Nit: the opening curly brace '{' probably should b
vabr (Chromium)
2012/08/05 18:54:46
Done.
| |
204 | |
205 PostDataParserMultipart::~PostDataParserMultipart() {} | |
206 | |
207 bool PostDataParserMultipart::AllDataReadOK() { | |
208 return source_ != NULL && next_line_ >= length_ && state_ == kFinal; | |
209 } | |
210 | |
211 // This function reads one block of the data, between two boundaries. | |
212 // First it reads the header to learn the key, and possibly also the | |
213 // value, if this block is for a file input element. | |
214 // Otherwise it then reads the value from the body. | |
215 bool PostDataParserMultipart::GetNextPair(Result* result) { | |
216 result->Reset(); | |
217 if (state_ == kError) | |
218 return false; | |
219 while (state_ != kHeadRead) { | |
220 if (!DoStep()) | |
221 return false; | |
222 } | |
223 bool val_extracted = false; | |
224 bool name_parsed = ParseHead(result, &val_extracted); | |
225 while (state_ != kBody) { | |
226 if (!DoStep()) | |
227 return false; | |
228 } | |
229 size_t val_start; | |
230 size_t val_end = 0; // Dummy value, replaced below, see (*). | |
231 // There may not be more to read from |source_| if the current result comes | |
232 // from a "file" input element. But then |result| is complete already. | |
233 if (!DoStep()) | |
234 return val_extracted; | |
235 val_start = line_start_; | |
236 // (*) Now state_ == kBody, so val_end gets updated below. | |
237 while (state_ != kHeadStart && state_ != kFinal) { | |
238 val_end = line_end_; | |
239 if (!DoStep()) break; | |
240 } | |
241 if (name_parsed && !val_extracted) { | |
242 result->SetVal(base::StringPiece(source_ + val_start, val_end - val_start)); | |
243 } | |
244 return name_parsed; | |
245 } | |
246 | |
247 bool PostDataParserMultipart::SetSource(const std::vector<char>* source) { | |
248 if (state_ == kError) | |
249 return false; | |
250 if (source_ != NULL && next_line_ < length_) | |
251 return false; | |
252 source_ = &(source->front()); | |
253 length_ = source->size(); | |
254 next_line_ = 0; | |
wtc
2012/08/03 00:57:19
Should we also set line_start_ and line_end_ to 0?
vabr (Chromium)
2012/08/05 18:54:46
We do not need to, they will be set accordingly as
wtc
2012/08/09 22:02:50
No, I don't. My previous suggestion was based on
vabr (Chromium)
2012/08/10 17:12:55
Just to be clear, PostDataParserMultipart::SetSour
| |
255 return true; | |
256 } | |
257 | |
258 bool PostDataParserMultipart::DoStep() { | |
259 if (!SeekNextLine()) | |
260 return false; | |
261 switch (state_) { | |
262 case kInit: | |
263 if (line_type_ == kBoundary) | |
264 state_ = kHeadStart; | |
265 else | |
266 state_ = kError; | |
267 break; | |
268 case kHeadStart: | |
269 if (line_type_ == kDisposition) | |
270 state_ = kHeadRead; | |
271 else | |
272 state_ = kHead; | |
273 break; | |
274 case kHead: | |
275 if (line_type_ == kDisposition) | |
276 state_ = kHeadRead; | |
277 break; | |
278 case kHeadRead: | |
279 if (line_type_ == kEmpty) | |
280 state_ = kBody; | |
281 break; | |
282 case kBody: | |
283 if (line_type_ == kBoundary) | |
284 state_ = kHeadStart; | |
285 else if (line_type_ == kEndBoundary) | |
286 state_ = kFinal; | |
287 break; | |
288 case kFinal: | |
289 if (line_type_ != kEmpty) | |
290 state_ = kError; | |
wtc
2012/08/03 00:57:19
Add a break statement.
vabr (Chromium)
2012/08/05 18:54:46
Done.
| |
291 case kError: | |
292 break; | |
293 } | |
294 return true; | |
295 } | |
296 | |
297 PostDataParserMultipart::LineType PostDataParserMultipart::GetLineType() { | |
298 const size_t line_length = line_end_ - line_start_; | |
299 const base::StringPiece line(source_ + line_start_, line_length); | |
300 if (line == boundary_) | |
301 return kBoundary; | |
302 else if (line == final_boundary_) | |
303 return kEndBoundary; | |
wtc
2012/08/03 00:57:19
Nit: I suggest making "final boundary" and "end bo
vabr (Chromium)
2012/08/05 18:54:46
Done.
| |
304 else if (line.starts_with(kContentDisposition)) | |
305 return kDisposition; | |
306 else if (line_start_ == line_end_) | |
307 return kEmpty; | |
308 else | |
309 return kOther; | |
310 } | |
311 | |
312 // Contract: only to be called from DoStep(). | |
313 bool PostDataParserMultipart::SeekNextLine() { | |
314 if (source_ == NULL || state_ == kError) | |
315 return false; | |
316 if (next_line_ >= length_) | |
317 return false; | |
318 line_start_ = next_line_; | |
319 size_t seek = line_start_; | |
320 while (seek < length_ && *(source_ + seek) != '\r') | |
321 ++seek; | |
322 line_end_ = seek; | |
323 line_type_ = GetLineType(); | |
324 if ((seek+1) < length_ && strncmp(source_ + seek, "\r\n", 2) != 0) | |
325 return false; | |
326 next_line_ = seek + 2; | |
wtc
2012/08/03 00:57:19
If the data does not end with a final "\r\n", this
vabr (Chromium)
2012/08/05 18:54:46
Due to adding a check for garbage at the end, this
| |
327 return true; | |
328 } | |
329 | |
330 // Contract: line_type_ == kDisposition. | |
331 bool PostDataParserMultipart::ParseHead(Result* result, bool* val_extracted) { | |
332 DCHECK_EQ(kDisposition, line_type_); | |
333 base::StringPiece line(source_ + line_start_, line_end_ - line_start_); | |
334 const char kNameEquals[] = " name=\""; | |
335 const char kFilenameEquals[] = " filename=\""; | |
336 size_t key_offset = line.find(kNameEquals); | |
337 if (key_offset == base::StringPiece::npos) | |
338 return false; | |
339 key_offset += strlen(kNameEquals); | |
340 result->SetKey(base::StringPiece(source_ + line_start_ + key_offset, | |
341 line.find('"', key_offset) - key_offset)); | |
342 size_t val_offset = line.find(kFilenameEquals); | |
343 if (val_offset == std::string::npos) { | |
344 *val_extracted = false; | |
345 } else { | |
346 *val_extracted = true; | |
347 val_offset += strlen(kFilenameEquals); | |
348 result->SetVal(base::StringPiece(source_ + line_start_ + val_offset, | |
349 line.find('"', val_offset) - val_offset)); | |
350 } | |
351 return true; | |
352 } | |
353 | |
354 } // namespace extensions | |
OLD | NEW |