OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
6 | |
7 #include "base/string_util.h" | |
8 #include "base/values.h" | |
9 #include "net/base/escape.h" | |
10 #include "net/url_request/url_request.h" | |
11 | |
12 using base::DictionaryValue; | |
13 using base::ListValue; | |
14 using base::StringPiece; | |
15 | |
16 namespace extensions { | |
17 | |
18 // Implementation of FormDataParser and FormDataParser::Result . | |
19 | |
20 FormDataParser::Result::Result() {} | |
21 FormDataParser::Result::~Result() {} | |
22 | |
23 void FormDataParser::Result::Reset() { | |
24 name_.erase(); | |
25 value_.erase(); | |
26 } | |
27 | |
28 FormDataParser::~FormDataParser() {} | |
29 | |
30 // static | |
31 scoped_ptr<FormDataParser> FormDataParser::Create( | |
32 const net::URLRequest* request) { | |
33 std::string value; | |
34 const bool found = request->extra_request_headers().GetHeader( | |
35 net::HttpRequestHeaders::kContentType, &value); | |
36 return Create(found ? &value : NULL); | |
37 } | |
38 | |
39 // static | |
40 scoped_ptr<FormDataParser> FormDataParser::Create( | |
41 const std::string* content_type_header) { | |
42 enum ParserChoice {kUrlEncoded, kMultipart, kError}; | |
43 ParserChoice choice = kError; | |
44 std::string boundary; | |
45 | |
46 if (content_type_header == NULL) { | |
47 choice = kUrlEncoded; | |
48 } else { | |
49 const std::string content_type( | |
50 content_type_header->substr(0, content_type_header->find(';'))); | |
51 | |
52 if (base::strcasecmp( | |
53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
54 choice = kUrlEncoded; | |
55 } else if (base::strcasecmp( | |
56 content_type.c_str(), "multipart/form-data") == 0) { | |
57 static const char kBoundaryString[] = "boundary="; | |
58 size_t offset = content_type_header->find(kBoundaryString); | |
59 if (offset == std::string::npos) { | |
60 // Malformed header. | |
61 return scoped_ptr<FormDataParser>(); | |
62 } | |
63 offset += strlen(kBoundaryString); | |
64 boundary = content_type_header->substr( | |
65 offset, content_type_header->find(';', offset)); | |
66 if (!boundary.empty()) | |
67 choice = kMultipart; | |
68 } | |
69 } | |
70 // Other cases are unparseable, including when |content_type| is "text/plain". | |
71 | |
72 switch (choice) { | |
73 case kUrlEncoded: | |
74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
75 case kMultipart: | |
76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
77 default: // In other words, case kError: | |
78 return scoped_ptr<FormDataParser>(); | |
79 } | |
80 } | |
81 | |
82 FormDataParser::FormDataParser() {} | |
83 | |
84 // Implementation of FormDataParserUrlEncoded. | |
85 | |
86 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
87 : source_(NULL), | |
88 aborted_(false), | |
89 equality_signs_(0), | |
90 amp_signs_(0), | |
91 expect_equality_(true) { | |
92 } | |
93 | |
94 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
95 | |
96 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
97 return source_ != NULL && | |
98 !aborted_ && | |
99 offset_ == source_->end() && | |
100 equality_signs_ == amp_signs_ + 1; | |
101 } | |
102 | |
103 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
104 result->Reset(); | |
105 if (source_ == NULL || aborted_) | |
106 return false; | |
107 if (offset_ == source_->end()) | |
108 return false; | |
109 const char* const name_start = &(*offset_); | |
110 char c; | |
111 bool last_read_success = GetNextChar(&c); | |
112 while (last_read_success && c != '=') | |
113 last_read_success = GetNextChar(&c); | |
114 if (!last_read_success) { // This means the data is malformed. | |
115 Abort(); | |
116 return false; | |
117 } | |
118 const char* const name_end = &(*(offset_ - 1)); | |
119 const std::string encoded_name(name_start, name_end - name_start); | |
120 const net::UnescapeRule::Type unescape_rules = | |
121 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
122 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
123 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); | |
124 | |
125 const char* const value_start = &(*offset_); | |
126 last_read_success = GetNextChar(&c); | |
127 while (last_read_success && c != '&') | |
128 last_read_success = GetNextChar(&c); | |
129 const char* const value_end = | |
130 last_read_success ? &(*(offset_ - 1)) : &(*offset_); | |
131 const std::string encoded_value(value_start, value_end - value_start); | |
132 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); | |
133 return true; | |
134 } | |
135 | |
136 bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) { | |
137 if (source_ != NULL || source == NULL || aborted_) | |
138 return false; | |
139 source_ = source; | |
140 offset_ = source_->begin(); | |
141 return true; | |
142 } | |
143 | |
144 bool FormDataParserUrlEncoded::GetNextChar(char* c) { | |
145 if (offset_ == source_->end() || aborted_) | |
146 return false; | |
147 *c = *offset_; | |
148 ++offset_; | |
149 | |
150 if (*c == '=') { | |
151 if (expect_equality_) { | |
152 ++equality_signs_; | |
153 expect_equality_ = false; | |
154 } else { | |
155 Abort(); | |
156 return false; | |
157 } | |
158 } | |
159 if (*c == '&' && offset_ != source_->end()) { | |
160 if (!expect_equality_) { | |
161 ++amp_signs_; | |
162 expect_equality_ = true; | |
163 } else { | |
164 Abort(); | |
165 return false; | |
166 } | |
167 } | |
168 | |
169 return true; | |
170 } | |
171 | |
172 void FormDataParserUrlEncoded::Abort() { | |
173 aborted_ = true; | |
174 } | |
175 | |
176 // Implementation of FormDataParserMultipart. | |
177 | |
178 FormDataParserMultipart::FormDataParserMultipart( | |
179 const std::string& boundary_separator) | |
180 : source_(NULL), | |
181 dash_boundary_("--" + boundary_separator), | |
182 state_(kStart), | |
183 value_name_present_(false) { | |
184 } | |
185 | |
186 FormDataParserMultipart::~FormDataParserMultipart() {} | |
187 | |
188 bool FormDataParserMultipart::AllDataReadOK() { | |
189 return source_ != NULL && InFinalState(); | |
190 } | |
191 | |
192 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
193 if (!value_name_present_ || state_ == kError) | |
194 return false; | |
195 result->set_name(next_name_); | |
196 result->set_value(next_value_); | |
197 next_name_.clear(); | |
198 next_value_.clear(); | |
199 value_name_present_ = ReadNextNameValue(); | |
200 return true; | |
201 } | |
202 | |
203 bool FormDataParserMultipart::SetSource(const std::vector<char>* source) { | |
204 if (state_ == kError || | |
205 source == NULL || | |
206 // Message part across a source split is also an error. | |
207 next_name_.data() != NULL || next_value_.data() != NULL) | |
208 return false; | |
209 if (source_ != NULL && offset_ != source_->end()){ | |
210 // Try to seek until the end. If no name-value pair is found, this is OK. | |
211 value_name_present_ = ReadNextNameValue(); | |
212 if (!value_name_present_ || offset_ != source_->end()) | |
213 return false; | |
214 } | |
215 source_ = source; | |
216 offset_ = source_->begin(); | |
217 value_name_present_ = ReadNextNameValue(); | |
218 return true; | |
219 } | |
220 | |
221 // static | |
222 char FormDataParserMultipart::kTransitionToChar[] = { | |
223 '\n', // For kLF. | |
224 '\r', // For kCR. | |
225 0, // For kAscii. | |
226 0, // For kLwsp. | |
227 0, // For kDashBoundary. | |
228 ':', // For kColonT. | |
229 '-', // For kDash. | |
230 0, // For kAny. | |
231 }; | |
232 | |
233 // static | |
234 FormDataParserMultipart::Transition | |
235 FormDataParserMultipart::kAvailableTransitions[] = { | |
236 kDashBoundary, kCR, kAny, // For kStart. | |
237 kLF, kAny, // For kCR1. | |
238 kCR, kAny, // For kIgnorePreamble. | |
239 kLwsp, kCR, kAny, // For kDB1. | |
240 kLF, kAny, // For kCR2. | |
241 kAscii, kCR, kAny, // For kPart. | |
242 kAscii, kColonT, kAny, // For kName. | |
243 kLF, kCR, kAny, // For kColonS. | |
244 kCR, kAscii, kAny, // For kEnd1. | |
245 kLF, kCR, kAscii, kAny, // For kEnd2. | |
246 kLwsp, kCR, kAscii, kAny, // For kEnd3. | |
247 kLF, kAny, // For kCR3. | |
248 kDashBoundary, kCR, kAny, // For kPreData. | |
249 kLF, kAny, // For kCR4. | |
250 kCR, kAny, // For kData. | |
251 kDashBoundary, kAny, // For kData2. | |
252 kLwsp, kCR, kDash, kAny, // For kDB2. | |
253 kDash, kAny, // For kD. | |
254 kLwsp, kCR, kAny, // For kEnd. | |
255 kLF, kAny, // For kCR5. | |
256 kAny, // For kIgnoreEpilogue. | |
257 kAny // For kError. | |
258 }; | |
259 | |
260 // static | |
261 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { | |
262 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. | |
263 kStart, kIgnorePreamble, // For kCR1; 05. | |
264 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. | |
265 kDB1, kCR2, kError, // For kDB1; 10. | |
266 kPart, kError, // For kCR2; 12. | |
267 kName, kCR3, kError, // For kPart; 15. | |
268 kName, kColonS, kError, // For kName; 18. | |
269 kEnd1, kEnd2, kColonS, // For kColonS; 21. | |
270 kCR3, kName, kError, // For kEnd1; 24. | |
271 kEnd3, kCR3, kName, kError, // For kEnd2; 28. | |
272 kColonS, kCR3, kName, kError, // For kEnd3; 32. | |
273 kPreData, kError, // For kCR3; 34. | |
274 kDB2, kCR3, kData, // For kPreData; 37. | |
275 kData2, kData, // For kCR4; 39. | |
276 kCR4, kData, // For kData; 41. | |
277 kDB2, kCR4, // For kData2; 43. | |
278 kDB1, kCR2, kD, kError, // For kDB2; 47. | |
279 kEnd, kError, // For kD; 49. | |
280 kEnd, kCR5, kError, // For kEnd; 52. | |
281 kIgnoreEpilogue, kError, // For kCR5; 54. | |
282 kIgnoreEpilogue, // For kIgnoreEpilogue; 55. | |
283 kError // For kError; 56. | |
284 }; | |
285 | |
286 // static | |
287 size_t FormDataParserMultipart::kStateToTransition[] = { | |
288 0u, // For kStart | |
289 3u, // For kCR1 | |
290 5u, // For kIgnorePreamble | |
291 7u, // For kDB1 | |
292 10u, // For kCR2 | |
293 12u, // For kPart | |
294 15u, // For kName | |
295 18u, // For kColonS | |
296 21u, // For kEnd1 | |
297 24u, // For kEnd2 | |
298 28u, // For kEnd3 | |
299 32u, // For kCR3 | |
300 34u, // For kPreData | |
301 37u, // For kCR4 | |
302 39u, // For kData | |
303 41u, // For kData2 | |
304 43u, // For kDB2 | |
305 47u, // For kD | |
306 49u, // For kEnd | |
307 52u, // For kCR5 | |
308 54u, // For kIgnoreEpilogue | |
309 55u, // For kError | |
310 }; | |
311 | |
312 bool FormDataParserMultipart::ReadNextNameValue() { | |
313 if (state_ == kError || source_ == NULL || | |
314 next_name_.data() != NULL || next_value_.data() != NULL) | |
315 return false; | |
316 | |
317 // Seek to the next part's headers. | |
318 while (state_ != kPart) | |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
319 if (!DoStep()) | |
320 return false; | |
321 while (state_ != kPreData) { | |
322 const char* header = &(*offset_); | |
323 while (state_ != kColonS) | |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
324 if (!DoStep()) | |
325 return false; | |
326 size_t header_length = 0u; | |
327 while (state_ != kPreData && state_ != kName) { | |
328 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) | |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
329 // The cast is safe, we know that offset only moves forward. | |
330 header_length = static_cast<size_t>(&(*offset_) - header); | |
331 if (!DoStep()) | |
332 return false; | |
333 } | |
334 if (ParseHeader(base::StringPiece(header, header_length))) { | |
335 // Found what we were looking for, just skip to the part's body. | |
336 while (state_ != kPreData) | |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
337 if (!DoStep()) | |
338 return false; | |
339 } | |
340 } | |
341 | |
342 const char* body = &(*offset_); | |
343 size_t body_length = 0; | |
344 while (state_ != kDB2 && offset_ != source_->end()) { | |
345 if (!DoStep()) | |
346 return false; | |
347 if (state_ == kCR4) | |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
348 // We are in the middle of which might be the CRLF starting the part | |
349 // separator (see the "delimiter" non-terminal from the grammar given | |
350 // in the header file). The cast is safe, we know that offset only moves | |
351 // forward and body was assigned at least 1 transition ago. | |
352 body_length = static_cast<size_t>(&(*offset_) - body - 1); | |
353 } | |
354 if (body_length > 0) | |
355 next_value_.set(body, body_length); | |
356 return true; | |
357 } | |
358 | |
359 bool FormDataParserMultipart::DoStep() { | |
360 if (state_ == kError || source_ == NULL || offset_ == source_->end()) | |
361 return false; | |
362 size_t transition_index = kStateToTransition[state_]; | |
363 Transition t = kAvailableTransitions[transition_index]; | |
364 while (t != kAny) { | |
365 const State s = kNextState[transition_index]; | |
366 const size_t length = LookUp(t); | |
367 if (length > 0) { | |
368 offset_ += length; | |
369 state_ = s; | |
370 return true; | |
371 } | |
372 t = kAvailableTransitions[++transition_index]; | |
373 } | |
374 // We have kAny, the default choice. Seek by one and switch the state. | |
375 ++offset_; | |
376 state_ = kNextState[transition_index]; | |
377 return true; | |
378 } | |
379 | |
380 // Contract -- the following must be true: | |
381 // source_ != NULL && offset_ != source.end() | |
382 // The idea is to check this only once in the caller (DoStep()), and do not | |
383 // repeat it here every time, as this can be called many times from one call | |
384 // to DoStep(). | |
385 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { | |
386 const char ahead = *offset_; | |
387 const char first_char = kTransitionToChar[t]; | |
388 | |
389 // Easy case: labels corresponding to a single char. | |
390 if (first_char != 0) | |
391 return ahead == first_char ? 1u : 0u; | |
392 | |
393 // Harder cases. | |
394 switch (t) { | |
395 // Multiple alternatives, 1-char long: return immediately. | |
396 case kAscii: | |
397 return ahead >= 33 && ahead <= 126 && ahead != ':' ? 1u : 0u; | |
398 case kLwsp: | |
399 return ahead == ' ' || ahead == '\t' ? 1u : 0u; | |
battre
2012/08/16 19:18:03
nit: () around condition, also above
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
400 | |
401 // Longer than 1 char: prepare work for later. | |
402 case kDashBoundary: { | |
403 const size_t length = dash_boundary_.size(); | |
404 // The cast below is safe, we know that the difference is not negative. | |
405 if (static_cast<size_t>(source_->end() - offset_) < length || | |
406 memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0) | |
vabr (Chromium)
2012/08/16 08:00:59
Doing this each time is unnecessary expensive. It
| |
407 return 0u; | |
408 return length; | |
409 } | |
410 case kAny: | |
411 // We are not supposed to be asked for kAny, but this is the right answer: | |
412 return 1u; | |
413 default: // We never get here -- the rest has already been handled above. | |
414 NOTREACHED(); | |
415 return 0u; | |
416 } | |
417 } | |
418 | |
419 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { | |
420 static const char kContentDisposition[] = "Content-Disposition:"; | |
421 if (memcmp( | |
422 header.data(), kContentDisposition, strlen(kContentDisposition) != 0)) | |
battre
2012/08/16 19:18:03
I would move header.data(), kContentDisposition, t
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
423 return false; | |
424 static const char kNameEquals[] = " name=\""; | |
425 static const char kFilenameEquals[] = " filename=\""; | |
426 | |
427 // Mandatory part: find the name and set it as |next_name_|. | |
428 StringPiece::size_type field_offset = header.find(kNameEquals); | |
429 if (field_offset == StringPiece::npos) | |
430 return false; | |
431 field_offset += strlen(kNameEquals); | |
432 StringPiece::size_type field_end = header.find('"', field_offset); | |
433 if (field_end == StringPiece::npos) | |
434 return false; | |
435 next_name_.set(header.data() + field_offset, field_end - field_offset); | |
436 | |
437 // Optional part: find the filename and set it as |next_value_|. | |
438 field_offset = header.find(kFilenameEquals); | |
439 if (field_offset == StringPiece::npos) | |
440 return true; // This was only optional | |
441 field_offset += strlen(kFilenameEquals); | |
442 field_end = header.find('"', field_offset); | |
443 if (field_end == StringPiece::npos) | |
444 return false; // This is a malformed header. | |
445 next_value_.set(header.data() + field_offset, field_end - field_offset); | |
446 return true; | |
447 } | |
448 | |
449 } // namespace extensions | |
OLD | NEW |