OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
6 | |
7 #include "base/string_util.h" | |
8 #include "base/values.h" | |
9 #include "net/base/escape.h" | |
10 #include "net/url_request/url_request.h" | |
11 | |
12 using base::DictionaryValue; | |
13 using base::ListValue; | |
14 using base::StringPiece; | |
15 | |
16 namespace extensions { | |
17 | |
18 // Implementation of FormDataParser and FormDataParser::Result . | |
19 | |
20 FormDataParser::Result::Result() {} | |
21 FormDataParser::Result::~Result() {} | |
22 | |
23 void FormDataParser::Result::Reset() { | |
24 name_.erase(); | |
25 value_.erase(); | |
26 } | |
27 | |
28 FormDataParser::~FormDataParser() {} | |
29 | |
30 // static | |
31 scoped_ptr<FormDataParser> FormDataParser::Create( | |
32 const net::URLRequest* request) { | |
33 std::string value; | |
34 const bool found = request->extra_request_headers().GetHeader( | |
35 net::HttpRequestHeaders::kContentType, &value); | |
36 return Create(found ? &value : NULL); | |
37 } | |
38 | |
39 // static | |
40 scoped_ptr<FormDataParser> FormDataParser::Create( | |
41 const std::string* content_type_header) { | |
42 enum ParserChoice {kUrlEncoded, kMultipart, kError}; | |
43 ParserChoice choice = kError; | |
44 std::string boundary; | |
45 | |
46 if (content_type_header == NULL) { | |
47 choice = kUrlEncoded; | |
48 } else { | |
49 const std::string content_type( | |
50 content_type_header->substr(0, content_type_header->find(';'))); | |
51 | |
52 if (base::strcasecmp( | |
53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
54 choice = kUrlEncoded; | |
55 } else if (base::strcasecmp( | |
56 content_type.c_str(), "multipart/form-data") == 0) { | |
57 static const char kBoundaryString[] = "boundary="; | |
58 size_t offset = content_type_header->find(kBoundaryString); | |
59 if (offset == std::string::npos) { | |
60 // Malformed header. | |
61 return scoped_ptr<FormDataParser>(); | |
62 } | |
63 offset += strlen(kBoundaryString); | |
64 boundary = content_type_header->substr( | |
65 offset, content_type_header->find(';', offset)); | |
66 if (!boundary.empty()) | |
67 choice = kMultipart; | |
68 } | |
69 } | |
70 // Other cases are unparseable, including when |content_type| is "text/plain". | |
tkent
2012/08/24 14:26:50
Why text/plain is not supported?
vabr (Chromium)
2012/08/24 16:16:59
This encoding is ambiguous.
Nice description from
| |
71 | |
72 switch (choice) { | |
73 case kUrlEncoded: | |
74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
75 case kMultipart: | |
76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
77 default: // In other words, case kError: | |
78 return scoped_ptr<FormDataParser>(); | |
79 } | |
80 } | |
81 | |
82 FormDataParser::FormDataParser() {} | |
83 | |
84 // Implementation of FormDataParserUrlEncoded. | |
85 | |
86 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
87 : source_end_(NULL), | |
88 aborted_(false), | |
89 offset_(NULL), | |
90 equality_signs_(0), | |
91 amp_signs_(0), | |
92 expect_equality_(true) { | |
93 } | |
94 | |
95 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
96 | |
97 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
98 return source_.data() != NULL && | |
99 !aborted_ && | |
100 offset_ == source_end_ && | |
101 equality_signs_ == amp_signs_ + 1; | |
102 } | |
103 | |
104 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
105 result->Reset(); | |
106 if (source_.data() == NULL || aborted_) | |
107 return false; | |
108 if (offset_ == source_end_) | |
109 return false; | |
110 const char* const name_start = &(*offset_); | |
111 char c; | |
112 bool last_read_success = GetNextChar(&c); | |
113 while (last_read_success && c != '=') | |
114 last_read_success = GetNextChar(&c); | |
115 if (!last_read_success) { // This means the data is malformed. | |
116 Abort(); | |
117 return false; | |
118 } | |
119 const char* const name_end = offset_ - 1; | |
120 const std::string encoded_name(name_start, name_end - name_start); | |
121 const net::UnescapeRule::Type unescape_rules = | |
122 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
123 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
124 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); | |
125 | |
126 const char* const value_start = offset_; | |
127 last_read_success = GetNextChar(&c); | |
128 while (last_read_success && c != '&') | |
129 last_read_success = GetNextChar(&c); | |
130 const char* const value_end = | |
131 last_read_success ? offset_ - 1 : offset_; | |
132 const std::string encoded_value(value_start, value_end - value_start); | |
133 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); | |
134 return true; | |
135 } | |
136 | |
137 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) { | |
138 if (source_ != NULL || source.data() == NULL || aborted_) | |
139 return false; | |
140 source_ = source; | |
141 source_end_ = source_.data() + source_.size(); | |
142 offset_ = source_.data(); | |
143 return true; | |
144 } | |
145 | |
146 bool FormDataParserUrlEncoded::GetNextChar(char* c) { | |
147 if (offset_ == source_end_ || aborted_) | |
148 return false; | |
149 *c = *offset_; | |
150 ++offset_; | |
151 | |
152 if (*c == '=') { | |
153 if (expect_equality_) { | |
154 ++equality_signs_; | |
155 expect_equality_ = false; | |
156 } else { | |
157 Abort(); | |
158 return false; | |
159 } | |
160 } | |
161 if (*c == '&' && offset_ != source_end_) { | |
162 if (!expect_equality_) { | |
163 ++amp_signs_; | |
164 expect_equality_ = true; | |
165 } else { | |
166 Abort(); | |
167 return false; | |
168 } | |
169 } | |
170 | |
171 return true; | |
172 } | |
173 | |
174 void FormDataParserUrlEncoded::Abort() { | |
175 aborted_ = true; | |
176 } | |
177 | |
178 // Implementation of FormDataParserMultipart. | |
179 | |
180 FormDataParserMultipart::FormDataParserMultipart( | |
181 const std::string& boundary_separator) | |
182 : source_end_(NULL), | |
183 offset_(NULL), | |
184 dash_boundary_("--" + boundary_separator), | |
185 state_(kStart), | |
186 value_name_present_(false) { | |
187 } | |
188 | |
189 FormDataParserMultipart::~FormDataParserMultipart() {} | |
190 | |
191 bool FormDataParserMultipart::AllDataReadOK() { | |
192 return source_.data() != NULL && InFinalState(); | |
193 } | |
194 | |
195 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
196 if (!value_name_present_ || state_ == kError) | |
197 return false; | |
198 result->set_name(next_name_); | |
199 result->set_value(next_value_); | |
200 next_name_.clear(); | |
201 next_value_.clear(); | |
202 value_name_present_ = ReadNextNameValue(); | |
203 return true; | |
204 } | |
205 | |
206 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) { | |
207 if (state_ == kError || | |
208 source.data() == NULL || | |
209 // Message part across a source split is also an error. | |
210 next_name_.data() != NULL || next_value_.data() != NULL) | |
211 return false; | |
212 if (source_.data() != NULL && offset_ != source_end_){ | |
213 // Try to seek until the end. If no name-value pair is found, this is OK. | |
214 value_name_present_ = ReadNextNameValue(); | |
215 if (!value_name_present_ || offset_ != source_end_) | |
216 return false; | |
217 } | |
218 source_ = source; | |
219 source_end_ = source_.data() + source_.size(); | |
220 offset_ = source_.data(); | |
221 value_name_present_ = ReadNextNameValue(); | |
222 return true; | |
223 } | |
224 | |
225 // static | |
226 char FormDataParserMultipart::kTransitionToChar[] = { | |
227 '\n', // For kLF. | |
228 '\r', // For kCR. | |
229 0, // For kAscii. | |
230 0, // For kLwsp. | |
231 0, // For kDashBoundary. | |
232 ':', // For kColonT. | |
233 '-', // For kDash. | |
234 0, // For kAny. | |
235 }; | |
236 | |
237 // static | |
238 FormDataParserMultipart::Transition | |
239 FormDataParserMultipart::kAvailableTransitions[] = { | |
240 kDashBoundary, kCR, kAny, // For kStart. | |
241 kLF, kAny, // For kCR1. | |
242 kCR, kAny, // For kIgnorePreamble. | |
243 kLwsp, kCR, kAny, // For kDB1. | |
244 kLF, kAny, // For kCR2. | |
245 kAscii, kCR, kAny, // For kPart. | |
246 kAscii, kColonT, kAny, // For kName. | |
247 kLF, kCR, kAny, // For kColonS. | |
248 kCR, kAscii, kAny, // For kEnd1. | |
249 kLF, kCR, kAscii, kAny, // For kEnd2. | |
250 kLwsp, kCR, kAscii, kAny, // For kEnd3. | |
251 kLF, kAny, // For kCR3. | |
252 kDashBoundary, kCR, kAny, // For kPreData. | |
253 kLF, kAny, // For kCR4. | |
254 kCR, kAny, // For kData. | |
255 kDashBoundary, kAny, // For kData2. | |
256 kLwsp, kCR, kDash, kAny, // For kDB2. | |
257 kDash, kAny, // For kD. | |
258 kLwsp, kCR, kAny, // For kEnd. | |
259 kLF, kAny, // For kCR5. | |
260 kAny, // For kIgnoreEpilogue. | |
261 kAny // For kError. | |
262 }; | |
263 | |
264 // static | |
265 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { | |
266 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. | |
267 kStart, kIgnorePreamble, // For kCR1; 05. | |
268 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. | |
269 kDB1, kCR2, kError, // For kDB1; 10. | |
270 kPart, kError, // For kCR2; 12. | |
271 kName, kCR3, kError, // For kPart; 15. | |
272 kName, kColonS, kError, // For kName; 18. | |
273 kEnd1, kEnd2, kColonS, // For kColonS; 21. | |
274 kCR3, kName, kError, // For kEnd1; 24. | |
275 kEnd3, kCR3, kName, kError, // For kEnd2; 28. | |
276 kColonS, kCR3, kName, kError, // For kEnd3; 32. | |
277 kPreData, kError, // For kCR3; 34. | |
278 kDB2, kCR3, kData, // For kPreData; 37. | |
279 kData2, kData, // For kCR4; 39. | |
280 kCR4, kData, // For kData; 41. | |
281 kDB2, kCR4, // For kData2; 43. | |
282 kDB1, kCR2, kD, kError, // For kDB2; 47. | |
283 kEnd, kError, // For kD; 49. | |
284 kEnd, kCR5, kError, // For kEnd; 52. | |
285 kIgnoreEpilogue, kError, // For kCR5; 54. | |
286 kIgnoreEpilogue, // For kIgnoreEpilogue; 55. | |
287 kError // For kError; 56. | |
288 }; | |
289 | |
290 // static | |
291 size_t FormDataParserMultipart::kStateToTransition[] = { | |
292 0u, // For kStart | |
293 3u, // For kCR1 | |
294 5u, // For kIgnorePreamble | |
295 7u, // For kDB1 | |
296 10u, // For kCR2 | |
297 12u, // For kPart | |
298 15u, // For kName | |
299 18u, // For kColonS | |
300 21u, // For kEnd1 | |
301 24u, // For kEnd2 | |
302 28u, // For kEnd3 | |
303 32u, // For kCR3 | |
304 34u, // For kPreData | |
305 37u, // For kCR4 | |
306 39u, // For kData | |
307 41u, // For kData2 | |
308 43u, // For kDB2 | |
309 47u, // For kD | |
310 49u, // For kEnd | |
311 52u, // For kCR5 | |
312 54u, // For kIgnoreEpilogue | |
313 55u, // For kError | |
314 }; | |
315 | |
316 bool FormDataParserMultipart::ReadNextNameValue() { | |
317 if (state_ == kError || source_.data() == NULL || | |
318 next_name_.data() != NULL || next_value_.data() != NULL) | |
319 return false; | |
320 | |
321 // Seek to the next part's headers. | |
322 while (state_ != kPart) { | |
323 if (!DoStep()) | |
324 return false; | |
325 } | |
326 while (state_ != kPreData) { | |
327 const char* header = offset_; | |
328 while (state_ != kColonS) { | |
329 if (!DoStep()) | |
330 return false; | |
331 } | |
332 size_t header_length = 0u; | |
333 while (state_ != kPreData && state_ != kName) { | |
334 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) { | |
335 // The cast is safe, we know that offset only moves forward. | |
336 header_length = static_cast<size_t>(offset_ - header); | |
337 } | |
338 if (!DoStep()) | |
339 return false; | |
340 } | |
341 if (ParseHeader(base::StringPiece(header, header_length))) { | |
342 // Found what we were looking for, just skip to the part's body. | |
343 while (state_ != kPreData) { | |
344 if (!DoStep()) | |
345 return false; | |
346 } | |
347 } | |
348 } | |
349 | |
350 const char* body = offset_; | |
351 size_t body_length = 0; | |
352 while (state_ != kDB2 && offset_ != source_end_) { | |
353 if (!DoStep()) | |
354 return false; | |
355 if (state_ == kCR4) { | |
356 // We are in the middle of which might be the CRLF starting the part | |
357 // separator (see the "delimiter" non-terminal from the grammar given | |
358 // in the header file). The cast is safe, we know that offset only moves | |
359 // forward and body was assigned at least 1 transition ago. | |
360 body_length = static_cast<size_t>(offset_ - body - 1); | |
361 } | |
362 } | |
363 if (body_length > 0) | |
364 next_value_.set(body, body_length); | |
365 return true; | |
366 } | |
367 | |
368 bool FormDataParserMultipart::DoStep() { | |
369 if (state_ == kError || offset_ == source_end_) | |
370 return false; | |
371 size_t transition_index = kStateToTransition[state_]; | |
372 Transition t = kAvailableTransitions[transition_index]; | |
373 while (t != kAny) { | |
374 const State s = kNextState[transition_index]; | |
375 const size_t length = LookUp(t); | |
376 if (length > 0) { | |
377 offset_ += length; | |
378 state_ = s; | |
379 return true; | |
380 } | |
381 t = kAvailableTransitions[++transition_index]; | |
382 } | |
383 // We have kAny, the default choice. Seek by one and switch the state. | |
384 ++offset_; | |
385 state_ = kNextState[transition_index]; | |
386 return true; | |
387 } | |
388 | |
389 // Contract -- the following must be true: offset_ != source_end_ . | |
390 // The idea is to check this only once in the caller (DoStep()), and do not | |
391 // repeat it here every time, as this can be called many times from one call | |
392 // to DoStep(). | |
393 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { | |
394 const char ahead = *offset_; | |
395 const char first_char = kTransitionToChar[t]; | |
396 | |
397 // Easy case: labels corresponding to a single char. | |
398 if (first_char != 0) | |
399 return ahead == first_char ? 1u : 0u; | |
400 | |
401 // Harder cases. | |
402 switch (t) { | |
403 // Multiple alternatives, 1-char long: return immediately. | |
404 case kAscii: | |
405 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u; | |
406 case kLwsp: | |
407 return (ahead == ' ' || ahead == '\t') ? 1u : 0u; | |
408 | |
409 // Longer than 1 char: prepare work for later. | |
410 case kDashBoundary: { | |
411 const size_t length = dash_boundary_.size(); | |
412 // The cast below is safe, we know that the difference is not negative. | |
413 if (static_cast<size_t>(source_end_ - offset_) < length || | |
414 memcmp(dash_boundary_.c_str(), offset_, length) != 0) | |
415 return 0u; | |
416 return length; | |
417 } | |
418 case kAny: | |
419 // We are not supposed to be asked for kAny, but this is the right answer: | |
420 return 1u; | |
421 default: // We never get here -- the rest has already been handled above. | |
422 NOTREACHED(); | |
423 return 0u; | |
424 } | |
425 } | |
426 | |
427 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { | |
428 static const char kContentDisposition[] = "Content-Disposition:"; | |
429 if (memcmp(header.data(), kContentDisposition, | |
430 strlen(kContentDisposition) != 0)) | |
431 return false; | |
432 static const char kNameEquals[] = " name=\""; | |
433 static const char kFilenameEquals[] = " filename=\""; | |
434 | |
435 // Mandatory part: find the name and set it as |next_name_|. | |
436 StringPiece::size_type field_offset = header.find(kNameEquals); | |
437 if (field_offset == StringPiece::npos) | |
438 return false; | |
439 field_offset += strlen(kNameEquals); | |
440 StringPiece::size_type field_end = header.find('"', field_offset); | |
441 if (field_end == StringPiece::npos) | |
442 return false; | |
443 next_name_.set(header.data() + field_offset, field_end - field_offset); | |
444 | |
445 // Optional part: find the filename and set it as |next_value_|. | |
446 field_offset = header.find(kFilenameEquals); | |
447 if (field_offset == StringPiece::npos) | |
448 return true; // This was only optional | |
449 field_offset += strlen(kFilenameEquals); | |
450 field_end = header.find('"', field_offset); | |
451 if (field_end == StringPiece::npos) | |
452 return false; // This is a malformed header. | |
453 next_value_.set(header.data() + field_offset, field_end - field_offset); | |
454 return true; | |
455 } | |
456 | |
457 } // namespace extensions | |
OLD | NEW |