OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
6 | |
7 #include <vector> | |
8 | |
9 #include "base/lazy_instance.h" | |
10 #include "base/logging.h" | |
11 #include "base/macros.h" | |
12 #include "base/strings/string_util.h" | |
13 #include "base/values.h" | |
14 #include "net/base/escape.h" | |
15 #include "net/url_request/url_request.h" | |
16 #include "third_party/re2/re2/re2.h" | |
17 | |
18 using base::DictionaryValue; | |
19 using base::ListValue; | |
20 using base::StringPiece; | |
21 using re2::RE2; | |
22 | |
23 namespace extensions { | |
24 | |
25 namespace { | |
26 | |
// Name of the Content-Disposition header in lower case, including the colon.
// It is embedded into a case-insensitive regexp below.
const char kContentDisposition[] = "content-disposition:";
// Length of |kContentDisposition| without the terminating NUL character.
const size_t kContentDispositionLength = sizeof(kContentDisposition) - 1;
// kCharacterPattern is a regexp for a single character allowed in a URL
// encoding: either one of the listed literal characters or a "%XX" escape.
// Definition is from RFC 1738, end of section 2.2.
const char kCharacterPattern[] =
    "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))";
// Regexp source which matches the two characters backslash and 'E'.
const char kEscapeClosingQuote[] = "\\\\E";
34 | |
35 // A wrapper struct for static RE2 objects to be held as LazyInstance. | |
36 struct Patterns { | |
37 Patterns(); | |
38 ~Patterns(); | |
39 const RE2 transfer_padding_pattern; | |
40 const RE2 crlf_pattern; | |
41 const RE2 closing_pattern; | |
42 const RE2 epilogue_pattern; | |
43 const RE2 crlf_free_pattern; | |
44 const RE2 preamble_pattern; | |
45 const RE2 header_pattern; | |
46 const RE2 content_disposition_pattern; | |
47 const RE2 name_pattern; | |
48 const RE2 value_pattern; | |
49 const RE2 unquote_pattern; | |
50 const RE2 url_encoded_pattern; | |
51 }; | |
52 | |
53 Patterns::Patterns() | |
54 : transfer_padding_pattern("[ \\t]*\\r\\n"), | |
55 crlf_pattern("\\r\\n"), | |
56 closing_pattern("--[ \\t]*"), | |
57 epilogue_pattern("|\\r\\n(?s:.)*"), | |
58 crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"), | |
59 preamble_pattern(".+?"), | |
60 header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"), | |
61 content_disposition_pattern(std::string("(?i:") + kContentDisposition + | |
62 ")"), | |
63 name_pattern("\\bname=\"([^\"]*)\""), | |
64 value_pattern("\\bfilename=\"([^\"]*)\""), | |
65 unquote_pattern(kEscapeClosingQuote), | |
66 url_encoded_pattern(std::string("(") + kCharacterPattern + "*)=(" + | |
67 kCharacterPattern + | |
68 "*)") { | |
69 } | |
70 | |
71 Patterns::~Patterns() {} | |
72 | |
73 base::LazyInstance<Patterns>::Leaky g_patterns = LAZY_INSTANCE_INITIALIZER; | |
74 | |
75 } // namespace | |
76 | |
77 // Parses URLencoded forms, see | |
78 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 . | |
79 class FormDataParserUrlEncoded : public FormDataParser { | |
80 public: | |
81 FormDataParserUrlEncoded(); | |
82 virtual ~FormDataParserUrlEncoded(); | |
83 | |
84 // Implementation of FormDataParser. | |
85 virtual bool AllDataReadOK() OVERRIDE; | |
86 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
87 virtual bool SetSource(base::StringPiece source) OVERRIDE; | |
88 | |
89 private: | |
90 // Returns the pattern to match a single name-value pair. This could be even | |
91 // static, but then we would have to spend more code on initializing the | |
92 // cached pointer to g_patterns.Get(). | |
93 const RE2& pattern() const { | |
94 return patterns_->url_encoded_pattern; | |
95 } | |
96 | |
97 // Auxiliary constant for using RE2. Number of arguments for parsing | |
98 // name-value pairs (one for name, one for value). | |
99 static const size_t args_size_ = 2u; | |
100 static const net::UnescapeRule::Type unescape_rules_; | |
101 | |
102 re2::StringPiece source_; | |
103 bool source_set_; | |
104 bool source_malformed_; | |
105 | |
106 // Auxiliary store for using RE2. | |
107 std::string name_; | |
108 std::string value_; | |
109 const RE2::Arg arg_name_; | |
110 const RE2::Arg arg_value_; | |
111 const RE2::Arg* args_[args_size_]; | |
112 | |
113 // Caching the pointer to g_patterns.Get(). | |
114 const Patterns* patterns_; | |
115 | |
116 DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded); | |
117 }; | |
118 | |
119 // The following class, FormDataParserMultipart, parses forms encoded as | |
120 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart | |
121 // encoding) and 5322 (MIME-headers). | |
122 // | |
123 // Implementation details | |
124 // | |
125 // The original grammar from RFC 2046 is this, "multipart-body" being the root | |
126 // non-terminal: | |
127 // | |
128 // boundary := 0*69<bchars> bcharsnospace | |
129 // bchars := bcharsnospace / " " | |
130 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," | |
131 // / "-" / "." / "/" / ":" / "=" / "?" | |
132 // dash-boundary := "--" boundary | |
133 // multipart-body := [preamble CRLF] | |
134 // dash-boundary transport-padding CRLF | |
135 // body-part *encapsulation | |
136 // close-delimiter transport-padding | |
137 // [CRLF epilogue] | |
138 // transport-padding := *LWSP-char | |
139 // encapsulation := delimiter transport-padding CRLF body-part | |
140 // delimiter := CRLF dash-boundary | |
141 // close-delimiter := delimiter "--" | |
142 // preamble := discard-text | |
143 // epilogue := discard-text | |
144 // discard-text := *(*text CRLF) *text | |
145 // body-part := MIME-part-headers [CRLF *OCTET] | |
146 // OCTET := <any 0-255 octet value> | |
147 // | |
148 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, | |
149 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the | |
150 // English alphabet, respectively. | |
151 // The non-terminal "text" is presumably just any text, excluding line breaks. | |
152 // The non-terminal "LWSP-char" is not directly defined in the original grammar | |
153 // but it means "linear whitespace", which is a space or a horizontal tab. | |
154 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use | |
155 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: | |
156 // | |
157 // MIME-part-headers := field-name ":" unstructured CRLF | |
158 // field-name := 1*ftext | |
159 // ftext := %d33-57 / ; Printable US-ASCII | |
160 // %d59-126 ; characters not including ":". | |
161 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which | |
162 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and | |
163 // "CRLF<horizontal tab>", which serve for "folding". | |
164 // | |
// The FormDataParserMultipart class reads the input source and tries to
// parse it according to the grammar above, rooted at the "multipart-body"
// non-terminal.
167 // This happens in stages: | |
168 // | |
169 // 1. The optional preamble and the initial dash-boundary with transport padding | |
170 // and a CRLF are read and ignored. | |
171 // | |
172 // 2. Repeatedly each body part is read. The body parts can either serve to | |
173 // upload a file, or just a string of bytes. | |
174 // 2.a. The headers of that part are searched for the "content-disposition" | |
175 // header, which contains the name of the value represented by that body | |
176 // part. If the body-part is for file upload, that header also contains a | |
177 // filename. | |
178 // 2.b. The "*OCTET" part of the body part is then read and passed as the value | |
179 // of the name-value pair for body parts representing a string of bytes. | |
180 // For body parts for uploading a file the "*OCTET" part is just ignored | |
181 // and the filename is used for value instead. | |
182 // | |
183 // 3. The final close-delimiter and epilogue are read and ignored. | |
184 // | |
185 // IMPORTANT NOTE | |
186 // This parser supports sources split into multiple chunks. Therefore SetSource | |
187 // can be called multiple times if the source is spread over several chunks. | |
188 // However, the split may only occur inside a body part, right after the | |
189 // trailing CRLF of headers. | |
190 class FormDataParserMultipart : public FormDataParser { | |
191 public: | |
192 explicit FormDataParserMultipart(const std::string& boundary_separator); | |
193 virtual ~FormDataParserMultipart(); | |
194 | |
195 // Implementation of FormDataParser. | |
196 virtual bool AllDataReadOK() OVERRIDE; | |
197 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
198 virtual bool SetSource(base::StringPiece source) OVERRIDE; | |
199 | |
200 private: | |
201 enum State { | |
202 STATE_INIT, // No input read yet. | |
203 STATE_READY, // Ready to call GetNextNameValue. | |
204 STATE_FINISHED, // Read the input until the end. | |
205 STATE_SUSPEND, // Waiting until a new |source_| is set. | |
206 STATE_ERROR | |
207 }; | |
208 | |
209 // Produces a regexp to match the string "--" + |literal|. The idea is to | |
210 // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed | |
211 // in "\\Q" and "\\E". The only catch is to watch out for occurences of "\\E" | |
212 // inside |literal|. Those must be excluded from the quote and the backslash | |
213 // doubly escaped. For example, for literal == "abc\\Edef" the result is | |
214 // "\\Q--abc\\E\\\\E\\Qdef\\E". | |
215 static std::string CreateBoundaryPatternFromLiteral( | |
216 const std::string& literal); | |
217 | |
218 // Tests whether |input| has a prefix matching |pattern|. | |
219 static bool StartsWithPattern(const re2::StringPiece& input, | |
220 const RE2& pattern); | |
221 | |
222 // If |source_| starts with a header, seeks |source_| beyond the header. If | |
223 // the header is Content-Disposition, extracts |name| from "name=" and | |
224 // possibly |value| from "filename=" fields of that header. Only if the | |
225 // "name" or "filename" fields are found, then |name| or |value| are touched. | |
226 // Returns true iff |source_| is seeked forward. Sets |value_assigned| | |
227 // to true iff |value| has been assigned to. | |
228 bool TryReadHeader(base::StringPiece* name, | |
229 base::StringPiece* value, | |
230 bool* value_assigned); | |
231 | |
232 // Helper to GetNextNameValue. Expects that the input starts with a data | |
233 // portion of a body part. An attempt is made to read the input until the end | |
234 // of that body part. If |data| is not NULL, it is set to contain the data | |
235 // portion. Returns true iff the reading was successful. | |
236 bool FinishReadingPart(base::StringPiece* data); | |
237 | |
238 // These methods could be even static, but then we would have to spend more | |
239 // code on initializing the cached pointer to g_patterns.Get(). | |
240 const RE2& transfer_padding_pattern() const { | |
241 return patterns_->transfer_padding_pattern; | |
242 } | |
243 const RE2& crlf_pattern() const { | |
244 return patterns_->crlf_pattern; | |
245 } | |
246 const RE2& closing_pattern() const { | |
247 return patterns_->closing_pattern; | |
248 } | |
249 const RE2& epilogue_pattern() const { | |
250 return patterns_->epilogue_pattern; | |
251 } | |
252 const RE2& crlf_free_pattern() const { | |
253 return patterns_->crlf_free_pattern; | |
254 } | |
255 const RE2& preamble_pattern() const { | |
256 return patterns_->preamble_pattern; | |
257 } | |
258 const RE2& header_pattern() const { | |
259 return patterns_->header_pattern; | |
260 } | |
261 const RE2& content_disposition_pattern() const { | |
262 return patterns_->content_disposition_pattern; | |
263 } | |
264 const RE2& name_pattern() const { | |
265 return patterns_->name_pattern; | |
266 } | |
267 const RE2& value_pattern() const { | |
268 return patterns_->value_pattern; | |
269 } | |
270 // However, this is used in a static method so it needs to be static. | |
271 static const RE2& unquote_pattern() { | |
272 return g_patterns.Get().unquote_pattern; // No caching g_patterns here. | |
273 } | |
274 | |
275 const RE2 dash_boundary_pattern_; | |
276 | |
277 // Because of initialisation dependency, |state_| needs to be declared after | |
278 // |dash_boundary_pattern_|. | |
279 State state_; | |
280 | |
281 // The parsed message can be split into multiple sources which we read | |
282 // sequentially. | |
283 re2::StringPiece source_; | |
284 | |
285 // Caching the pointer to g_patterns.Get(). | |
286 const Patterns* patterns_; | |
287 | |
288 DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart); | |
289 }; | |
290 | |
291 FormDataParser::Result::Result() {} | |
292 FormDataParser::Result::~Result() {} | |
293 | |
294 FormDataParser::~FormDataParser() {} | |
295 | |
296 // static | |
297 scoped_ptr<FormDataParser> FormDataParser::Create( | |
298 const net::URLRequest& request) { | |
299 std::string value; | |
300 const bool found = request.extra_request_headers().GetHeader( | |
301 net::HttpRequestHeaders::kContentType, &value); | |
302 return CreateFromContentTypeHeader(found ? &value : NULL); | |
303 } | |
304 | |
305 // static | |
306 scoped_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader( | |
307 const std::string* content_type_header) { | |
308 enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE}; | |
309 ParserChoice choice = ERROR_CHOICE; | |
310 std::string boundary; | |
311 | |
312 if (content_type_header == NULL) { | |
313 choice = URL_ENCODED; | |
314 } else { | |
315 const std::string content_type( | |
316 content_type_header->substr(0, content_type_header->find(';'))); | |
317 | |
318 if (base::strcasecmp( | |
319 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
320 choice = URL_ENCODED; | |
321 } else if (base::strcasecmp( | |
322 content_type.c_str(), "multipart/form-data") == 0) { | |
323 static const char kBoundaryString[] = "boundary="; | |
324 size_t offset = content_type_header->find(kBoundaryString); | |
325 if (offset == std::string::npos) { | |
326 // Malformed header. | |
327 return scoped_ptr<FormDataParser>(); | |
328 } | |
329 offset += sizeof(kBoundaryString) - 1; | |
330 boundary = content_type_header->substr( | |
331 offset, content_type_header->find(';', offset)); | |
332 if (!boundary.empty()) | |
333 choice = MULTIPART; | |
334 } | |
335 } | |
336 // Other cases are unparseable, including when |content_type| is "text/plain". | |
337 | |
338 switch (choice) { | |
339 case URL_ENCODED: | |
340 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
341 case MULTIPART: | |
342 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
343 case ERROR_CHOICE: | |
344 return scoped_ptr<FormDataParser>(); | |
345 } | |
346 NOTREACHED(); // Some compilers do not believe this is unreachable. | |
347 return scoped_ptr<FormDataParser>(); | |
348 } | |
349 | |
350 FormDataParser::FormDataParser() {} | |
351 | |
352 const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ = | |
353 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
354 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
355 | |
356 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
357 : source_(NULL), | |
358 source_set_(false), | |
359 source_malformed_(false), | |
360 arg_name_(&name_), | |
361 arg_value_(&value_), | |
362 patterns_(g_patterns.Pointer()) { | |
363 args_[0] = &arg_name_; | |
364 args_[1] = &arg_value_; | |
365 } | |
366 | |
367 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
368 | |
369 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
370 // All OK means we read the whole source. | |
371 return source_set_ && source_.empty() && !source_malformed_; | |
372 } | |
373 | |
374 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
375 if (!source_set_ || source_malformed_) | |
376 return false; | |
377 | |
378 bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_); | |
379 if (success) { | |
380 result->set_name(net::UnescapeURLComponent(name_, unescape_rules_)); | |
381 result->set_value(net::UnescapeURLComponent(value_, unescape_rules_)); | |
382 } | |
383 if (source_.length() > 0) { | |
384 if (source_[0] == '&') | |
385 source_.remove_prefix(1); // Remove the leading '&'. | |
386 else | |
387 source_malformed_ = true; // '&' missing between two name-value pairs. | |
388 } | |
389 return success && !source_malformed_; | |
390 } | |
391 | |
392 bool FormDataParserUrlEncoded::SetSource(base::StringPiece source) { | |
393 if (source_set_) | |
394 return false; // We do not allow multiple sources for this parser. | |
395 source_.set(source.data(), source.size()); | |
396 source_set_ = true; | |
397 source_malformed_ = false; | |
398 return true; | |
399 } | |
400 | |
401 // static | |
402 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral( | |
403 const std::string& literal) { | |
404 static const char quote[] = "\\Q"; | |
405 static const char unquote[] = "\\E"; | |
406 | |
407 // The result always starts with opening the qoute and then "--". | |
408 std::string result("\\Q--"); | |
409 | |
410 // This StringPiece is used below to record the next occurrence of "\\E" in | |
411 // |literal|. | |
412 re2::StringPiece seek_unquote(literal); | |
413 const char* copy_start = literal.data(); | |
414 size_t copy_length = literal.size(); | |
415 | |
416 // Find all "\\E" in |literal| and exclude them from the \Q...\E quote. | |
417 while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) { | |
418 copy_length = seek_unquote.data() - copy_start; | |
419 result.append(copy_start, copy_length); | |
420 result.append(kEscapeClosingQuote); | |
421 result.append(quote); | |
422 copy_start = seek_unquote.data(); | |
423 } | |
424 | |
425 // Finish the last \Q...\E quote. | |
426 copy_length = (literal.data() + literal.size()) - copy_start; | |
427 result.append(copy_start, copy_length); | |
428 result.append(unquote); | |
429 return result; | |
430 } | |
431 | |
432 // static | |
433 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input, | |
434 const RE2& pattern) { | |
435 return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0); | |
436 } | |
437 | |
438 FormDataParserMultipart::FormDataParserMultipart( | |
439 const std::string& boundary_separator) | |
440 : dash_boundary_pattern_( | |
441 CreateBoundaryPatternFromLiteral(boundary_separator)), | |
442 state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR), | |
443 patterns_(g_patterns.Pointer()) {} | |
444 | |
445 FormDataParserMultipart::~FormDataParserMultipart() {} | |
446 | |
447 bool FormDataParserMultipart::AllDataReadOK() { | |
448 return state_ == STATE_FINISHED; | |
449 } | |
450 | |
451 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) { | |
452 const char* data_start = source_.data(); | |
453 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
454 if (!RE2::Consume(&source_, crlf_free_pattern()) || | |
455 !RE2::Consume(&source_, crlf_pattern())) { | |
456 state_ = STATE_ERROR; | |
457 return false; | |
458 } | |
459 } | |
460 if (data != NULL) { | |
461 if (source_.data() == data_start) { | |
462 // No data in this body part. | |
463 state_ = STATE_ERROR; | |
464 return false; | |
465 } | |
466 // Subtract 2 for the trailing "\r\n". | |
467 data->set(data_start, source_.data() - data_start - 2); | |
468 } | |
469 | |
470 // Finally, read the dash-boundary and either skip to the next body part, or | |
471 // finish reading the source. | |
472 CHECK(RE2::Consume(&source_, dash_boundary_pattern_)); | |
473 if (StartsWithPattern(source_, closing_pattern())) { | |
474 CHECK(RE2::Consume(&source_, closing_pattern())); | |
475 if (RE2::Consume(&source_, epilogue_pattern())) | |
476 state_ = STATE_FINISHED; | |
477 else | |
478 state_ = STATE_ERROR; | |
479 } else { // Next body part ahead. | |
480 if (!RE2::Consume(&source_, transfer_padding_pattern())) | |
481 state_ = STATE_ERROR; | |
482 } | |
483 return state_ != STATE_ERROR; | |
484 } | |
485 | |
486 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
487 if (source_.empty() || state_ != STATE_READY) | |
488 return false; | |
489 | |
490 // 1. Read body-part headers. | |
491 base::StringPiece name; | |
492 base::StringPiece value; | |
493 bool value_assigned = false; | |
494 bool value_assigned_temp; | |
495 while (TryReadHeader(&name, &value, &value_assigned_temp)) | |
496 value_assigned |= value_assigned_temp; | |
497 if (name.empty() || state_ == STATE_ERROR) { | |
498 state_ = STATE_ERROR; | |
499 return false; | |
500 } | |
501 | |
502 // 2. Read the trailing CRLF after headers. | |
503 if (!RE2::Consume(&source_, crlf_pattern())) { | |
504 state_ = STATE_ERROR; | |
505 return false; | |
506 } | |
507 | |
508 // 3. Read the data of this body part, i.e., everything until the first | |
509 // dash-boundary. | |
510 bool return_value; | |
511 if (value_assigned && source_.empty()) { // Wait for a new source? | |
512 return_value = true; | |
513 state_ = STATE_SUSPEND; | |
514 } else { | |
515 return_value = FinishReadingPart(value_assigned ? NULL : &value); | |
516 } | |
517 | |
518 std::string unescaped_name = net::UnescapeURLComponent( | |
519 name.as_string(), | |
520 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS); | |
521 result->set_name(unescaped_name); | |
522 result->set_value(value); | |
523 | |
524 return return_value; | |
525 } | |
526 | |
527 bool FormDataParserMultipart::SetSource(base::StringPiece source) { | |
528 if (source.data() == NULL || !source_.empty()) | |
529 return false; | |
530 source_.set(source.data(), source.size()); | |
531 | |
532 switch (state_) { | |
533 case STATE_INIT: | |
534 // Seek behind the preamble. | |
535 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
536 if (!RE2::Consume(&source_, preamble_pattern())) { | |
537 state_ = STATE_ERROR; | |
538 break; | |
539 } | |
540 } | |
541 // Read dash-boundary, transfer padding, and CRLF. | |
542 if (state_ != STATE_ERROR) { | |
543 if (!RE2::Consume(&source_, dash_boundary_pattern_) || | |
544 !RE2::Consume(&source_, transfer_padding_pattern())) | |
545 state_ = STATE_ERROR; | |
546 else | |
547 state_ = STATE_READY; | |
548 } | |
549 break; | |
550 case STATE_READY: // Nothing to do. | |
551 break; | |
552 case STATE_SUSPEND: | |
553 state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR; | |
554 break; | |
555 default: | |
556 state_ = STATE_ERROR; | |
557 } | |
558 return state_ != STATE_ERROR; | |
559 } | |
560 | |
561 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name, | |
562 base::StringPiece* value, | |
563 bool* value_assigned) { | |
564 *value_assigned = false; | |
565 const char* header_start = source_.data(); | |
566 if (!RE2::Consume(&source_, header_pattern())) | |
567 return false; | |
568 // (*) After this point we must return true, because we consumed one header. | |
569 | |
570 // Subtract 2 for the trailing "\r\n". | |
571 re2::StringPiece header(header_start, source_.data() - header_start - 2); | |
572 | |
573 if (!StartsWithPattern(header, content_disposition_pattern())) | |
574 return true; // Skip headers that don't describe the content-disposition. | |
575 | |
576 re2::StringPiece groups[2]; | |
577 | |
578 if (!name_pattern().Match(header, | |
579 kContentDispositionLength, header.size(), | |
580 RE2::UNANCHORED, groups, 2)) { | |
581 state_ = STATE_ERROR; | |
582 return true; // See (*) for why true. | |
583 } | |
584 name->set(groups[1].data(), groups[1].size()); | |
585 | |
586 if (value_pattern().Match(header, | |
587 kContentDispositionLength, header.size(), | |
588 RE2::UNANCHORED, groups, 2)) { | |
589 value->set(groups[1].data(), groups[1].size()); | |
590 *value_assigned = true; | |
591 } | |
592 return true; | |
593 } | |
594 | |
595 } // namespace extensions | |
OLD | NEW |