OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
6 | |
7 #include <vector> | |
8 | |
9 #include "base/lazy_instance.h" | |
10 #include "base/string_util.h" | |
11 #include "base/values.h" | |
12 #include "net/base/escape.h" | |
13 #include "net/url_request/url_request.h" | |
14 #include "third_party/re2/re2/re2.h" | |
15 | |
16 using base::DictionaryValue; | |
17 using base::ListValue; | |
18 using base::StringPiece; | |
19 using re2::RE2; | |
20 | |
21 namespace extensions { | |
22 | |
23 namespace { | |
24 | |
// Lower-case name of the header that carries a body part's field name.
// Kept as a macro so that it can be spliced into other string literals.
#define CONTENT_DISPOSITION "content-disposition:"

// Number of characters in CONTENT_DISPOSITION, not counting the trailing NUL.
static const size_t g_content_disposition_length =
    sizeof(CONTENT_DISPOSITION) - 1;

// The characters '\', '\', 'E'. Read as a regular expression this matches a
// literal backslash followed by 'E', i.e. the token which would close a
// \Q...\E quotation.
static const char g_escape_closing_quote[] = "\\\\E";
30 | |
31 // A wrapper struct for static RE2 objects to be held as LazyInstance. | |
// Every member is a const RE2 which is compiled once, in the constructor's
// initializer list; the pattern text of each member is given there.
32 struct Patterns { | |
33 Patterns(); | |
34 ~Patterns(); | |
// Patterns used by FormDataParserMultipart.
35 const RE2 transfer_padding_pattern_; | |
battre
2012/09/09 22:08:35
no _ at end for structs
vabr (Chromium)
2012/09/12 11:18:08
Done.
| |
36 const RE2 crlf_pattern_; | |
37 const RE2 closing_pattern_; | |
38 const RE2 epilogue_pattern_; | |
39 const RE2 crlf_free_pattern_; | |
40 const RE2 preamble_pattern_; | |
41 const RE2 header_pattern_; | |
42 const RE2 content_disposition_pattern_; | |
43 const RE2 name_pattern_; | |
44 const RE2 value_pattern_; | |
45 const RE2 unquote_pattern_; | |
// Pattern used by FormDataParserUrlEncoded to match one name-value pair.
46 const RE2 url_encoded_pattern_; | |
47 }; | |
48 | |
49 Patterns::Patterns() | |
50 : transfer_padding_pattern_("[ \\t]*\\r\\n"), | |
51 crlf_pattern_("\\r\\n"), | |
52 closing_pattern_("--[ \\t]*"), | |
53 epilogue_pattern_("|\\r\\n(?s:.)*"), | |
54 crlf_free_pattern_("(?:[^\\r]|\\r+[^\\r\\n])*"), | |
55 preamble_pattern_(".*?"), | |
56 header_pattern_("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"), | |
57 content_disposition_pattern_("(?i:" CONTENT_DISPOSITION ")"), | |
58 name_pattern_("\\bname=\"([^\"]*)\""), | |
59 value_pattern_("\\bfilename=\"([^\"]*)\""), | |
60 unquote_pattern_(g_escape_closing_quote), | |
61 url_encoded_pattern_("([^=]*)=([^&]*)&?") {} | |
62 | |
63 #undef CONTENT_DISPOSITION | |
64 | |
65 Patterns::~Patterns() {} | |
66 | |
67 static base::LazyInstance<Patterns>::Leaky g_patterns = | |
68 LAZY_INSTANCE_INITIALIZER; | |
69 | |
70 } // namespace | |
71 | |
72 // Parses URLencoded forms, see | |
73 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 . | |
74 class FormDataParserUrlEncoded : public FormDataParser { | |
75 public: | |
76 FormDataParserUrlEncoded(); | |
77 virtual ~FormDataParserUrlEncoded(); | |
78 | |
79 // Implementation of FormDataParser. | |
// AllDataReadOK: true iff a source was set and has been consumed completely.
// GetNextNameValue: consumes the next "name=value" pair from the source and
// stores the unescaped name and value in |result|; false if none was read.
// SetSource: accepts exactly one source; any later call returns false.
80 virtual bool AllDataReadOK() OVERRIDE; | |
81 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
82 virtual bool SetSource(const base::StringPiece& source) OVERRIDE; | |
83 | |
84 private: | |
85 // The pattern to match a single name-value pair. | |
86 const RE2& pattern() { | |
battre
2012/09/09 22:08:35
const
vabr (Chromium)
2012/09/12 11:18:08
Thanks for pointing that out.
I made that a static
| |
87 return g_patterns.Get().url_encoded_pattern_; | |
battre
2012/09/09 22:08:35
g_patterns.Get() is relatively expensive. How abou
vabr (Chromium)
2012/09/12 11:18:08
I added the caching. Because I would like to leave
| |
88 } | |
89 | |
// Length of |args_|; passed to RE2::ConsumeN as the number of arguments.
90 static const size_t args_size_ = 2u; // Auxiliary constant for using RE2. | |
battre
2012/09/09 22:08:35
Can you describe what this represents? "Number of
vabr (Chromium)
2012/09/12 11:18:08
Done.
| |
// Unescaping rules applied to every name and value before it is returned.
91 static const net::UnescapeRule::Type unescape_rules_; | |
92 | |
// The part of the input which has not been consumed yet.
93 re2::StringPiece source_; | |
// True once SetSource has accepted a source.
94 bool source_set_; | |
95 | |
96 // Auxiliary store for using RE2. | |
// |name_| and |value_| receive the two captures of pattern(); |arg_name_|
// and |arg_value_| wrap pointers to them, and |args_| lists those wrappers
// in capture order.
97 std::string name_; | |
98 std::string value_; | |
99 const RE2::Arg arg_name_; | |
100 const RE2::Arg arg_value_; | |
101 const RE2::Arg* args_[args_size_]; | |
102 | |
103 DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded); | |
104 }; | |
105 | |
106 // The following class, FormDataParserMultipart, parses forms encoded as | |
107 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart | |
108 // encoding) and 5322 (MIME-headers). | |
109 // | |
110 // Implementation details | |
111 // | |
112 // The original grammar from RFC 2046 is this, "multipart-body" being the root | |
113 // non-terminal: | |
114 // | |
115 // boundary := 0*69<bchars> bcharsnospace | |
116 // bchars := bcharsnospace / " " | |
117 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," | |
118 // / "-" / "." / "/" / ":" / "=" / "?" | |
119 // dash-boundary := "--" boundary | |
120 // multipart-body := [preamble CRLF] | |
121 // dash-boundary transport-padding CRLF | |
122 // body-part *encapsulation | |
123 // close-delimiter transport-padding | |
124 // [CRLF epilogue] | |
125 // transport-padding := *LWSP-char | |
126 // encapsulation := delimiter transport-padding CRLF body-part | |
127 // delimiter := CRLF dash-boundary | |
128 // close-delimiter := delimiter "--" | |
129 // preamble := discard-text | |
130 // epilogue := discard-text | |
131 // discard-text := *(*text CRLF) *text | |
132 // body-part := MIME-part-headers [CRLF *OCTET] | |
133 // OCTET := <any 0-255 octet value> | |
134 // | |
135 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, | |
136 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the | |
137 // English alphabet, respectively. | |
138 // The non-terminal "text" is presumably just any text, excluding line breaks. | |
139 // The non-terminal "LWSP-char" is not directly defined in the original grammar | |
140 // but it means "linear whitespace", which is a space or a horizontal tab. | |
141 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use | |
142 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: | |
143 // | |
144 // MIME-part-headers := field-name ":" unstructured CRLF | |
145 // field-name := 1*ftext | |
146 // ftext := %d33-57 / ; Printable US-ASCII | |
147 // %d59-126 ; characters not including ":". | |
148 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which | |
149 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and | |
150 // "CRLF<horizontal tab>", which serve for "folding". | |
151 // | |
152 // The FormDataParserMultipart class reads the input source and tries to parse it | |
153 // according to the grammar above, rooted at the "multipart-body" non-terminal. | |
154 // This happens in stages: | |
155 // | |
156 // 1. The optional preamble and the initial dash-boundary with transport padding | |
157 // and a CRLF are read and ignored. | |
158 // | |
159 // 2. Repeatedly each body part is read. The body parts can either serve to | |
160 // upload a file, or just a string of bytes. | |
161 // 2.a. The headers of that part are searched for the "content-disposition" | |
162 // header, which contains the name of the value represented by that body | |
163 // part. If the body-part is for file upload, that header also contains a | |
164 // filename. | |
165 // 2.b. The "*OCTET" part of the body part is then read and passed as the value | |
166 // of the name-value pair for body parts representing a string of bytes. | |
167 // For body parts for uploading a file the "*OCTET" part is just ignored | |
168 // and the filename is used for value instead. | |
169 // | |
170 // 3. The final close-delimiter and epilogue are read and ignored. | |
171 // | |
172 // IMPORTANT NOTE | |
173 // This parser supports multiple sources, i.e., SetSource can be called multiple | |
174 // times if the input is spread over several byte blocks. However, the split | |
175 // may only occur inside a body part, right after the trailing CRLF of headers. | |
176 class FormDataParserMultipart : public FormDataParser { | |
177 public: | |
// |boundary_separator| is the boundary without the leading "--"; it is turned
// into |dash_boundary_pattern_| by GetBoundaryPatternFromLiteral.
178 explicit FormDataParserMultipart(const std::string& boundary_separator); | |
179 virtual ~FormDataParserMultipart(); | |
180 | |
181 // Implementation of FormDataParser. | |
// AllDataReadOK: true iff the parser reached STATE_FINISHED.
182 virtual bool AllDataReadOK() OVERRIDE; | |
183 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
184 virtual bool SetSource(const base::StringPiece& source) OVERRIDE; | |
185 | |
186 private: | |
187 enum State { | |
188 STATE_INIT, // No input read yet. | |
189 STATE_READY, // Ready to call GetNextNameValue. | |
190 STATE_FINISHED, // Read the input until the end. | |
191 STATE_SUSPEND, // Waiting until a new |source_| is set. | |
192 STATE_ERROR | |
193 }; | |
194 | |
195 // Produces a regexp to match the string "--" + |literal|. | |
196 static std::string GetBoundaryPatternFromLiteral(const std::string& literal); | |
197 | |
198 // Tests whether |input| has a prefix matching |pattern|. | |
199 static bool LookAhead(const RE2& pattern, const re2::StringPiece& input); | |
battre
2012/09/09 22:08:35
How about naming this "StartsWithPattern" and reve
vabr (Chromium)
2012/09/12 11:18:08
Well, if you insist, I'll do it.
I prefer LookAhe
| |
200 | |
201 // If source_ starts with a header, consumes it. If the header is | |
202 // Content-Disposition, it also extracts |name| from "name=" and possibly | |
battre
2012/09/09 22:08:35
what does "it" refer to? From a grammatical POV, i
vabr (Chromium)
2012/09/12 11:18:08
It indeed refers to the header. I reworded the com
| |
203 // |value| from "filename=" fields of that header. It only touches |name| or | |
204 // |value| if it finds the respective fields for them. Returns true if it | |
205 // consumed a header, false if it did not. Sets |value_assigned| to true if it | |
battre
2012/09/09 22:08:35
nit: remove ", false if it did not."
vabr (Chromium)
2012/09/12 11:18:08
Done, I replaced those tails with using "iff", so
| |
206 // has assigned to value, otherwise it sets it to false. | |
// A Content-Disposition header without a "name=" field sets |state_| to
// STATE_ERROR, yet the function still returns true.
207 bool TryReadHeader(base::StringPiece* name, | |
208 base::StringPiece* value, | |
209 bool* value_assigned); | |
210 | |
211 // Helper to GetNextNameValue. Expects that the input starts with a data | |
212 // portion of a body part. It then attempts to read the input until the end of | |
battre
2012/09/09 22:08:35
nit: Please get rid of "It". Sentences like "it se
vabr (Chromium)
2012/09/12 11:18:08
Done. Thanks for pointing that horrible sentence o
| |
213 // that body part. If |data| is not NULL, it sets it to contain the data | |
214 // portion. Returns true when the reading was successful. | |
// Also consumes the dash-boundary which follows the part, and sets |state_|
// to STATE_FINISHED when that boundary turns out to be the close-delimiter.
215 bool FinishReadingPart(base::StringPiece* data); | |
216 | |
// Accessors for the fixed patterns stored in |g_patterns|.
217 static const RE2& transfer_padding_pattern() { | |
218 return g_patterns.Get().transfer_padding_pattern_; | |
219 } | |
220 static const RE2& crlf_pattern() { | |
221 return g_patterns.Get().crlf_pattern_; | |
222 } | |
223 static const RE2& closing_pattern() { | |
224 return g_patterns.Get().closing_pattern_; | |
225 } | |
226 static const RE2& epilogue_pattern() { | |
227 return g_patterns.Get().epilogue_pattern_; | |
228 } | |
229 static const RE2& crlf_free_pattern() { | |
230 return g_patterns.Get().crlf_free_pattern_; | |
231 } | |
232 static const RE2& preamble_pattern() { | |
233 return g_patterns.Get().preamble_pattern_; | |
234 } | |
235 static const RE2& header_pattern() { | |
236 return g_patterns.Get().header_pattern_; | |
237 } | |
238 static const RE2& content_disposition_pattern() { | |
239 return g_patterns.Get().content_disposition_pattern_; | |
240 } | |
241 static const RE2& name_pattern() { | |
242 return g_patterns.Get().name_pattern_; | |
243 } | |
244 static const RE2& value_pattern() { | |
245 return g_patterns.Get().value_pattern_; | |
246 } | |
247 static const RE2& unquote_pattern() { | |
248 return g_patterns.Get().unquote_pattern_; | |
249 } | |
250 | |
// Matches "--" followed by the boundary passed to the constructor. Unlike the
// patterns above it depends on the request, so it is stored per parser.
251 const RE2 dash_boundary_pattern_; | |
252 | |
253 // Because of initialisation dependency, |state_| needs to be declared after | |
254 // |dash_boundary_pattern_|. | |
255 State state_; | |
256 | |
257 // The parsed message can be split into multiple sources which we read | |
258 // sequentially. | |
// |source_| always holds the unread remainder of the current source.
259 re2::StringPiece source_; | |
260 | |
261 DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart); | |
262 }; | |
263 | |
264 // Implementation of FormDataParser and FormDataParser::Result . | |
265 | |
266 FormDataParser::Result::Result() {} | |
267 FormDataParser::Result::~Result() {} | |
268 | |
269 void FormDataParser::Result::Reset() { | |
270 name_.erase(); | |
271 value_.erase(); | |
272 } | |
273 | |
274 FormDataParser::~FormDataParser() {} | |
275 | |
276 // static | |
277 scoped_ptr<FormDataParser> FormDataParser::Create( | |
278 const net::URLRequest* request) { | |
279 std::string value; | |
280 const bool found = request->extra_request_headers().GetHeader( | |
281 net::HttpRequestHeaders::kContentType, &value); | |
282 return Create(found ? &value : NULL); | |
283 } | |
284 | |
285 // static | |
286 scoped_ptr<FormDataParser> FormDataParser::Create( | |
287 const std::string* content_type_header) { | |
288 enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE}; | |
289 ParserChoice choice = ERROR_CHOICE; | |
290 std::string boundary; | |
291 | |
292 if (content_type_header == NULL) { | |
293 choice = URL_ENCODED; | |
294 } else { | |
295 const std::string content_type( | |
296 content_type_header->substr(0, content_type_header->find(';'))); | |
297 | |
298 if (base::strcasecmp( | |
299 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
300 choice = URL_ENCODED; | |
301 } else if (base::strcasecmp( | |
302 content_type.c_str(), "multipart/form-data") == 0) { | |
303 static const char kBoundaryString[] = "boundary="; | |
304 size_t offset = content_type_header->find(kBoundaryString); | |
305 if (offset == std::string::npos) { | |
306 // Malformed header. | |
307 return scoped_ptr<FormDataParser>(); | |
308 } | |
309 offset += sizeof(kBoundaryString) - 1; | |
310 boundary = content_type_header->substr( | |
311 offset, content_type_header->find(';', offset)); | |
312 if (!boundary.empty()) | |
313 choice = MULTIPART; | |
314 } | |
315 } | |
316 // Other cases are unparseable, including when |content_type| is "text/plain". | |
317 | |
318 switch (choice) { | |
319 case URL_ENCODED: | |
320 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
321 case MULTIPART: | |
322 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
323 default: // In other words, case ERROR_CHOICE: | |
324 return scoped_ptr<FormDataParser>(); | |
325 } | |
326 } | |
327 | |
328 FormDataParser::FormDataParser() {} | |
329 | |
330 // Implementation of FormDataParserUrlEncoded. | |
331 | |
332 const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ = | |
333 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
334 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
335 | |
336 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
337 : source_(NULL), | |
338 source_set_(false), | |
339 arg_name_(&name_), | |
340 arg_value_(&value_) { | |
341 args_[0] = &arg_name_; | |
342 args_[1] = &arg_value_; | |
343 } | |
344 | |
345 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
346 | |
347 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
348 // All OK means we read the whole source. | |
349 return source_set_ && source_.size() == 0; | |
350 } | |
351 | |
352 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
353 if (!source_set_) | |
354 return false; | |
355 | |
356 bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_); | |
357 if (success) { | |
358 result->set_name(net::UnescapeURLComponent(name_, unescape_rules_)); | |
359 result->set_value(net::UnescapeURLComponent(value_, unescape_rules_)); | |
360 } | |
361 return success; | |
362 } | |
363 | |
364 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) { | |
365 if (source_set_) | |
366 return false; // We do not allow multiple sources for this parser. | |
367 source_.set(source.data(), source.size()); | |
368 source_set_ = true; | |
369 return true; | |
370 } | |
371 | |
372 // Implementation of FormDataParserMultipart. | |
373 | |
// Builds the text of a regular expression which matches exactly "--" followed
// by |literal|. The text is wrapped in a \Q...\E quotation so that no
// character of |literal| is read as a regex operator. A "\E" inside |literal|
// would close that quotation early, so each one is followed by an escaped
// "\E" (see g_escape_closing_quote) and a fresh "\Q".
374 // static | |
375 std::string FormDataParserMultipart::GetBoundaryPatternFromLiteral( | |
battre
2012/09/09 22:08:35
nit: I suggest to name functions that don't fetch
vabr (Chromium)
2012/09/12 11:18:08
Done.
| |
376 const std::string& literal) { | |
battre
2012/09/09 22:08:35
can you explain what this function does? May be us
vabr (Chromium)
2012/09/12 11:18:08
I expanded the description at the declaration of t
| |
// The macro lets the opening quote be concatenated with "--" at compile time.
377 #define OPEN_QUOTE "\\Q" | |
378 static const char opening_quote[] = OPEN_QUOTE; | |
379 static const char closing_quote[] = "\\E"; | |
380 | |
381 std::string output(OPEN_QUOTE "--"); // Let us start with the "--". | |
382 #undef OPEN_QUOTE | |
// |seek_unquote| is the part of |literal| still to be searched for "\E";
// |copy_start| marks the first character not yet copied into |output|.
383 re2::StringPiece seek_unquote(literal); | |
384 const char* copy_start = literal.data(); | |
385 size_t copy_length = literal.size(); | |
386 while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) { | |
// |seek_unquote| now starts right behind the "\E" just found, so the copied
// span ends with that "\E", which closes the quotation opened so far.
387 copy_length = seek_unquote.data() - copy_start; | |
388 output.append(copy_start, copy_length); | |
// Match the "\E" itself, then open a new quotation for what follows.
389 output.append(g_escape_closing_quote); | |
390 output.append(opening_quote); | |
391 copy_start = seek_unquote.data(); | |
392 } | |
// Copy the rest of |literal| and close the last quotation.
393 copy_length = (literal.data() + literal.size()) - copy_start; | |
394 output.append(copy_start, copy_length); | |
395 output.append(closing_quote); | |
396 return output; | |
397 } | |
398 | |
399 // static | |
400 bool FormDataParserMultipart::LookAhead(const RE2& pattern, | |
401 const re2::StringPiece& input) { | |
402 return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0); | |
403 } | |
404 | |
405 FormDataParserMultipart::FormDataParserMultipart( | |
406 const std::string& boundary_separator) | |
407 : dash_boundary_pattern_(GetBoundaryPatternFromLiteral(boundary_separator)), | |
408 state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR) {} | |
409 | |
410 FormDataParserMultipart::~FormDataParserMultipart() {} | |
411 | |
412 bool FormDataParserMultipart::AllDataReadOK() { | |
413 return state_ == STATE_FINISHED; | |
414 } | |
415 | |
416 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) { | |
417 const char* data_start = source_.data(); | |
418 while (!LookAhead(dash_boundary_pattern_, source_)) { | |
419 if (!RE2::Consume(&source_, crlf_free_pattern()) || | |
420 !RE2::Consume(&source_, crlf_pattern())) { | |
421 state_ = STATE_ERROR; | |
422 return false; | |
423 } | |
424 } | |
425 if (data != NULL) { | |
426 if (source_.data() == data_start) { | |
427 // No data in this body part. | |
428 state_ = STATE_ERROR; | |
429 return false; | |
430 } | |
431 // Subtract 2u for the trailing "\r\n". | |
432 data->set(data_start, source_.data() - data_start - 2u); | |
433 } | |
434 | |
435 // Finally, read the dash-boundary and either skip to the next body part, or | |
436 // finish reading the source. | |
437 CHECK(RE2::Consume(&source_, dash_boundary_pattern_)); | |
438 if (LookAhead(closing_pattern(), source_)) { | |
439 CHECK(RE2::Consume(&source_, closing_pattern())); | |
440 if (RE2::Consume(&source_, epilogue_pattern())) | |
441 state_ = STATE_FINISHED; | |
442 else | |
443 state_ = STATE_ERROR; | |
444 } else { // Next body part ahead. | |
445 if (!RE2::Consume(&source_, transfer_padding_pattern())) | |
446 state_ = STATE_ERROR; | |
447 } | |
448 return state_ != STATE_ERROR; | |
449 } | |
450 | |
// Reads one body part from |source_| and stores its name and value in
// |result|. The value is the filename if the headers contain one, and the data
// portion of the part otherwise. Returns false if there is no unread input, if
// the parser is not in STATE_READY, or if the part is malformed.
451 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
452 if (source_.size() == 0 || state_ != STATE_READY) | |
453 return false; | |
454 | |
455 // 1. Read body-part headers. | |
456 base::StringPiece name; | |
457 base::StringPiece value; | |
458 bool value_assigned = false; | |
// TryReadHeader writes |value_assigned_temp| on every call, before anything
// else, so it is never read uninitialized.
459 bool value_assigned_temp; | |
460 while (TryReadHeader(&name, &value, &value_assigned_temp)) | |
461 value_assigned |= value_assigned_temp; | |
// A part for which no header provided a non-empty name is an error.
462 if (name.size() == 0) { | |
463 state_ = STATE_ERROR; | |
464 return false; | |
465 } | |
battre
2012/09/09 22:08:35
move this into TryReadHeader?
There is a code path
vabr (Chromium)
2012/09/12 11:18:08
No, I need to check name.size() == 0 after all Try
| |
466 | |
467 // 2. Read the trailing CRLF after headers. | |
468 if (!RE2::Consume(&source_, crlf_pattern())) { | |
469 state_ = STATE_ERROR; | |
470 return false; | |
471 } | |
472 | |
473 // 3. Read the data of this body part, i.e., everything until the first | |
474 // dash-boundary. | |
// If the value is already known (a filename) and the source ends right after
// the headers, the data is expected in the next source: suspend until then.
475 bool return_value = true; | |
476 if (value_assigned && source_.size() == 0) // Wait for a new source? | |
battre
2012/09/09 22:08:35
I think this becomes clearer if return_value is se
vabr (Chromium)
2012/09/12 11:18:08
Done.
| |
477 state_ = STATE_SUSPEND; | |
478 else | |
// Passing NULL makes FinishReadingPart skip the data, keeping the filename.
479 return_value = FinishReadingPart(value_assigned ? NULL : &value); | |
480 | |
// NOTE(review): |result| is filled even when FinishReadingPart failed;
// callers should presumably check the return value before using it.
481 std::string unescaped_name = net::UnescapeURLComponent( | |
482 name.as_string(), | |
483 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS); | |
484 result->set_name(unescaped_name); | |
485 result->set_value(value); | |
486 | |
487 return return_value; | |
488 } | |
489 | |
490 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) { | |
491 if (source.data() == NULL || source_.size() != 0) | |
492 return false; | |
493 source_.set(source.data(), source.size()); | |
494 | |
495 switch (state_) { | |
496 case STATE_INIT: | |
497 // Seek behind the preamble. | |
498 while (!LookAhead(dash_boundary_pattern_, source_)) { | |
499 if (!RE2::Consume(&source_, preamble_pattern())) { | |
500 state_ = STATE_ERROR; | |
501 break; | |
502 } | |
503 } | |
504 // Read dash-boundary, transfer padding, and CRLF. | |
505 if (state_ != STATE_ERROR) { | |
506 if (!RE2::Consume(&source_, dash_boundary_pattern_) || | |
507 !RE2::Consume(&source_, transfer_padding_pattern())) | |
508 state_ = STATE_ERROR; | |
509 else | |
510 state_ = STATE_READY; | |
511 } | |
512 break; | |
513 case STATE_READY: // Nothing to do. | |
514 break; | |
515 case STATE_SUSPEND: | |
516 state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR; | |
517 break; | |
518 default: | |
519 state_ = STATE_ERROR; | |
520 } | |
521 return state_ != STATE_ERROR; | |
522 } | |
523 | |
// Consumes one header from the front of |source_|, if there is one.
//   |name|: set to the text inside name="..." of a Content-Disposition header.
//   |value|: set to the text inside filename="..." of such a header, if any.
//   |value_assigned|: always written; true iff |value| was set by this call.
// Returns true iff a header was consumed, even if that header was malformed;
// in the malformed case |state_| is set to STATE_ERROR.
524 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name, | |
525 base::StringPiece* value, | |
526 bool* value_assigned) { | |
527 *value_assigned = false; | |
528 const char* header_start = source_.data(); | |
529 if (!RE2::Consume(&source_, header_pattern())) | |
530 return false; | |
531 // (*) After this point we must return true, because we consumed one header. | |
532 | |
533 // Subtract 2u for the trailing "\r\n". | |
534 re2::StringPiece header(header_start, source_.data() - header_start - 2u); | |
535 | |
536 // Now we check whether |header| is a Content-Disposition header, and try | |
537 // to extract name and possibly value from it. | |
538 if (LookAhead(content_disposition_pattern(), header)) { | |
battre
2012/09/09 22:08:35
Inverse the logic here?
if (!LookAhead(...))
ret
vabr (Chromium)
2012/09/12 11:18:08
Done, thanks!
| |
// groups[1] receives the text captured between the quotes.
539 re2::StringPiece groups[2u]; | |
540 | |
// Both searches start right behind the "content-disposition:" prefix.
// A Content-Disposition header without name="..." is an error.
541 if (!name_pattern().Match(header, | |
542 g_content_disposition_length, header.size(), | |
543 RE2::UNANCHORED, groups, 2)) { | |
544 state_ = STATE_ERROR; | |
545 return true; // See (*) for why true. | |
546 } | |
547 name->set(groups[1].data(), groups[1].size()); | |
548 | |
// A missing filename="..." is not an error; |value| is simply left alone.
549 if (!value_pattern().Match(header, | |
550 g_content_disposition_length, header.size(), | |
551 RE2::UNANCHORED, groups, 2)) | |
battre
2012/09/09 22:08:35
please add {}
vabr (Chromium)
2012/09/12 11:18:08
Done.
| |
552 return true; // See (*) for why true. | |
battre
2012/09/09 22:08:35
why the asymmetry? Don't you want to set state_ to
vabr (Chromium)
2012/09/12 11:18:08
No, it is not an error if the header does not spec
| |
553 value->set(groups[1].data(), groups[1].size()); | |
554 *value_assigned = true; | |
555 } | |
556 return true; | |
557 } | |
558 | |
559 } // namespace extensions | |
OLD | NEW |