| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
| 6 | |
| 7 #include <vector> | |
| 8 | |
| 9 #include "base/lazy_instance.h" | |
| 10 #include "base/logging.h" | |
| 11 #include "base/macros.h" | |
| 12 #include "base/strings/string_util.h" | |
| 13 #include "base/values.h" | |
| 14 #include "net/base/escape.h" | |
| 15 #include "net/url_request/url_request.h" | |
| 16 #include "third_party/re2/re2/re2.h" | |
| 17 | |
| 18 using base::DictionaryValue; | |
| 19 using base::ListValue; | |
| 20 using base::StringPiece; | |
| 21 using re2::RE2; | |
| 22 | |
| 23 namespace extensions { | |
| 24 | |
| 25 namespace { | |
| 26 | |
| 27 const char kContentDisposition[] = "content-disposition:"; | |
| 28 const size_t kContentDispositionLength = arraysize(kContentDisposition) - 1; | |
| 29 // kCharacterPattern is an allowed character in a URL encoding. Definition is | |
| 30 // from RFC 1738, end of section 2.2. | |
| 31 const char kCharacterPattern[] = | |
| 32 "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))"; | |
| 33 const char kEscapeClosingQuote[] = "\\\\E"; | |
| 34 | |
| 35 // A wrapper struct for static RE2 objects to be held as LazyInstance. | |
| 36 struct Patterns { | |
| 37 Patterns(); | |
| 38 ~Patterns(); | |
| 39 const RE2 transfer_padding_pattern; | |
| 40 const RE2 crlf_pattern; | |
| 41 const RE2 closing_pattern; | |
| 42 const RE2 epilogue_pattern; | |
| 43 const RE2 crlf_free_pattern; | |
| 44 const RE2 preamble_pattern; | |
| 45 const RE2 header_pattern; | |
| 46 const RE2 content_disposition_pattern; | |
| 47 const RE2 name_pattern; | |
| 48 const RE2 value_pattern; | |
| 49 const RE2 unquote_pattern; | |
| 50 const RE2 url_encoded_pattern; | |
| 51 }; | |
| 52 | |
| 53 Patterns::Patterns() | |
| 54 : transfer_padding_pattern("[ \\t]*\\r\\n"), | |
| 55 crlf_pattern("\\r\\n"), | |
| 56 closing_pattern("--[ \\t]*"), | |
| 57 epilogue_pattern("|\\r\\n(?s:.)*"), | |
| 58 crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"), | |
| 59 preamble_pattern(".+?"), | |
| 60 header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"), | |
| 61 content_disposition_pattern(std::string("(?i:") + kContentDisposition + | |
| 62 ")"), | |
| 63 name_pattern("\\bname=\"([^\"]*)\""), | |
| 64 value_pattern("\\bfilename=\"([^\"]*)\""), | |
| 65 unquote_pattern(kEscapeClosingQuote), | |
| 66 url_encoded_pattern(std::string("(") + kCharacterPattern + "*)=(" + | |
| 67 kCharacterPattern + | |
| 68 "*)") { | |
| 69 } | |
| 70 | |
| 71 Patterns::~Patterns() {} | |
| 72 | |
| 73 base::LazyInstance<Patterns>::Leaky g_patterns = LAZY_INSTANCE_INITIALIZER; | |
| 74 | |
| 75 } // namespace | |
| 76 | |
| 77 // Parses URLencoded forms, see | |
| 78 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 . | |
| 79 class FormDataParserUrlEncoded : public FormDataParser { | |
| 80 public: | |
| 81 FormDataParserUrlEncoded(); | |
| 82 virtual ~FormDataParserUrlEncoded(); | |
| 83 | |
| 84 // Implementation of FormDataParser. | |
| 85 virtual bool AllDataReadOK() OVERRIDE; | |
| 86 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
| 87 virtual bool SetSource(base::StringPiece source) OVERRIDE; | |
| 88 | |
| 89 private: | |
| 90 // Returns the pattern to match a single name-value pair. This could be even | |
| 91 // static, but then we would have to spend more code on initializing the | |
| 92 // cached pointer to g_patterns.Get(). | |
| 93 const RE2& pattern() const { | |
| 94 return patterns_->url_encoded_pattern; | |
| 95 } | |
| 96 | |
| 97 // Auxiliary constant for using RE2. Number of arguments for parsing | |
| 98 // name-value pairs (one for name, one for value). | |
| 99 static const size_t args_size_ = 2u; | |
| 100 static const net::UnescapeRule::Type unescape_rules_; | |
| 101 | |
| 102 re2::StringPiece source_; | |
| 103 bool source_set_; | |
| 104 bool source_malformed_; | |
| 105 | |
| 106 // Auxiliary store for using RE2. | |
| 107 std::string name_; | |
| 108 std::string value_; | |
| 109 const RE2::Arg arg_name_; | |
| 110 const RE2::Arg arg_value_; | |
| 111 const RE2::Arg* args_[args_size_]; | |
| 112 | |
| 113 // Caching the pointer to g_patterns.Get(). | |
| 114 const Patterns* patterns_; | |
| 115 | |
| 116 DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded); | |
| 117 }; | |
| 118 | |
| 119 // The following class, FormDataParserMultipart, parses forms encoded as | |
| 120 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart | |
| 121 // encoding) and 5322 (MIME-headers). | |
| 122 // | |
| 123 // Implementation details | |
| 124 // | |
| 125 // The original grammar from RFC 2046 is this, "multipart-body" being the root | |
| 126 // non-terminal: | |
| 127 // | |
| 128 // boundary := 0*69<bchars> bcharsnospace | |
| 129 // bchars := bcharsnospace / " " | |
| 130 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," | |
| 131 // / "-" / "." / "/" / ":" / "=" / "?" | |
| 132 // dash-boundary := "--" boundary | |
| 133 // multipart-body := [preamble CRLF] | |
| 134 // dash-boundary transport-padding CRLF | |
| 135 // body-part *encapsulation | |
| 136 // close-delimiter transport-padding | |
| 137 // [CRLF epilogue] | |
| 138 // transport-padding := *LWSP-char | |
| 139 // encapsulation := delimiter transport-padding CRLF body-part | |
| 140 // delimiter := CRLF dash-boundary | |
| 141 // close-delimiter := delimiter "--" | |
| 142 // preamble := discard-text | |
| 143 // epilogue := discard-text | |
| 144 // discard-text := *(*text CRLF) *text | |
| 145 // body-part := MIME-part-headers [CRLF *OCTET] | |
| 146 // OCTET := <any 0-255 octet value> | |
| 147 // | |
| 148 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, | |
| 149 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the | |
| 150 // English alphabet, respectively. | |
| 151 // The non-terminal "text" is presumably just any text, excluding line breaks. | |
| 152 // The non-terminal "LWSP-char" is not directly defined in the original grammar | |
| 153 // but it means "linear whitespace", which is a space or a horizontal tab. | |
| 154 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use | |
| 155 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: | |
| 156 // | |
| 157 // MIME-part-headers := field-name ":" unstructured CRLF | |
| 158 // field-name := 1*ftext | |
| 159 // ftext := %d33-57 / ; Printable US-ASCII | |
| 160 // %d59-126 ; characters not including ":". | |
| 161 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which | |
| 162 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and | |
| 163 // "CRLF<horizontal tab>", which serve for "folding". | |
| 164 // | |
| 165 // The FormDataParserMultipart class reads the input source and tries to | |
| 166 // parse it according to the grammar above, rooted at the "multipart-body" | |
| 167 // non-terminal. This happens in stages: | |
| 168 // | |
| 169 // 1. The optional preamble and the initial dash-boundary with transport padding | |
| 170 // and a CRLF are read and ignored. | |
| 171 // | |
| 172 // 2. Repeatedly each body part is read. The body parts can either serve to | |
| 173 // upload a file, or just a string of bytes. | |
| 174 // 2.a. The headers of that part are searched for the "content-disposition" | |
| 175 // header, which contains the name of the value represented by that body | |
| 176 // part. If the body-part is for file upload, that header also contains a | |
| 177 // filename. | |
| 178 // 2.b. The "*OCTET" part of the body part is then read and passed as the value | |
| 179 // of the name-value pair for body parts representing a string of bytes. | |
| 180 // For body parts for uploading a file the "*OCTET" part is just ignored | |
| 181 // and the filename is used for value instead. | |
| 182 // | |
| 183 // 3. The final close-delimiter and epilogue are read and ignored. | |
| 184 // | |
| 185 // IMPORTANT NOTE | |
| 186 // This parser supports sources split into multiple chunks. Therefore SetSource | |
| 187 // can be called multiple times if the source is spread over several chunks. | |
| 188 // However, the split may only occur inside a body part, right after the | |
| 189 // trailing CRLF of headers. | |
| 190 class FormDataParserMultipart : public FormDataParser { | |
| 191 public: | |
| 192 explicit FormDataParserMultipart(const std::string& boundary_separator); | |
| 193 virtual ~FormDataParserMultipart(); | |
| 194 | |
| 195 // Implementation of FormDataParser. | |
| 196 virtual bool AllDataReadOK() OVERRIDE; | |
| 197 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
| 198 virtual bool SetSource(base::StringPiece source) OVERRIDE; | |
| 199 | |
| 200 private: | |
| 201 enum State { | |
| 202 STATE_INIT, // No input read yet. | |
| 203 STATE_READY, // Ready to call GetNextNameValue. | |
| 204 STATE_FINISHED, // Read the input until the end. | |
| 205 STATE_SUSPEND, // Waiting until a new |source_| is set. | |
| 206 STATE_ERROR | |
| 207 }; | |
| 208 | |
| 209 // Produces a regexp to match the string "--" + |literal|. The idea is to | |
| 210 // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed | |
| 211 // in "\\Q" and "\\E". The only catch is to watch out for occurences of "\\E" | |
| 212 // inside |literal|. Those must be excluded from the quote and the backslash | |
| 213 // doubly escaped. For example, for literal == "abc\\Edef" the result is | |
| 214 // "\\Q--abc\\E\\\\E\\Qdef\\E". | |
| 215 static std::string CreateBoundaryPatternFromLiteral( | |
| 216 const std::string& literal); | |
| 217 | |
| 218 // Tests whether |input| has a prefix matching |pattern|. | |
| 219 static bool StartsWithPattern(const re2::StringPiece& input, | |
| 220 const RE2& pattern); | |
| 221 | |
| 222 // If |source_| starts with a header, seeks |source_| beyond the header. If | |
| 223 // the header is Content-Disposition, extracts |name| from "name=" and | |
| 224 // possibly |value| from "filename=" fields of that header. Only if the | |
| 225 // "name" or "filename" fields are found, then |name| or |value| are touched. | |
| 226 // Returns true iff |source_| is seeked forward. Sets |value_assigned| | |
| 227 // to true iff |value| has been assigned to. | |
| 228 bool TryReadHeader(base::StringPiece* name, | |
| 229 base::StringPiece* value, | |
| 230 bool* value_assigned); | |
| 231 | |
| 232 // Helper to GetNextNameValue. Expects that the input starts with a data | |
| 233 // portion of a body part. An attempt is made to read the input until the end | |
| 234 // of that body part. If |data| is not NULL, it is set to contain the data | |
| 235 // portion. Returns true iff the reading was successful. | |
| 236 bool FinishReadingPart(base::StringPiece* data); | |
| 237 | |
| 238 // These methods could be even static, but then we would have to spend more | |
| 239 // code on initializing the cached pointer to g_patterns.Get(). | |
| 240 const RE2& transfer_padding_pattern() const { | |
| 241 return patterns_->transfer_padding_pattern; | |
| 242 } | |
| 243 const RE2& crlf_pattern() const { | |
| 244 return patterns_->crlf_pattern; | |
| 245 } | |
| 246 const RE2& closing_pattern() const { | |
| 247 return patterns_->closing_pattern; | |
| 248 } | |
| 249 const RE2& epilogue_pattern() const { | |
| 250 return patterns_->epilogue_pattern; | |
| 251 } | |
| 252 const RE2& crlf_free_pattern() const { | |
| 253 return patterns_->crlf_free_pattern; | |
| 254 } | |
| 255 const RE2& preamble_pattern() const { | |
| 256 return patterns_->preamble_pattern; | |
| 257 } | |
| 258 const RE2& header_pattern() const { | |
| 259 return patterns_->header_pattern; | |
| 260 } | |
| 261 const RE2& content_disposition_pattern() const { | |
| 262 return patterns_->content_disposition_pattern; | |
| 263 } | |
| 264 const RE2& name_pattern() const { | |
| 265 return patterns_->name_pattern; | |
| 266 } | |
| 267 const RE2& value_pattern() const { | |
| 268 return patterns_->value_pattern; | |
| 269 } | |
| 270 // However, this is used in a static method so it needs to be static. | |
| 271 static const RE2& unquote_pattern() { | |
| 272 return g_patterns.Get().unquote_pattern; // No caching g_patterns here. | |
| 273 } | |
| 274 | |
| 275 const RE2 dash_boundary_pattern_; | |
| 276 | |
| 277 // Because of initialisation dependency, |state_| needs to be declared after | |
| 278 // |dash_boundary_pattern_|. | |
| 279 State state_; | |
| 280 | |
| 281 // The parsed message can be split into multiple sources which we read | |
| 282 // sequentially. | |
| 283 re2::StringPiece source_; | |
| 284 | |
| 285 // Caching the pointer to g_patterns.Get(). | |
| 286 const Patterns* patterns_; | |
| 287 | |
| 288 DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart); | |
| 289 }; | |
| 290 | |
| 291 FormDataParser::Result::Result() {} | |
| 292 FormDataParser::Result::~Result() {} | |
| 293 | |
| 294 FormDataParser::~FormDataParser() {} | |
| 295 | |
| 296 // static | |
| 297 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 298 const net::URLRequest& request) { | |
| 299 std::string value; | |
| 300 const bool found = request.extra_request_headers().GetHeader( | |
| 301 net::HttpRequestHeaders::kContentType, &value); | |
| 302 return CreateFromContentTypeHeader(found ? &value : NULL); | |
| 303 } | |
| 304 | |
| 305 // static | |
| 306 scoped_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader( | |
| 307 const std::string* content_type_header) { | |
| 308 enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE}; | |
| 309 ParserChoice choice = ERROR_CHOICE; | |
| 310 std::string boundary; | |
| 311 | |
| 312 if (content_type_header == NULL) { | |
| 313 choice = URL_ENCODED; | |
| 314 } else { | |
| 315 const std::string content_type( | |
| 316 content_type_header->substr(0, content_type_header->find(';'))); | |
| 317 | |
| 318 if (base::strcasecmp( | |
| 319 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
| 320 choice = URL_ENCODED; | |
| 321 } else if (base::strcasecmp( | |
| 322 content_type.c_str(), "multipart/form-data") == 0) { | |
| 323 static const char kBoundaryString[] = "boundary="; | |
| 324 size_t offset = content_type_header->find(kBoundaryString); | |
| 325 if (offset == std::string::npos) { | |
| 326 // Malformed header. | |
| 327 return scoped_ptr<FormDataParser>(); | |
| 328 } | |
| 329 offset += sizeof(kBoundaryString) - 1; | |
| 330 boundary = content_type_header->substr( | |
| 331 offset, content_type_header->find(';', offset)); | |
| 332 if (!boundary.empty()) | |
| 333 choice = MULTIPART; | |
| 334 } | |
| 335 } | |
| 336 // Other cases are unparseable, including when |content_type| is "text/plain". | |
| 337 | |
| 338 switch (choice) { | |
| 339 case URL_ENCODED: | |
| 340 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
| 341 case MULTIPART: | |
| 342 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
| 343 case ERROR_CHOICE: | |
| 344 return scoped_ptr<FormDataParser>(); | |
| 345 } | |
| 346 NOTREACHED(); // Some compilers do not believe this is unreachable. | |
| 347 return scoped_ptr<FormDataParser>(); | |
| 348 } | |
| 349 | |
| 350 FormDataParser::FormDataParser() {} | |
| 351 | |
| 352 const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ = | |
| 353 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
| 354 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
| 355 | |
| 356 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
| 357 : source_(NULL), | |
| 358 source_set_(false), | |
| 359 source_malformed_(false), | |
| 360 arg_name_(&name_), | |
| 361 arg_value_(&value_), | |
| 362 patterns_(g_patterns.Pointer()) { | |
| 363 args_[0] = &arg_name_; | |
| 364 args_[1] = &arg_value_; | |
| 365 } | |
| 366 | |
| 367 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
| 368 | |
| 369 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
| 370 // All OK means we read the whole source. | |
| 371 return source_set_ && source_.empty() && !source_malformed_; | |
| 372 } | |
| 373 | |
| 374 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
| 375 if (!source_set_ || source_malformed_) | |
| 376 return false; | |
| 377 | |
| 378 bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_); | |
| 379 if (success) { | |
| 380 result->set_name(net::UnescapeURLComponent(name_, unescape_rules_)); | |
| 381 result->set_value(net::UnescapeURLComponent(value_, unescape_rules_)); | |
| 382 } | |
| 383 if (source_.length() > 0) { | |
| 384 if (source_[0] == '&') | |
| 385 source_.remove_prefix(1); // Remove the leading '&'. | |
| 386 else | |
| 387 source_malformed_ = true; // '&' missing between two name-value pairs. | |
| 388 } | |
| 389 return success && !source_malformed_; | |
| 390 } | |
| 391 | |
| 392 bool FormDataParserUrlEncoded::SetSource(base::StringPiece source) { | |
| 393 if (source_set_) | |
| 394 return false; // We do not allow multiple sources for this parser. | |
| 395 source_.set(source.data(), source.size()); | |
| 396 source_set_ = true; | |
| 397 source_malformed_ = false; | |
| 398 return true; | |
| 399 } | |
| 400 | |
| 401 // static | |
| 402 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral( | |
| 403 const std::string& literal) { | |
| 404 static const char quote[] = "\\Q"; | |
| 405 static const char unquote[] = "\\E"; | |
| 406 | |
| 407 // The result always starts with opening the qoute and then "--". | |
| 408 std::string result("\\Q--"); | |
| 409 | |
| 410 // This StringPiece is used below to record the next occurrence of "\\E" in | |
| 411 // |literal|. | |
| 412 re2::StringPiece seek_unquote(literal); | |
| 413 const char* copy_start = literal.data(); | |
| 414 size_t copy_length = literal.size(); | |
| 415 | |
| 416 // Find all "\\E" in |literal| and exclude them from the \Q...\E quote. | |
| 417 while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) { | |
| 418 copy_length = seek_unquote.data() - copy_start; | |
| 419 result.append(copy_start, copy_length); | |
| 420 result.append(kEscapeClosingQuote); | |
| 421 result.append(quote); | |
| 422 copy_start = seek_unquote.data(); | |
| 423 } | |
| 424 | |
| 425 // Finish the last \Q...\E quote. | |
| 426 copy_length = (literal.data() + literal.size()) - copy_start; | |
| 427 result.append(copy_start, copy_length); | |
| 428 result.append(unquote); | |
| 429 return result; | |
| 430 } | |
| 431 | |
| 432 // static | |
| 433 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input, | |
| 434 const RE2& pattern) { | |
| 435 return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0); | |
| 436 } | |
| 437 | |
| 438 FormDataParserMultipart::FormDataParserMultipart( | |
| 439 const std::string& boundary_separator) | |
| 440 : dash_boundary_pattern_( | |
| 441 CreateBoundaryPatternFromLiteral(boundary_separator)), | |
| 442 state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR), | |
| 443 patterns_(g_patterns.Pointer()) {} | |
| 444 | |
| 445 FormDataParserMultipart::~FormDataParserMultipart() {} | |
| 446 | |
| 447 bool FormDataParserMultipart::AllDataReadOK() { | |
| 448 return state_ == STATE_FINISHED; | |
| 449 } | |
| 450 | |
| 451 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) { | |
| 452 const char* data_start = source_.data(); | |
| 453 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
| 454 if (!RE2::Consume(&source_, crlf_free_pattern()) || | |
| 455 !RE2::Consume(&source_, crlf_pattern())) { | |
| 456 state_ = STATE_ERROR; | |
| 457 return false; | |
| 458 } | |
| 459 } | |
| 460 if (data != NULL) { | |
| 461 if (source_.data() == data_start) { | |
| 462 // No data in this body part. | |
| 463 state_ = STATE_ERROR; | |
| 464 return false; | |
| 465 } | |
| 466 // Subtract 2 for the trailing "\r\n". | |
| 467 data->set(data_start, source_.data() - data_start - 2); | |
| 468 } | |
| 469 | |
| 470 // Finally, read the dash-boundary and either skip to the next body part, or | |
| 471 // finish reading the source. | |
| 472 CHECK(RE2::Consume(&source_, dash_boundary_pattern_)); | |
| 473 if (StartsWithPattern(source_, closing_pattern())) { | |
| 474 CHECK(RE2::Consume(&source_, closing_pattern())); | |
| 475 if (RE2::Consume(&source_, epilogue_pattern())) | |
| 476 state_ = STATE_FINISHED; | |
| 477 else | |
| 478 state_ = STATE_ERROR; | |
| 479 } else { // Next body part ahead. | |
| 480 if (!RE2::Consume(&source_, transfer_padding_pattern())) | |
| 481 state_ = STATE_ERROR; | |
| 482 } | |
| 483 return state_ != STATE_ERROR; | |
| 484 } | |
| 485 | |
| 486 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
| 487 if (source_.empty() || state_ != STATE_READY) | |
| 488 return false; | |
| 489 | |
| 490 // 1. Read body-part headers. | |
| 491 base::StringPiece name; | |
| 492 base::StringPiece value; | |
| 493 bool value_assigned = false; | |
| 494 bool value_assigned_temp; | |
| 495 while (TryReadHeader(&name, &value, &value_assigned_temp)) | |
| 496 value_assigned |= value_assigned_temp; | |
| 497 if (name.empty() || state_ == STATE_ERROR) { | |
| 498 state_ = STATE_ERROR; | |
| 499 return false; | |
| 500 } | |
| 501 | |
| 502 // 2. Read the trailing CRLF after headers. | |
| 503 if (!RE2::Consume(&source_, crlf_pattern())) { | |
| 504 state_ = STATE_ERROR; | |
| 505 return false; | |
| 506 } | |
| 507 | |
| 508 // 3. Read the data of this body part, i.e., everything until the first | |
| 509 // dash-boundary. | |
| 510 bool return_value; | |
| 511 if (value_assigned && source_.empty()) { // Wait for a new source? | |
| 512 return_value = true; | |
| 513 state_ = STATE_SUSPEND; | |
| 514 } else { | |
| 515 return_value = FinishReadingPart(value_assigned ? NULL : &value); | |
| 516 } | |
| 517 | |
| 518 std::string unescaped_name = net::UnescapeURLComponent( | |
| 519 name.as_string(), | |
| 520 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS); | |
| 521 result->set_name(unescaped_name); | |
| 522 result->set_value(value); | |
| 523 | |
| 524 return return_value; | |
| 525 } | |
| 526 | |
| 527 bool FormDataParserMultipart::SetSource(base::StringPiece source) { | |
| 528 if (source.data() == NULL || !source_.empty()) | |
| 529 return false; | |
| 530 source_.set(source.data(), source.size()); | |
| 531 | |
| 532 switch (state_) { | |
| 533 case STATE_INIT: | |
| 534 // Seek behind the preamble. | |
| 535 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
| 536 if (!RE2::Consume(&source_, preamble_pattern())) { | |
| 537 state_ = STATE_ERROR; | |
| 538 break; | |
| 539 } | |
| 540 } | |
| 541 // Read dash-boundary, transfer padding, and CRLF. | |
| 542 if (state_ != STATE_ERROR) { | |
| 543 if (!RE2::Consume(&source_, dash_boundary_pattern_) || | |
| 544 !RE2::Consume(&source_, transfer_padding_pattern())) | |
| 545 state_ = STATE_ERROR; | |
| 546 else | |
| 547 state_ = STATE_READY; | |
| 548 } | |
| 549 break; | |
| 550 case STATE_READY: // Nothing to do. | |
| 551 break; | |
| 552 case STATE_SUSPEND: | |
| 553 state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR; | |
| 554 break; | |
| 555 default: | |
| 556 state_ = STATE_ERROR; | |
| 557 } | |
| 558 return state_ != STATE_ERROR; | |
| 559 } | |
| 560 | |
| 561 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name, | |
| 562 base::StringPiece* value, | |
| 563 bool* value_assigned) { | |
| 564 *value_assigned = false; | |
| 565 const char* header_start = source_.data(); | |
| 566 if (!RE2::Consume(&source_, header_pattern())) | |
| 567 return false; | |
| 568 // (*) After this point we must return true, because we consumed one header. | |
| 569 | |
| 570 // Subtract 2 for the trailing "\r\n". | |
| 571 re2::StringPiece header(header_start, source_.data() - header_start - 2); | |
| 572 | |
| 573 if (!StartsWithPattern(header, content_disposition_pattern())) | |
| 574 return true; // Skip headers that don't describe the content-disposition. | |
| 575 | |
| 576 re2::StringPiece groups[2]; | |
| 577 | |
| 578 if (!name_pattern().Match(header, | |
| 579 kContentDispositionLength, header.size(), | |
| 580 RE2::UNANCHORED, groups, 2)) { | |
| 581 state_ = STATE_ERROR; | |
| 582 return true; // See (*) for why true. | |
| 583 } | |
| 584 name->set(groups[1].data(), groups[1].size()); | |
| 585 | |
| 586 if (value_pattern().Match(header, | |
| 587 kContentDispositionLength, header.size(), | |
| 588 RE2::UNANCHORED, groups, 2)) { | |
| 589 value->set(groups[1].data(), groups[1].size()); | |
| 590 *value_assigned = true; | |
| 591 } | |
| 592 return true; | |
| 593 } | |
| 594 | |
| 595 } // namespace extensions | |
| OLD | NEW |