OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" |
| 6 |
| 7 #include <vector> |
| 8 |
| 9 #include "base/lazy_instance.h" |
| 10 #include "base/string_util.h" |
| 11 #include "base/values.h" |
| 12 #include "net/base/escape.h" |
| 13 #include "net/url_request/url_request.h" |
| 14 #include "third_party/re2/re2/re2.h" |
| 15 |
| 16 using base::DictionaryValue; |
| 17 using base::ListValue; |
| 18 using base::StringPiece; |
| 19 using re2::RE2; |
| 20 |
| 21 namespace extensions { |
| 22 |
| 23 namespace { |
| 24 |
// Lower-case name of the Content-Disposition header, including the trailing
// colon. This is a macro (rather than a constant) so that it can be glued to
// adjacent string literals at compile time and measured with sizeof.
#define CONTENT_DISPOSITION "content-disposition:"

// Regular expression source which matches the two characters "\E" literally
// (a backslash followed by 'E'). It is used both as a search pattern and as a
// fragment appended to generated boundary patterns.
static const char g_escape_closing_quote[] = "\\\\E";
// Number of characters in CONTENT_DISPOSITION, not counting the terminating
// NUL.
static const size_t g_content_disposition_length =
    sizeof(CONTENT_DISPOSITION) - 1;
| 30 |
// A wrapper struct for static RE2 objects to be held as LazyInstance.
// All members are compiled once, in the constructor, and are never modified
// afterwards.
struct Patterns {
  Patterns();
  ~Patterns();
  // Optional spaces and tabs followed by CRLF.
  const RE2 transfer_padding_pattern;
  // A single CRLF.
  const RE2 crlf_pattern;
  // The "--" which turns a delimiter into a close-delimiter, followed by
  // optional spaces and tabs.
  const RE2 closing_pattern;
  // Either nothing, or CRLF followed by arbitrary content.
  const RE2 epilogue_pattern;
  // A run of characters used to advance to the next CRLF.
  const RE2 crlf_free_pattern;
  // Used to skip over the preamble preceding the first dash-boundary.
  const RE2 preamble_pattern;
  // One header, including folded continuation lines and the trailing CRLF.
  const RE2 header_pattern;
  // Case-insensitive match of the Content-Disposition header name.
  const RE2 content_disposition_pattern;
  // The name="..." field; group 1 captures the text between the quotes.
  const RE2 name_pattern;
  // The filename="..." field; group 1 captures the text between the quotes.
  const RE2 value_pattern;
  // A literal "\E" (backslash followed by 'E').
  const RE2 unquote_pattern;
  // One "name=value" pair with an optional trailing '&'; group 1 captures the
  // name and group 2 the value.
  const RE2 url_encoded_pattern;
};
| 48 |
| 49 Patterns::Patterns() |
| 50 : transfer_padding_pattern("[ \\t]*\\r\\n"), |
| 51 crlf_pattern("\\r\\n"), |
| 52 closing_pattern("--[ \\t]*"), |
| 53 epilogue_pattern("|\\r\\n(?s:.)*"), |
| 54 crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"), |
| 55 preamble_pattern(".*?"), |
| 56 header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"), |
| 57 content_disposition_pattern("(?i:" CONTENT_DISPOSITION ")"), |
| 58 name_pattern("\\bname=\"([^\"]*)\""), |
| 59 value_pattern("\\bfilename=\"([^\"]*)\""), |
| 60 unquote_pattern(g_escape_closing_quote), |
| 61 url_encoded_pattern("([^=]*)=([^&]*)&?") {} |
| 62 |
// The macro is only needed for the definitions above.
#undef CONTENT_DISPOSITION

Patterns::~Patterns() {}

// The single, lazily constructed set of compiled patterns shared by all
// parsers in this file. Declared Leaky, i.e., it is intentionally never
// destroyed.
static base::LazyInstance<Patterns>::Leaky g_patterns =
    LAZY_INSTANCE_INITIALIZER;
| 69 |
| 70 } // namespace |
| 71 |
// Parses URLencoded forms, see
// http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .
// The whole input has to be supplied in a single SetSource call; name-value
// pairs are then extracted one by one with GetNextNameValue.
class FormDataParserUrlEncoded : public FormDataParser {
 public:
  FormDataParserUrlEncoded();
  virtual ~FormDataParserUrlEncoded();

  // Implementation of FormDataParser.
  // Returns true iff a source has been set and it has been consumed entirely.
  virtual bool AllDataReadOK() OVERRIDE;
  // Extracts the next name-value pair from the source and stores its
  // unescaped form in |result|. Returns false if no source has been set or no
  // further pair could be matched.
  virtual bool GetNextNameValue(Result* result) OVERRIDE;
  // Sets the input to parse. Succeeds only on the first call.
  virtual bool SetSource(const base::StringPiece& source) OVERRIDE;

 private:
  // The pattern to match a single name-value pair. This could be even static,
  // but then we would have to spend more code on initializing the cached
  // pointer to g_patterns.Get().
  const RE2& pattern() const {
    return patterns_->url_encoded_pattern;
  }

  // Auxiliary constant for using RE2. Number of arguments for parsing
  // name-value pairs (one for name, one for value).
  static const size_t args_size_ = 2u;
  // Rules applied when unescaping both names and values.
  static const net::UnescapeRule::Type unescape_rules_;

  // The not yet consumed rest of the input.
  re2::StringPiece source_;
  // True once SetSource has been called successfully.
  bool source_set_;

  // Auxiliary store for using RE2.
  // |name_| and |value_| receive the captured groups; |arg_name_| and
  // |arg_value_| wrap pointers to them, and |args_| is the array handed to
  // RE2. They must stay declared in this order, because the RE2::Arg members
  // are initialized from the addresses of the strings.
  std::string name_;
  std::string value_;
  const RE2::Arg arg_name_;
  const RE2::Arg arg_value_;
  const RE2::Arg* args_[args_size_];

  // Caching the pointer to g_patterns.Get().
  const Patterns* patterns_;

  DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);
};
| 112 |
| 113 // The following class, FormDataParserMultipart, parses forms encoded as |
| 114 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart |
| 115 // encoding) and 5322 (MIME-headers). |
| 116 // |
| 117 // Implementation details |
| 118 // |
| 119 // The original grammar from RFC 2046 is this, "multipart-body" being the root |
| 120 // non-terminal: |
| 121 // |
| 122 // boundary := 0*69<bchars> bcharsnospace |
| 123 // bchars := bcharsnospace / " " |
| 124 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," |
| 125 // / "-" / "." / "/" / ":" / "=" / "?" |
| 126 // dash-boundary := "--" boundary |
| 127 // multipart-body := [preamble CRLF] |
| 128 // dash-boundary transport-padding CRLF |
| 129 // body-part *encapsulation |
| 130 // close-delimiter transport-padding |
| 131 // [CRLF epilogue] |
| 132 // transport-padding := *LWSP-char |
| 133 // encapsulation := delimiter transport-padding CRLF body-part |
| 134 // delimiter := CRLF dash-boundary |
| 135 // close-delimiter := delimiter "--" |
| 136 // preamble := discard-text |
| 137 // epilogue := discard-text |
| 138 // discard-text := *(*text CRLF) *text |
| 139 // body-part := MIME-part-headers [CRLF *OCTET] |
| 140 // OCTET := <any 0-255 octet value> |
| 141 // |
| 142 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, |
| 143 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the |
| 144 // English alphabet, respectively. |
| 145 // The non-terminal "text" is presumably just any text, excluding line breaks. |
| 146 // The non-terminal "LWSP-char" is not directly defined in the original grammar |
| 147 // but it means "linear whitespace", which is a space or a horizontal tab. |
| 148 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use |
| 149 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: |
| 150 // |
| 151 // MIME-part-headers := field-name ":" unstructured CRLF |
| 152 // field-name := 1*ftext |
| 153 // ftext := %d33-57 / ; Printable US-ASCII |
| 154 // %d59-126 ; characters not including ":". |
| 155 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which |
| 156 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and |
| 157 // "CRLF<horizontal tab>", which serve for "folding". |
| 158 // |
| 159 // The FormDataParserMultipart class reads the input source and tries to parse it |
| 160 // according to the grammar above, rooted at the "multipart-body" non-terminal. |
| 161 // This happens in stages: |
| 162 // |
| 163 // 1. The optional preamble and the initial dash-boundary with transport padding |
| 164 // and a CRLF are read and ignored. |
| 165 // |
| 166 // 2. Repeatedly each body part is read. The body parts can either serve to |
| 167 // upload a file, or just a string of bytes. |
| 168 // 2.a. The headers of that part are searched for the "content-disposition" |
| 169 // header, which contains the name of the value represented by that body |
| 170 // part. If the body-part is for file upload, that header also contains a |
| 171 // filename. |
| 172 // 2.b. The "*OCTET" part of the body part is then read and passed as the value |
| 173 // of the name-value pair for body parts representing a string of bytes. |
| 174 // For body parts for uploading a file the "*OCTET" part is just ignored |
| 175 // and the filename is used for value instead. |
| 176 // |
| 177 // 3. The final close-delimiter and epilogue are read and ignored. |
| 178 // |
| 179 // IMPORTANT NOTE |
| 180 // This parser supports multiple sources, i.e., SetSource can be called multiple |
| 181 // times if the input is spread over several byte blocks. However, the split |
| 182 // may only occur inside a body part, right after the trailing CRLF of headers. |
class FormDataParserMultipart : public FormDataParser {
 public:
  // |boundary_separator| is the boundary string, without the leading "--".
  explicit FormDataParserMultipart(const std::string& boundary_separator);
  virtual ~FormDataParserMultipart();

  // Implementation of FormDataParser.
  // Returns true iff the close-delimiter and the epilogue have been read.
  virtual bool AllDataReadOK() OVERRIDE;
  // Reads the next body part and stores its name and value in |result|.
  virtual bool GetNextNameValue(Result* result) OVERRIDE;
  // Supplies the next block of input. Fails if the previous block has not
  // been consumed completely yet.
  virtual bool SetSource(const base::StringPiece& source) OVERRIDE;

 private:
  enum State {
    STATE_INIT,      // No input read yet.
    STATE_READY,     // Ready to call GetNextNameValue.
    STATE_FINISHED,  // Read the input until the end.
    STATE_SUSPEND,   // Waiting until a new |source_| is set.
    STATE_ERROR
  };

  // Produces a regexp to match the string "--" + |literal|. The idea is to
  // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed
  // in "\\Q" and "\\E". The only catch is to watch out for occurrences of
  // "\\E" inside |literal|. Those must be excluded from the quote and the
  // backslash doubly escaped. For example, for literal == "abc\\Edef" the
  // result is "\\Q--abc\\E\\\\E\\Qdef\\E".
  static std::string CreateBoundaryPatternFromLiteral(
      const std::string& literal);

  // Tests whether |input| has a prefix matching |pattern|.
  static bool StartsWithPattern(const re2::StringPiece& input,
                                const RE2& pattern);

  // If |source_| starts with a header, seeks |source_| beyond the header. If
  // the header is Content-Disposition, extracts |name| from "name=" and
  // possibly |value| from "filename=" fields of that header. Only if the
  // "name" or "filename" fields are found, then |name| or |value| are touched.
  // Returns true iff |source_| is seeked forward. Sets |value_assigned|
  // to true iff |value| has been assigned to.
  bool TryReadHeader(base::StringPiece* name,
                     base::StringPiece* value,
                     bool* value_assigned);

  // Helper to GetNextNameValue. Expects that the input starts with a data
  // portion of a body part. An attempt is made to read the input until the end
  // of that body part. If |data| is not NULL, it is set to contain the data
  // portion. Returns true iff the reading was successful.
  bool FinishReadingPart(base::StringPiece* data);

  // These methods could be even static, but then we would have to spend more
  // code on initializing the cached pointer to g_patterns.Get().
  const RE2& transfer_padding_pattern() const {
    return patterns_->transfer_padding_pattern;
  }
  const RE2& crlf_pattern() const {
    return patterns_->crlf_pattern;
  }
  const RE2& closing_pattern() const {
    return patterns_->closing_pattern;
  }
  const RE2& epilogue_pattern() const {
    return patterns_->epilogue_pattern;
  }
  const RE2& crlf_free_pattern() const {
    return patterns_->crlf_free_pattern;
  }
  const RE2& preamble_pattern() const {
    return patterns_->preamble_pattern;
  }
  const RE2& header_pattern() const {
    return patterns_->header_pattern;
  }
  const RE2& content_disposition_pattern() const {
    return patterns_->content_disposition_pattern;
  }
  const RE2& name_pattern() const {
    return patterns_->name_pattern;
  }
  const RE2& value_pattern() const {
    return patterns_->value_pattern;
  }
  // However, this is used in a static method so it needs to be static.
  static const RE2& unquote_pattern() {
    return g_patterns.Get().unquote_pattern;  // No caching g_patterns here.
  }

  // Matches "--" followed by the boundary passed to the constructor.
  const RE2 dash_boundary_pattern_;

  // Because of initialisation dependency, |state_| needs to be declared after
  // |dash_boundary_pattern_|.
  State state_;

  // The parsed message can be split into multiple sources which we read
  // sequentially.
  re2::StringPiece source_;

  // Caching the pointer to g_patterns.Get().
  const Patterns* patterns_;

  DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);
};
| 283 |
// Implementation of FormDataParser and FormDataParser::Result.

FormDataParser::Result::Result() {}
FormDataParser::Result::~Result() {}

// Empties both the stored name and the stored value.
void FormDataParser::Result::Reset() {
  name_.erase();
  value_.erase();
}

FormDataParser::~FormDataParser() {}
| 295 |
| 296 // static |
| 297 scoped_ptr<FormDataParser> FormDataParser::Create( |
| 298 const net::URLRequest* request) { |
| 299 std::string value; |
| 300 const bool found = request->extra_request_headers().GetHeader( |
| 301 net::HttpRequestHeaders::kContentType, &value); |
| 302 return Create(found ? &value : NULL); |
| 303 } |
| 304 |
| 305 // static |
| 306 scoped_ptr<FormDataParser> FormDataParser::Create( |
| 307 const std::string* content_type_header) { |
| 308 enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE}; |
| 309 ParserChoice choice = ERROR_CHOICE; |
| 310 std::string boundary; |
| 311 |
| 312 if (content_type_header == NULL) { |
| 313 choice = URL_ENCODED; |
| 314 } else { |
| 315 const std::string content_type( |
| 316 content_type_header->substr(0, content_type_header->find(';'))); |
| 317 |
| 318 if (base::strcasecmp( |
| 319 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { |
| 320 choice = URL_ENCODED; |
| 321 } else if (base::strcasecmp( |
| 322 content_type.c_str(), "multipart/form-data") == 0) { |
| 323 static const char kBoundaryString[] = "boundary="; |
| 324 size_t offset = content_type_header->find(kBoundaryString); |
| 325 if (offset == std::string::npos) { |
| 326 // Malformed header. |
| 327 return scoped_ptr<FormDataParser>(); |
| 328 } |
| 329 offset += sizeof(kBoundaryString) - 1; |
| 330 boundary = content_type_header->substr( |
| 331 offset, content_type_header->find(';', offset)); |
| 332 if (!boundary.empty()) |
| 333 choice = MULTIPART; |
| 334 } |
| 335 } |
| 336 // Other cases are unparseable, including when |content_type| is "text/plain". |
| 337 |
| 338 switch (choice) { |
| 339 case URL_ENCODED: |
| 340 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); |
| 341 case MULTIPART: |
| 342 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); |
| 343 default: // In other words, case ERROR_CHOICE: |
| 344 return scoped_ptr<FormDataParser>(); |
| 345 } |
| 346 } |
| 347 |
FormDataParser::FormDataParser() {}

// Implementation of FormDataParserUrlEncoded.

// Unescaping applied to every name and value of a URL-encoded form: besides
// the URL special characters, control characters and spaces are unescaped and
// '+' is replaced by a space.
const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ =
    net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
    net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
| 355 |
// Creates a parser without any source. |arg_name_| and |arg_value_| are bound
// to the member strings |name_| and |value_|, which RE2 fills in on every
// successful match; |args_| collects pointers to both in the order of the
// capturing groups of the pattern (name first, value second).
FormDataParserUrlEncoded::FormDataParserUrlEncoded()
    : source_(NULL),
      source_set_(false),
      arg_name_(&name_),
      arg_value_(&value_),
      patterns_(&(g_patterns.Get())) {
  args_[0] = &arg_name_;
  args_[1] = &arg_value_;
}

FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
| 367 |
| 368 bool FormDataParserUrlEncoded::AllDataReadOK() { |
| 369 // All OK means we read the whole source. |
| 370 return source_set_ && source_.size() == 0; |
| 371 } |
| 372 |
| 373 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { |
| 374 if (!source_set_) |
| 375 return false; |
| 376 |
| 377 bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_); |
| 378 if (success) { |
| 379 result->set_name(net::UnescapeURLComponent(name_, unescape_rules_)); |
| 380 result->set_value(net::UnescapeURLComponent(value_, unescape_rules_)); |
| 381 } |
| 382 return success; |
| 383 } |
| 384 |
| 385 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) { |
| 386 if (source_set_) |
| 387 return false; // We do not allow multiple sources for this parser. |
| 388 source_.set(source.data(), source.size()); |
| 389 source_set_ = true; |
| 390 return true; |
| 391 } |
| 392 |
| 393 // Implementation of FormDataParserMultipart. |
| 394 |
| 395 // static |
| 396 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral( |
| 397 const std::string& literal) { |
| 398 #define OPEN_QUOTE "\\Q" |
| 399 static const char quote[] = OPEN_QUOTE; |
| 400 static const char unquote[] = "\\E"; |
| 401 |
| 402 // The result always starts with opening the qoute and then "--". |
| 403 std::string result(OPEN_QUOTE "--"); |
| 404 #undef OPEN_QUOTE |
| 405 |
| 406 // This StringPiece is used below to record the next occurrence of "\\E" in |
| 407 // |literal|. |
| 408 re2::StringPiece seek_unquote(literal); |
| 409 const char* copy_start = literal.data(); |
| 410 size_t copy_length = literal.size(); |
| 411 |
| 412 // Find all "\\E" in |literal| and exclude them from the \Q...\E quote. |
| 413 while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) { |
| 414 copy_length = seek_unquote.data() - copy_start; |
| 415 result.append(copy_start, copy_length); |
| 416 result.append(g_escape_closing_quote); |
| 417 result.append(quote); |
| 418 copy_start = seek_unquote.data(); |
| 419 } |
| 420 |
| 421 // Finish the last \Q...\E quote. |
| 422 copy_length = (literal.data() + literal.size()) - copy_start; |
| 423 result.append(copy_start, copy_length); |
| 424 result.append(unquote); |
| 425 return result; |
| 426 } |
| 427 |
| 428 // static |
| 429 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input, |
| 430 const RE2& pattern) { |
| 431 return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0); |
| 432 } |
| 433 |
// Compiles the pattern for "--" + |boundary_separator|. If the compilation
// fails, the parser starts out in STATE_ERROR; this is why |state_| has to be
// initialized after |dash_boundary_pattern_|.
FormDataParserMultipart::FormDataParserMultipart(
    const std::string& boundary_separator)
    : dash_boundary_pattern_(
          CreateBoundaryPatternFromLiteral(boundary_separator)),
      state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR),
      patterns_(&(g_patterns.Get())) {}

FormDataParserMultipart::~FormDataParserMultipart() {}
| 442 |
| 443 bool FormDataParserMultipart::AllDataReadOK() { |
| 444 return state_ == STATE_FINISHED; |
| 445 } |
| 446 |
| 447 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) { |
| 448 const char* data_start = source_.data(); |
| 449 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { |
| 450 if (!RE2::Consume(&source_, crlf_free_pattern()) || |
| 451 !RE2::Consume(&source_, crlf_pattern())) { |
| 452 state_ = STATE_ERROR; |
| 453 return false; |
| 454 } |
| 455 } |
| 456 if (data != NULL) { |
| 457 if (source_.data() == data_start) { |
| 458 // No data in this body part. |
| 459 state_ = STATE_ERROR; |
| 460 return false; |
| 461 } |
| 462 // Subtract 2u for the trailing "\r\n". |
| 463 data->set(data_start, source_.data() - data_start - 2u); |
| 464 } |
| 465 |
| 466 // Finally, read the dash-boundary and either skip to the next body part, or |
| 467 // finish reading the source. |
| 468 CHECK(RE2::Consume(&source_, dash_boundary_pattern_)); |
| 469 if (StartsWithPattern(source_, closing_pattern())) { |
| 470 CHECK(RE2::Consume(&source_, closing_pattern())); |
| 471 if (RE2::Consume(&source_, epilogue_pattern())) |
| 472 state_ = STATE_FINISHED; |
| 473 else |
| 474 state_ = STATE_ERROR; |
| 475 } else { // Next body part ahead. |
| 476 if (!RE2::Consume(&source_, transfer_padding_pattern())) |
| 477 state_ = STATE_ERROR; |
| 478 } |
| 479 return state_ != STATE_ERROR; |
| 480 } |
| 481 |
| 482 bool FormDataParserMultipart::GetNextNameValue(Result* result) { |
| 483 if (source_.size() == 0 || state_ != STATE_READY) |
| 484 return false; |
| 485 |
| 486 // 1. Read body-part headers. |
| 487 base::StringPiece name; |
| 488 base::StringPiece value; |
| 489 bool value_assigned = false; |
| 490 bool value_assigned_temp; |
| 491 while (TryReadHeader(&name, &value, &value_assigned_temp)) |
| 492 value_assigned |= value_assigned_temp; |
| 493 if (name.size() == 0 || state_ == STATE_ERROR) { |
| 494 state_ = STATE_ERROR; |
| 495 return false; |
| 496 } |
| 497 |
| 498 // 2. Read the trailing CRLF after headers. |
| 499 if (!RE2::Consume(&source_, crlf_pattern())) { |
| 500 state_ = STATE_ERROR; |
| 501 return false; |
| 502 } |
| 503 |
| 504 // 3. Read the data of this body part, i.e., everything until the first |
| 505 // dash-boundary. |
| 506 bool return_value; |
| 507 if (value_assigned && source_.size() == 0) { // Wait for a new source? |
| 508 return_value = true; |
| 509 state_ = STATE_SUSPEND; |
| 510 } else { |
| 511 return_value = FinishReadingPart(value_assigned ? NULL : &value); |
| 512 } |
| 513 |
| 514 std::string unescaped_name = net::UnescapeURLComponent( |
| 515 name.as_string(), |
| 516 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS); |
| 517 result->set_name(unescaped_name); |
| 518 result->set_value(value); |
| 519 |
| 520 return return_value; |
| 521 } |
| 522 |
| 523 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) { |
| 524 if (source.data() == NULL || source_.size() != 0) |
| 525 return false; |
| 526 source_.set(source.data(), source.size()); |
| 527 |
| 528 switch (state_) { |
| 529 case STATE_INIT: |
| 530 // Seek behind the preamble. |
| 531 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { |
| 532 if (!RE2::Consume(&source_, preamble_pattern())) { |
| 533 state_ = STATE_ERROR; |
| 534 break; |
| 535 } |
| 536 } |
| 537 // Read dash-boundary, transfer padding, and CRLF. |
| 538 if (state_ != STATE_ERROR) { |
| 539 if (!RE2::Consume(&source_, dash_boundary_pattern_) || |
| 540 !RE2::Consume(&source_, transfer_padding_pattern())) |
| 541 state_ = STATE_ERROR; |
| 542 else |
| 543 state_ = STATE_READY; |
| 544 } |
| 545 break; |
| 546 case STATE_READY: // Nothing to do. |
| 547 break; |
| 548 case STATE_SUSPEND: |
| 549 state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR; |
| 550 break; |
| 551 default: |
| 552 state_ = STATE_ERROR; |
| 553 } |
| 554 return state_ != STATE_ERROR; |
| 555 } |
| 556 |
| 557 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name, |
| 558 base::StringPiece* value, |
| 559 bool* value_assigned) { |
| 560 *value_assigned = false; |
| 561 const char* header_start = source_.data(); |
| 562 if (!RE2::Consume(&source_, header_pattern())) |
| 563 return false; |
| 564 // (*) After this point we must return true, because we consumed one header. |
| 565 |
| 566 // Subtract 2u for the trailing "\r\n". |
| 567 re2::StringPiece header(header_start, source_.data() - header_start - 2u); |
| 568 |
| 569 if (!StartsWithPattern(header, content_disposition_pattern())) |
| 570 return true; // Skip headers that don't describe the content-disposition. |
| 571 |
| 572 re2::StringPiece groups[2u]; |
| 573 |
| 574 if (!name_pattern().Match(header, |
| 575 g_content_disposition_length, header.size(), |
| 576 RE2::UNANCHORED, groups, 2)) { |
| 577 state_ = STATE_ERROR; |
| 578 return true; // See (*) for why true. |
| 579 } |
| 580 name->set(groups[1].data(), groups[1].size()); |
| 581 |
| 582 if (value_pattern().Match(header, |
| 583 g_content_disposition_length, header.size(), |
| 584 RE2::UNANCHORED, groups, 2)) { |
| 585 value->set(groups[1].data(), groups[1].size()); |
| 586 *value_assigned = true; |
| 587 } |
| 588 return true; |
| 589 } |
| 590 |
| 591 } // namespace extensions |
OLD | NEW |