Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
| 6 | |
| 7 #include <vector> | |
| 8 | |
| 9 #include "base/lazy_instance.h" | |
| 10 #include "base/string_util.h" | |
| 11 #include "base/values.h" | |
| 12 #include "net/base/escape.h" | |
| 13 #include "net/url_request/url_request.h" | |
| 14 #include "third_party/re2/re2/re2.h" | |
| 15 | |
| 16 using base::DictionaryValue; | |
| 17 using base::ListValue; | |
| 18 using base::StringPiece; | |
| 19 using re2::RE2; | |
| 20 | |
| 21 namespace extensions { | |
| 22 | |
| 23 namespace { | |
| 24 | |
| 25 #define CONTENT_DISPOSITION "content-disposition:" | |
| 26 | |
| 27 static const char g_escape_closing_quote[] = "\\\\E"; | |
| 28 static const size_t g_content_disposition_length = | |
| 29 sizeof(CONTENT_DISPOSITION) - 1; | |
| 30 | |
| 31 // A wrapper struct for static RE2 objects to be held as LazyInstance. | |
| 32 struct Patterns { | |
| 33 Patterns(); | |
| 34 ~Patterns(); | |
| 35 const RE2 transfer_padding_pattern; | |
| 36 const RE2 crlf_pattern; | |
| 37 const RE2 closing_pattern; | |
| 38 const RE2 epilogue_pattern; | |
| 39 const RE2 crlf_free_pattern; | |
| 40 const RE2 preamble_pattern; | |
| 41 const RE2 header_pattern; | |
| 42 const RE2 content_disposition_pattern; | |
| 43 const RE2 name_pattern; | |
| 44 const RE2 value_pattern; | |
| 45 const RE2 unquote_pattern; | |
| 46 const RE2 url_encoded_pattern; | |
| 47 }; | |
| 48 | |
| 49 Patterns::Patterns() | |
| 50 : transfer_padding_pattern("[ \\t]*\\r\\n"), | |
| 51 crlf_pattern("\\r\\n"), | |
| 52 closing_pattern("--[ \\t]*"), | |
| 53 epilogue_pattern("|\\r\\n(?s:.)*"), | |
| 54 crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"), | |
| 55 preamble_pattern(".*?"), | |
| 56 header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"), | |
| 57 content_disposition_pattern("(?i:" CONTENT_DISPOSITION ")"), | |
| 58 name_pattern("\\bname=\"([^\"]*)\""), | |
| 59 value_pattern("\\bfilename=\"([^\"]*)\""), | |
| 60 unquote_pattern(g_escape_closing_quote), | |
| 61 url_encoded_pattern("([^=]*)=([^&]*)&?") {} | |
| 62 | |
| 63 #undef CONTENT_DISPOSITION | |
| 64 | |
| 65 Patterns::~Patterns() {} | |
| 66 | |
| 67 static base::LazyInstance<Patterns>::Leaky g_patterns = | |
| 68 LAZY_INSTANCE_INITIALIZER; | |
| 69 | |
| 70 } // namespace | |
| 71 | |
| 72 // Parses URLencoded forms, see | |
| 73 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 . | |
| 74 class FormDataParserUrlEncoded : public FormDataParser { | |
| 75 public: | |
| 76 FormDataParserUrlEncoded(); | |
| 77 virtual ~FormDataParserUrlEncoded(); | |
| 78 | |
| 79 // Implementation of FormDataParser. | |
| 80 virtual bool AllDataReadOK() OVERRIDE; | |
| 81 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
| 82 virtual bool SetSource(const base::StringPiece& source) OVERRIDE; | |
| 83 | |
| 84 private: | |
| 85 // The pattern to match a single name-value pair. This could be even static, | |
| 86 // but then we would have to spend more code on initializing the cached | |
| 87 // pointer to g_patterns.Get() . | |
| 88 const RE2& pattern() const { | |
| 89 return patterns_->url_encoded_pattern; | |
| 90 } | |
| 91 | |
| 92 // Auxiliary constant for using RE2. Number of arguments for parsing | |
| 93 // name-value pairs (one for name, one for value). | |
| 94 static const size_t args_size_ = 2u; | |
| 95 static const net::UnescapeRule::Type unescape_rules_; | |
| 96 | |
| 97 re2::StringPiece source_; | |
| 98 bool source_set_; | |
| 99 | |
| 100 // Auxiliary store for using RE2. | |
| 101 std::string name_; | |
| 102 std::string value_; | |
| 103 const RE2::Arg arg_name_; | |
| 104 const RE2::Arg arg_value_; | |
| 105 const RE2::Arg* args_[args_size_]; | |
| 106 | |
| 107 // Caching the pointer to g_patterns.Get() . | |
| 108 const Patterns* patterns_; | |
| 109 | |
| 110 DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded); | |
| 111 }; | |
| 112 | |
| 113 // The following class, FormDataParserMultipart, parses forms encoded as | |
| 114 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart | |
| 115 // encoding) and 5322 (MIME-headers). | |
| 116 // | |
| 117 // Implementation details | |
| 118 // | |
| 119 // The original grammar from RFC 2046 is this, "multipart-body" being the root | |
| 120 // non-terminal: | |
| 121 // | |
| 122 // boundary := 0*69<bchars> bcharsnospace | |
| 123 // bchars := bcharsnospace / " " | |
| 124 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," | |
| 125 // / "-" / "." / "/" / ":" / "=" / "?" | |
| 126 // dash-boundary := "--" boundary | |
| 127 // multipart-body := [preamble CRLF] | |
| 128 // dash-boundary transport-padding CRLF | |
| 129 // body-part *encapsulation | |
| 130 // close-delimiter transport-padding | |
| 131 // [CRLF epilogue] | |
| 132 // transport-padding := *LWSP-char | |
| 133 // encapsulation := delimiter transport-padding CRLF body-part | |
| 134 // delimiter := CRLF dash-boundary | |
| 135 // close-delimiter := delimiter "--" | |
| 136 // preamble := discard-text | |
| 137 // epilogue := discard-text | |
| 138 // discard-text := *(*text CRLF) *text | |
| 139 // body-part := MIME-part-headers [CRLF *OCTET] | |
| 140 // OCTET := <any 0-255 octet value> | |
| 141 // | |
| 142 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, | |
| 143 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the | |
| 144 // English alphabet, respectively. | |
| 145 // The non-terminal "text" is presumably just any text, excluding line breaks. | |
| 146 // The non-terminal "LWSP-char" is not directly defined in the original grammar | |
| 147 // but it means "linear whitespace", which is a space or a horizontal tab. | |
| 148 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use | |
| 149 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: | |
| 150 // | |
| 151 // MIME-part-headers := field-name ":" unstructured CRLF | |
| 152 // field-name := 1*ftext | |
| 153 // ftext := %d33-57 / ; Printable US-ASCII | |
| 154 // %d59-126 ; characters not including ":". | |
| 155 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which | |
| 156 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and | |
| 157 // "CRLF<horizontal tab>", which serve for "folding". | |
| 158 // | |
| 159 // The FormDataParserMultipart class reads the input source and tries to parse it | |
| 160 // according to the grammar above, rooted at the "multipart-body" non-terminal. | |
| 161 // This happens in stages: | |
| 162 // | |
| 163 // 1. The optional preamble and the initial dash-boundary with transport padding | |
| 164 // and a CRLF are read and ignored. | |
| 165 // | |
| 166 // 2. Repeatedly each body part is read. The body parts can either serve to | |
| 167 // upload a file, or just a string of bytes. | |
| 168 // 2.a. The headers of that part are searched for the "content-disposition" | |
| 169 // header, which contains the name of the value represented by that body | |
| 170 // part. If the body-part is for file upload, that header also contains a | |
| 171 // filename. | |
| 172 // 2.b. The "*OCTET" part of the body part is then read and passed as the value | |
| 173 // of the name-value pair for body parts representing a string of bytes. | |
| 174 // For body parts for uploading a file the "*OCTET" part is just ignored | |
| 175 // and the filename is used for value instead. | |
| 176 // | |
| 177 // 3. The final close-delimiter and epilogue are read and ignored. | |
| 178 // | |
| 179 // IMPORTANT NOTE | |
| 180 // This parser supports multiple sources, i.e., SetSource can be called multiple | |
| 181 // times if the input is spread over several byte blocks. However, the split | |
| 182 // may only occur inside a body part, right after the trailing CRLF of headers. | |
| 183 class FormDataParserMultipart : public FormDataParser { | |
| 184 public: | |
| 185 explicit FormDataParserMultipart(const std::string& boundary_separator); | |
| 186 virtual ~FormDataParserMultipart(); | |
| 187 | |
| 188 // Implementation of FormDataParser. | |
| 189 virtual bool AllDataReadOK() OVERRIDE; | |
| 190 virtual bool GetNextNameValue(Result* result) OVERRIDE; | |
| 191 virtual bool SetSource(const base::StringPiece& source) OVERRIDE; | |
| 192 | |
| 193 private: | |
| 194 enum State { | |
| 195 STATE_INIT, // No input read yet. | |
| 196 STATE_READY, // Ready to call GetNextNameValue. | |
| 197 STATE_FINISHED, // Read the input until the end. | |
| 198 STATE_SUSPEND, // Waiting until a new |source_| is set. | |
| 199 STATE_ERROR | |
| 200 }; | |
| 201 | |
| 202 // Produces a regexp to match the string "--" + |literal|. The idea is to | |
| 203 // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed | |
| 204 // in "\\Q" and "\\E". The only catch is to watch out for occurrences of "\\E" | |
| 205 // inside |literal|. Those must be excluded from the quote and the backslash | |
| 206 // doubly escaped. For example, for literal == "abc\\Edef" the result is | |
| 207 // "\\Q--abc\\E\\\\E\\Qdef\\E". | |
| 208 static std::string CreateBoundaryPatternFromLiteral( | |
| 209 const std::string& literal); | |
| 210 | |
| 211 // Tests whether |input| has a prefix matching |pattern|. | |
| 212 static bool StartsWithPattern(const re2::StringPiece& input, | |
| 213 const RE2& pattern); | |
| 214 | |
| 215 // If |source_| starts with a header, seeks |source_| beyond the header. If | |
| 216 // the header is Content-Disposition, extracts |name| from "name=" and | |
| 217 // possibly |value| from "filename=" fields of that header. Only if the | |
| 218 // "name" or "filename" fields are found, then |name| or |value| are touched. | |
| 219 // Returns true iff |source_| is seeked forward. Sets |value_assigned| | |
| 220 // to true iff |value| has been assigned to. | |
| 221 bool TryReadHeader(base::StringPiece* name, | |
| 222 base::StringPiece* value, | |
| 223 bool* value_assigned); | |
| 224 | |
| 225 // Helper to GetNextNameValue. Expects that the input starts with a data | |
| 226 // portion of a body part. An attempt is made to read the input until the end | |
| 227 // of that body part. If |data| is not NULL, it is set to contain the data | |
| 228 // portion. Returns true iff the reading was successful. | |
| 229 bool FinishReadingPart(base::StringPiece* data); | |
| 230 | |
| 231 // These methods could be even static, but then we would have to spend more | |
| 232 // code on initializing the cached pointer to g_patterns.Get() . | |
| 233 const RE2& transfer_padding_pattern() const { | |
| 234 return patterns_->transfer_padding_pattern; | |
| 235 } | |
| 236 const RE2& crlf_pattern() const { | |
| 237 return patterns_->crlf_pattern; | |
| 238 } | |
| 239 const RE2& closing_pattern() const { | |
| 240 return patterns_->closing_pattern; | |
| 241 } | |
| 242 const RE2& epilogue_pattern() const { | |
| 243 return patterns_->epilogue_pattern; | |
| 244 } | |
| 245 const RE2& crlf_free_pattern() const { | |
| 246 return patterns_->crlf_free_pattern; | |
| 247 } | |
| 248 const RE2& preamble_pattern() const { | |
| 249 return patterns_->preamble_pattern; | |
| 250 } | |
| 251 const RE2& header_pattern() const { | |
| 252 return patterns_->header_pattern; | |
| 253 } | |
| 254 const RE2& content_disposition_pattern() const { | |
| 255 return patterns_->content_disposition_pattern; | |
| 256 } | |
| 257 const RE2& name_pattern() const { | |
| 258 return patterns_->name_pattern; | |
| 259 } | |
| 260 const RE2& value_pattern() const { | |
| 261 return patterns_->value_pattern; | |
| 262 } | |
| 263 // However, this is used in a static method so it needs to be static. | |
| 264 static const RE2& unquote_pattern() { | |
| 265 return g_patterns.Get().unquote_pattern; // No caching g_patterns here. | |
| 266 } | |
| 267 | |
| 268 const RE2 dash_boundary_pattern_; | |
| 269 | |
| 270 // Because of initialisation dependency, |state_| needs to be declared after | |
| 271 // |dash_boundary_pattern_|. | |
| 272 State state_; | |
| 273 | |
| 274 // The parsed message can be split into multiple sources which we read | |
| 275 // sequentially. | |
| 276 re2::StringPiece source_; | |
| 277 | |
| 278 // Caching the pointer to g_patterns.Get() . | |
|
battre
2012/09/12 18:08:40
nit: - space before .
| |
| 279 const Patterns* patterns_; | |
| 280 | |
| 281 DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart); | |
| 282 }; | |
| 283 | |
| 284 // Implementation of FormDataParser and FormDataParser::Result . | |
| 285 | |
| 286 FormDataParser::Result::Result() {} | |
| 287 FormDataParser::Result::~Result() {} | |
| 288 | |
| 289 void FormDataParser::Result::Reset() { | |
| 290 name_.erase(); | |
| 291 value_.erase(); | |
| 292 } | |
| 293 | |
| 294 FormDataParser::~FormDataParser() {} | |
| 295 | |
| 296 // static | |
| 297 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 298 const net::URLRequest* request) { | |
| 299 std::string value; | |
| 300 const bool found = request->extra_request_headers().GetHeader( | |
| 301 net::HttpRequestHeaders::kContentType, &value); | |
| 302 return Create(found ? &value : NULL); | |
| 303 } | |
| 304 | |
| 305 // static. Returns a parser matching |content_type_header| (URL-encoded when the header is NULL), or a NULL scoped_ptr when the type is unsupported or the header is malformed. | |
| 306 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 307 const std::string* content_type_header) { | |
| 308 enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE}; | |
| 309 ParserChoice choice = ERROR_CHOICE; | |
| 310 std::string boundary; | |
| 311 | |
| 312 if (content_type_header == NULL) { | |
| 313 choice = URL_ENCODED; | |
| 314 } else { | |
| 315 const std::string content_type( | |
| 316 content_type_header->substr(0, content_type_header->find(';'))); | |
| 317 | |
| 318 if (base::strcasecmp( | |
| 319 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
| 320 choice = URL_ENCODED; | |
| 321 } else if (base::strcasecmp( | |
| 322 content_type.c_str(), "multipart/form-data") == 0) { | |
| 323 static const char kBoundaryString[] = "boundary="; | |
| 324 size_t offset = content_type_header->find(kBoundaryString); | |
| 325 if (offset == std::string::npos) { | |
| 326 // Malformed header: multipart/form-data requires a boundary parameter. | |
| 327 return scoped_ptr<FormDataParser>(); | |
| 328 } | |
| 329 offset += sizeof(kBoundaryString) - 1; | |
| 330 boundary = content_type_header->substr( | |
| 331 offset, content_type_header->find(';', offset) - offset);  // substr() takes a length, not an end position; an npos result still clamps to the end. | |
| 332 if (!boundary.empty()) | |
| 333 choice = MULTIPART; | |
| 334 } | |
| 335 } | |
| 336 // Other cases are unparseable, including when |content_type| is "text/plain". | |
| 337 | |
| 338 switch (choice) { | |
| 339 case URL_ENCODED: | |
| 340 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
| 341 case MULTIPART: | |
| 342 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
| 343 default: // In other words, case ERROR_CHOICE: | |
| 344 return scoped_ptr<FormDataParser>(); | |
| 345 } | |
| 346 } | |
| 347 | |
| 348 FormDataParser::FormDataParser() {} | |
| 349 | |
| 350 // Implementation of FormDataParserUrlEncoded. | |
| 351 | |
| 352 const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ = | |
| 353 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
| 354 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
| 355 | |
| 356 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
| 357 : source_(NULL), | |
| 358 source_set_(false), | |
| 359 arg_name_(&name_), | |
| 360 arg_value_(&value_), | |
| 361 patterns_(&(g_patterns.Get())) { | |
| 362 args_[0] = &arg_name_; | |
| 363 args_[1] = &arg_value_; | |
| 364 } | |
| 365 | |
| 366 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
| 367 | |
| 368 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
| 369 // All OK means we read the whole source. | |
| 370 return source_set_ && source_.size() == 0; | |
| 371 } | |
| 372 | |
| 373 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
| 374 if (!source_set_) | |
| 375 return false; | |
| 376 | |
| 377 bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_); | |
| 378 if (success) { | |
| 379 result->set_name(net::UnescapeURLComponent(name_, unescape_rules_)); | |
| 380 result->set_value(net::UnescapeURLComponent(value_, unescape_rules_)); | |
| 381 } | |
| 382 return success; | |
| 383 } | |
| 384 | |
| 385 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) { | |
| 386 if (source_set_) | |
| 387 return false; // We do not allow multiple sources for this parser. | |
| 388 source_.set(source.data(), source.size()); | |
| 389 source_set_ = true; | |
| 390 return true; | |
| 391 } | |
| 392 | |
| 393 // Implementation of FormDataParserMultipart. | |
| 394 | |
| 395 // static | |
| 396 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral( | |
| 397 const std::string& literal) { | |
| 398 #define OPEN_QUOTE "\\Q" | |
| 399 static const char quote[] = OPEN_QUOTE; | |
| 400 static const char unquote[] = "\\E"; | |
| 401 | |
| 402 // The result always starts with opening the quote and then "--". | |
| 403 std::string result(OPEN_QUOTE "--"); | |
| 404 #undef OPEN_QUOTE | |
| 405 | |
| 406 // This StringPiece is used below to record the next occurrence of "\\E" in | |
| 407 // |literal|. | |
| 408 re2::StringPiece seek_unquote(literal); | |
| 409 const char* copy_start = literal.data(); | |
| 410 size_t copy_length = literal.size(); | |
| 411 | |
| 412 // Find all "\\E" in |literal| and exclude them from the \Q...\E quote. | |
| 413 while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) { | |
| 414 copy_length = seek_unquote.data() - copy_start; | |
| 415 result.append(copy_start, copy_length); | |
| 416 result.append(g_escape_closing_quote); | |
| 417 result.append(quote); | |
| 418 copy_start = seek_unquote.data(); | |
| 419 } | |
| 420 | |
| 421 // Finish the last \Q...\E quote. | |
| 422 copy_length = (literal.data() + literal.size()) - copy_start; | |
| 423 result.append(copy_start, copy_length); | |
| 424 result.append(unquote); | |
| 425 return result; | |
| 426 } | |
| 427 | |
| 428 // static | |
| 429 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input, | |
| 430 const RE2& pattern) { | |
| 431 return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0); | |
| 432 } | |
| 433 | |
| 434 FormDataParserMultipart::FormDataParserMultipart( | |
| 435 const std::string& boundary_separator) | |
| 436 : dash_boundary_pattern_( | |
| 437 CreateBoundaryPatternFromLiteral(boundary_separator)), | |
| 438 state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR), | |
| 439 patterns_(&(g_patterns.Get())) {} | |
| 440 | |
| 441 FormDataParserMultipart::~FormDataParserMultipart() {} | |
| 442 | |
| 443 bool FormDataParserMultipart::AllDataReadOK() { | |
| 444 return state_ == STATE_FINISHED; | |
| 445 } | |
| 446 | |
| 447 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) { | |
| 448 const char* data_start = source_.data(); | |
| 449 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
| 450 if (!RE2::Consume(&source_, crlf_free_pattern()) || | |
| 451 !RE2::Consume(&source_, crlf_pattern())) { | |
| 452 state_ = STATE_ERROR; | |
| 453 return false; | |
| 454 } | |
| 455 } | |
| 456 if (data != NULL) { | |
| 457 if (source_.data() == data_start) { | |
| 458 // No data in this body part. | |
| 459 state_ = STATE_ERROR; | |
| 460 return false; | |
| 461 } | |
| 462 // Subtract 2u for the trailing "\r\n". | |
| 463 data->set(data_start, source_.data() - data_start - 2u); | |
| 464 } | |
| 465 | |
| 466 // Finally, read the dash-boundary and either skip to the next body part, or | |
| 467 // finish reading the source. | |
| 468 CHECK(RE2::Consume(&source_, dash_boundary_pattern_)); | |
| 469 if (StartsWithPattern(source_, closing_pattern())) { | |
| 470 CHECK(RE2::Consume(&source_, closing_pattern())); | |
| 471 if (RE2::Consume(&source_, epilogue_pattern())) | |
| 472 state_ = STATE_FINISHED; | |
| 473 else | |
| 474 state_ = STATE_ERROR; | |
| 475 } else { // Next body part ahead. | |
| 476 if (!RE2::Consume(&source_, transfer_padding_pattern())) | |
| 477 state_ = STATE_ERROR; | |
| 478 } | |
| 479 return state_ != STATE_ERROR; | |
| 480 } | |
| 481 | |
| 482 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
| 483 if (source_.size() == 0 || state_ != STATE_READY) | |
| 484 return false; | |
| 485 | |
| 486 // 1. Read body-part headers. | |
| 487 base::StringPiece name; | |
| 488 base::StringPiece value; | |
| 489 bool value_assigned = false; | |
| 490 bool value_assigned_temp; | |
| 491 while (TryReadHeader(&name, &value, &value_assigned_temp)) | |
| 492 value_assigned |= value_assigned_temp; | |
| 493 if (name.size() == 0 || state_ == STATE_ERROR) { | |
| 494 state_ = STATE_ERROR; | |
| 495 return false; | |
| 496 } | |
| 497 | |
| 498 // 2. Read the trailing CRLF after headers. | |
| 499 if (!RE2::Consume(&source_, crlf_pattern())) { | |
| 500 state_ = STATE_ERROR; | |
| 501 return false; | |
| 502 } | |
| 503 | |
| 504 // 3. Read the data of this body part, i.e., everything until the first | |
| 505 // dash-boundary. | |
| 506 bool return_value; | |
| 507 if (value_assigned && source_.size() == 0) { // Wait for a new source? | |
| 508 return_value = true; | |
| 509 state_ = STATE_SUSPEND; | |
| 510 } else { | |
| 511 return_value = FinishReadingPart(value_assigned ? NULL : &value); | |
| 512 } | |
| 513 | |
| 514 std::string unescaped_name = net::UnescapeURLComponent( | |
| 515 name.as_string(), | |
| 516 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS); | |
| 517 result->set_name(unescaped_name); | |
| 518 result->set_value(value); | |
| 519 | |
| 520 return return_value; | |
| 521 } | |
| 522 | |
| 523 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) { | |
| 524 if (source.data() == NULL || source_.size() != 0) | |
| 525 return false; | |
| 526 source_.set(source.data(), source.size()); | |
| 527 | |
| 528 switch (state_) { | |
| 529 case STATE_INIT: | |
| 530 // Seek behind the preamble. | |
| 531 while (!StartsWithPattern(source_, dash_boundary_pattern_)) { | |
| 532 if (!RE2::Consume(&source_, preamble_pattern())) { | |
| 533 state_ = STATE_ERROR; | |
| 534 break; | |
| 535 } | |
| 536 } | |
| 537 // Read dash-boundary, transport padding, and CRLF. | |
| 538 if (state_ != STATE_ERROR) { | |
| 539 if (!RE2::Consume(&source_, dash_boundary_pattern_) || | |
| 540 !RE2::Consume(&source_, transfer_padding_pattern())) | |
| 541 state_ = STATE_ERROR; | |
| 542 else | |
| 543 state_ = STATE_READY; | |
| 544 } | |
| 545 break; | |
| 546 case STATE_READY: // Nothing to do. | |
| 547 break; | |
| 548 case STATE_SUSPEND: | |
| 549 state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR; | |
| 550 break; | |
| 551 default: | |
| 552 state_ = STATE_ERROR; | |
| 553 } | |
| 554 return state_ != STATE_ERROR; | |
| 555 } | |
| 556 | |
| 557 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name, | |
| 558 base::StringPiece* value, | |
| 559 bool* value_assigned) { | |
| 560 *value_assigned = false; | |
| 561 const char* header_start = source_.data(); | |
| 562 if (!RE2::Consume(&source_, header_pattern())) | |
| 563 return false; | |
| 564 // (*) After this point we must return true, because we consumed one header. | |
| 565 | |
| 566 // Subtract 2u for the trailing "\r\n". | |
| 567 re2::StringPiece header(header_start, source_.data() - header_start - 2u); | |
| 568 | |
| 569 if (!StartsWithPattern(header, content_disposition_pattern())) | |
| 570 return true; // Skip headers that don't describe the content-disposition. | |
| 571 | |
| 572 re2::StringPiece groups[2u]; | |
| 573 | |
| 574 if (!name_pattern().Match(header, | |
| 575 g_content_disposition_length, header.size(), | |
| 576 RE2::UNANCHORED, groups, 2)) { | |
| 577 state_ = STATE_ERROR; | |
| 578 return true; // See (*) for why true. | |
| 579 } | |
| 580 name->set(groups[1].data(), groups[1].size()); | |
| 581 | |
| 582 if (value_pattern().Match(header, | |
| 583 g_content_disposition_length, header.size(), | |
| 584 RE2::UNANCHORED, groups, 2)) { | |
| 585 value->set(groups[1].data(), groups[1].size()); | |
| 586 *value_assigned = true; | |
| 587 } | |
| 588 return true; | |
| 589 } | |
| 590 | |
| 591 } // namespace extensions | |
| OLD | NEW |