Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
| 6 | |
| 7 #include "base/string_util.h" | |
| 8 #include "base/values.h" | |
| 9 #include "net/base/escape.h" | |
| 10 #include "net/url_request/url_request.h" | |
| 11 | |
| 12 using base::DictionaryValue; | |
| 13 using base::ListValue; | |
| 14 using base::StringPiece; | |
| 15 | |
| 16 namespace extensions { | |
| 17 | |
| 18 // Implementation of FormDataParser and FormDataParser::Result . | |
| 19 | |
| 20 FormDataParser::Result::Result() {} | |
| 21 FormDataParser::Result::~Result() {} | |
| 22 | |
| 23 void FormDataParser::Result::Reset() { | |
| 24 name_.erase(); | |
| 25 value_.erase(); | |
| 26 } | |
| 27 | |
| 28 FormDataParser::~FormDataParser() {} | |
| 29 | |
| 30 // static | |
| 31 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 32 const net::URLRequest* request) { | |
| 33 std::string value; | |
| 34 const bool found = request->extra_request_headers().GetHeader( | |
| 35 net::HttpRequestHeaders::kContentType, &value); | |
| 36 return Create(found ? &value : NULL); | |
| 37 } | |
| 38 | |
| 39 // static | |
| 40 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 41 const std::string* content_type_header) { | |
| 42 enum ParserChoice {kUrlEncoded, kMultipart, kError}; | |
| 43 ParserChoice choice = kError; | |
| 44 std::string boundary; | |
| 45 | |
| 46 if (content_type_header == NULL) { | |
| 47 choice = kUrlEncoded; | |
| 48 } else { | |
| 49 const std::string content_type( | |
| 50 content_type_header->substr(0, content_type_header->find(';'))); | |
| 51 | |
| 52 if (base::strcasecmp( | |
| 53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
| 54 choice = kUrlEncoded; | |
| 55 } else if (base::strcasecmp( | |
| 56 content_type.c_str(), "multipart/form-data") == 0) { | |
| 57 static const char kBoundaryString[] = "boundary="; | |
| 58 size_t offset = content_type_header->find(kBoundaryString); | |
| 59 if (offset == std::string::npos) { | |
| 60 // Malformed header. | |
| 61 return scoped_ptr<FormDataParser>(); | |
| 62 } | |
| 63 offset += strlen(kBoundaryString); | |
| 64 boundary = content_type_header->substr( | |
| 65 offset, content_type_header->find(';', offset)); | |
| 66 if (!boundary.empty()) | |
| 67 choice = kMultipart; | |
| 68 } | |
| 69 } | |
| 70 // Other cases are unparseable, including when |content_type| is "text/plain". | |
|
tkent
2012/08/24 14:26:50
Why text/plain is not supported?
vabr (Chromium)
2012/08/24 16:16:59
This encoding is ambiguous.
Nice description from
| |
| 71 | |
| 72 switch (choice) { | |
| 73 case kUrlEncoded: | |
| 74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
| 75 case kMultipart: | |
| 76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
| 77 default: // In other words, case kError: | |
| 78 return scoped_ptr<FormDataParser>(); | |
| 79 } | |
| 80 } | |
| 81 | |
| 82 FormDataParser::FormDataParser() {} | |
| 83 | |
| 84 // Implementation of FormDataParserUrlEncoded. | |
| 85 | |
| 86 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
| 87 : source_end_(NULL), | |
| 88 aborted_(false), | |
| 89 offset_(NULL), | |
| 90 equality_signs_(0), | |
| 91 amp_signs_(0), | |
| 92 expect_equality_(true) { | |
| 93 } | |
| 94 | |
| 95 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
| 96 | |
| 97 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
| 98 return source_.data() != NULL && | |
| 99 !aborted_ && | |
| 100 offset_ == source_end_ && | |
| 101 equality_signs_ == amp_signs_ + 1; | |
| 102 } | |
| 103 | |
| 104 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
| 105 result->Reset(); | |
| 106 if (source_.data() == NULL || aborted_) | |
| 107 return false; | |
| 108 if (offset_ == source_end_) | |
| 109 return false; | |
| 110 const char* const name_start = &(*offset_); | |
| 111 char c; | |
| 112 bool last_read_success = GetNextChar(&c); | |
| 113 while (last_read_success && c != '=') | |
| 114 last_read_success = GetNextChar(&c); | |
| 115 if (!last_read_success) { // This means the data is malformed. | |
| 116 Abort(); | |
| 117 return false; | |
| 118 } | |
| 119 const char* const name_end = offset_ - 1; | |
| 120 const std::string encoded_name(name_start, name_end - name_start); | |
| 121 const net::UnescapeRule::Type unescape_rules = | |
| 122 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
| 123 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
| 124 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); | |
| 125 | |
| 126 const char* const value_start = offset_; | |
| 127 last_read_success = GetNextChar(&c); | |
| 128 while (last_read_success && c != '&') | |
| 129 last_read_success = GetNextChar(&c); | |
| 130 const char* const value_end = | |
| 131 last_read_success ? offset_ - 1 : offset_; | |
| 132 const std::string encoded_value(value_start, value_end - value_start); | |
| 133 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); | |
| 134 return true; | |
| 135 } | |
| 136 | |
| 137 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) { | |
| 138 if (source_ != NULL || source.data() == NULL || aborted_) | |
| 139 return false; | |
| 140 source_ = source; | |
| 141 source_end_ = source_.data() + source_.size(); | |
| 142 offset_ = source_.data(); | |
| 143 return true; | |
| 144 } | |
| 145 | |
| 146 bool FormDataParserUrlEncoded::GetNextChar(char* c) { | |
| 147 if (offset_ == source_end_ || aborted_) | |
| 148 return false; | |
| 149 *c = *offset_; | |
| 150 ++offset_; | |
| 151 | |
| 152 if (*c == '=') { | |
| 153 if (expect_equality_) { | |
| 154 ++equality_signs_; | |
| 155 expect_equality_ = false; | |
| 156 } else { | |
| 157 Abort(); | |
| 158 return false; | |
| 159 } | |
| 160 } | |
| 161 if (*c == '&' && offset_ != source_end_) { | |
| 162 if (!expect_equality_) { | |
| 163 ++amp_signs_; | |
| 164 expect_equality_ = true; | |
| 165 } else { | |
| 166 Abort(); | |
| 167 return false; | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 return true; | |
| 172 } | |
| 173 | |
| 174 void FormDataParserUrlEncoded::Abort() { | |
| 175 aborted_ = true; | |
| 176 } | |
| 177 | |
| 178 // Implementation of FormDataParserMultipart. | |
| 179 | |
| 180 FormDataParserMultipart::FormDataParserMultipart( | |
| 181 const std::string& boundary_separator) | |
| 182 : source_end_(NULL), | |
| 183 offset_(NULL), | |
| 184 dash_boundary_("--" + boundary_separator), | |
| 185 state_(kStart), | |
| 186 value_name_present_(false) { | |
| 187 } | |
| 188 | |
| 189 FormDataParserMultipart::~FormDataParserMultipart() {} | |
| 190 | |
| 191 bool FormDataParserMultipart::AllDataReadOK() { | |
| 192 return source_.data() != NULL && InFinalState(); | |
| 193 } | |
| 194 | |
| 195 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
| 196 if (!value_name_present_ || state_ == kError) | |
| 197 return false; | |
| 198 result->set_name(next_name_); | |
| 199 result->set_value(next_value_); | |
| 200 next_name_.clear(); | |
| 201 next_value_.clear(); | |
| 202 value_name_present_ = ReadNextNameValue(); | |
| 203 return true; | |
| 204 } | |
| 205 | |
| 206 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) { | |
| 207 if (state_ == kError || | |
| 208 source.data() == NULL || | |
| 209 // Message part across a source split is also an error. | |
| 210 next_name_.data() != NULL || next_value_.data() != NULL) | |
| 211 return false; | |
| 212 if (source_.data() != NULL && offset_ != source_end_){ | |
| 213 // Try to seek until the end. If no name-value pair is found, this is OK. | |
| 214 value_name_present_ = ReadNextNameValue(); | |
| 215 if (!value_name_present_ || offset_ != source_end_) | |
| 216 return false; | |
| 217 } | |
| 218 source_ = source; | |
| 219 source_end_ = source_.data() + source_.size(); | |
| 220 offset_ = source_.data(); | |
| 221 value_name_present_ = ReadNextNameValue(); | |
| 222 return true; | |
| 223 } | |
| 224 | |
| 225 // static | |
| 226 char FormDataParserMultipart::kTransitionToChar[] = { | |
| 227 '\n', // For kLF. | |
| 228 '\r', // For kCR. | |
| 229 0, // For kAscii. | |
| 230 0, // For kLwsp. | |
| 231 0, // For kDashBoundary. | |
| 232 ':', // For kColonT. | |
| 233 '-', // For kDash. | |
| 234 0, // For kAny. | |
| 235 }; | |
| 236 | |
| 237 // static | |
| 238 FormDataParserMultipart::Transition | |
| 239 FormDataParserMultipart::kAvailableTransitions[] = { | |
| 240 kDashBoundary, kCR, kAny, // For kStart. | |
| 241 kLF, kAny, // For kCR1. | |
| 242 kCR, kAny, // For kIgnorePreamble. | |
| 243 kLwsp, kCR, kAny, // For kDB1. | |
| 244 kLF, kAny, // For kCR2. | |
| 245 kAscii, kCR, kAny, // For kPart. | |
| 246 kAscii, kColonT, kAny, // For kName. | |
| 247 kLF, kCR, kAny, // For kColonS. | |
| 248 kCR, kAscii, kAny, // For kEnd1. | |
| 249 kLF, kCR, kAscii, kAny, // For kEnd2. | |
| 250 kLwsp, kCR, kAscii, kAny, // For kEnd3. | |
| 251 kLF, kAny, // For kCR3. | |
| 252 kDashBoundary, kCR, kAny, // For kPreData. | |
| 253 kLF, kAny, // For kCR4. | |
| 254 kCR, kAny, // For kData. | |
| 255 kDashBoundary, kAny, // For kData2. | |
| 256 kLwsp, kCR, kDash, kAny, // For kDB2. | |
| 257 kDash, kAny, // For kD. | |
| 258 kLwsp, kCR, kAny, // For kEnd. | |
| 259 kLF, kAny, // For kCR5. | |
| 260 kAny, // For kIgnoreEpilogue. | |
| 261 kAny // For kError. | |
| 262 }; | |
| 263 | |
| 264 // static | |
| 265 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { | |
| 266 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. | |
| 267 kStart, kIgnorePreamble, // For kCR1; 05. | |
| 268 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. | |
| 269 kDB1, kCR2, kError, // For kDB1; 10. | |
| 270 kPart, kError, // For kCR2; 12. | |
| 271 kName, kCR3, kError, // For kPart; 15. | |
| 272 kName, kColonS, kError, // For kName; 18. | |
| 273 kEnd1, kEnd2, kColonS, // For kColonS; 21. | |
| 274 kCR3, kName, kError, // For kEnd1; 24. | |
| 275 kEnd3, kCR3, kName, kError, // For kEnd2; 28. | |
| 276 kColonS, kCR3, kName, kError, // For kEnd3; 32. | |
| 277 kPreData, kError, // For kCR3; 34. | |
| 278 kDB2, kCR3, kData, // For kPreData; 37. | |
| 279 kData2, kData, // For kCR4; 39. | |
| 280 kCR4, kData, // For kData; 41. | |
| 281 kDB2, kCR4, // For kData2; 43. | |
| 282 kDB1, kCR2, kD, kError, // For kDB2; 47. | |
| 283 kEnd, kError, // For kD; 49. | |
| 284 kEnd, kCR5, kError, // For kEnd; 52. | |
| 285 kIgnoreEpilogue, kError, // For kCR5; 54. | |
| 286 kIgnoreEpilogue, // For kIgnoreEpilogue; 55. | |
| 287 kError // For kError; 56. | |
| 288 }; | |
| 289 | |
| 290 // static | |
| 291 size_t FormDataParserMultipart::kStateToTransition[] = { | |
| 292 0u, // For kStart | |
| 293 3u, // For kCR1 | |
| 294 5u, // For kIgnorePreamble | |
| 295 7u, // For kDB1 | |
| 296 10u, // For kCR2 | |
| 297 12u, // For kPart | |
| 298 15u, // For kName | |
| 299 18u, // For kColonS | |
| 300 21u, // For kEnd1 | |
| 301 24u, // For kEnd2 | |
| 302 28u, // For kEnd3 | |
| 303 32u, // For kCR3 | |
| 304 34u, // For kPreData | |
| 305 37u, // For kCR4 | |
| 306 39u, // For kData | |
| 307 41u, // For kData2 | |
| 308 43u, // For kDB2 | |
| 309 47u, // For kD | |
| 310 49u, // For kEnd | |
| 311 52u, // For kCR5 | |
| 312 54u, // For kIgnoreEpilogue | |
| 313 55u, // For kError | |
| 314 }; | |
| 315 | |
| 316 bool FormDataParserMultipart::ReadNextNameValue() { | |
| 317 if (state_ == kError || source_.data() == NULL || | |
| 318 next_name_.data() != NULL || next_value_.data() != NULL) | |
| 319 return false; | |
| 320 | |
| 321 // Seek to the next part's headers. | |
| 322 while (state_ != kPart) { | |
| 323 if (!DoStep()) | |
| 324 return false; | |
| 325 } | |
| 326 while (state_ != kPreData) { | |
| 327 const char* header = offset_; | |
| 328 while (state_ != kColonS) { | |
| 329 if (!DoStep()) | |
| 330 return false; | |
| 331 } | |
| 332 size_t header_length = 0u; | |
| 333 while (state_ != kPreData && state_ != kName) { | |
| 334 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) { | |
| 335 // The cast is safe, we know that offset only moves forward. | |
| 336 header_length = static_cast<size_t>(offset_ - header); | |
| 337 } | |
| 338 if (!DoStep()) | |
| 339 return false; | |
| 340 } | |
| 341 if (ParseHeader(base::StringPiece(header, header_length))) { | |
| 342 // Found what we were looking for, just skip to the part's body. | |
| 343 while (state_ != kPreData) { | |
| 344 if (!DoStep()) | |
| 345 return false; | |
| 346 } | |
| 347 } | |
| 348 } | |
| 349 | |
| 350 const char* body = offset_; | |
| 351 size_t body_length = 0; | |
| 352 while (state_ != kDB2 && offset_ != source_end_) { | |
| 353 if (!DoStep()) | |
| 354 return false; | |
| 355 if (state_ == kCR4) { | |
| 356 // We are in the middle of which might be the CRLF starting the part | |
| 357 // separator (see the "delimiter" non-terminal from the grammar given | |
| 358 // in the header file). The cast is safe, we know that offset only moves | |
| 359 // forward and body was assigned at least 1 transition ago. | |
| 360 body_length = static_cast<size_t>(offset_ - body - 1); | |
| 361 } | |
| 362 } | |
| 363 if (body_length > 0) | |
| 364 next_value_.set(body, body_length); | |
| 365 return true; | |
| 366 } | |
| 367 | |
| 368 bool FormDataParserMultipart::DoStep() { | |
| 369 if (state_ == kError || offset_ == source_end_) | |
| 370 return false; | |
| 371 size_t transition_index = kStateToTransition[state_]; | |
| 372 Transition t = kAvailableTransitions[transition_index]; | |
| 373 while (t != kAny) { | |
| 374 const State s = kNextState[transition_index]; | |
| 375 const size_t length = LookUp(t); | |
| 376 if (length > 0) { | |
| 377 offset_ += length; | |
| 378 state_ = s; | |
| 379 return true; | |
| 380 } | |
| 381 t = kAvailableTransitions[++transition_index]; | |
| 382 } | |
| 383 // We have kAny, the default choice. Seek by one and switch the state. | |
| 384 ++offset_; | |
| 385 state_ = kNextState[transition_index]; | |
| 386 return true; | |
| 387 } | |
| 388 | |
| 389 // Contract -- the following must be true: offset_ != source_end_ . | |
| 390 // The idea is to check this only once in the caller (DoStep()), and do not | |
| 391 // repeat it here every time, as this can be called many times from one call | |
| 392 // to DoStep(). | |
| 393 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { | |
| 394 const char ahead = *offset_; | |
| 395 const char first_char = kTransitionToChar[t]; | |
| 396 | |
| 397 // Easy case: labels corresponding to a single char. | |
| 398 if (first_char != 0) | |
| 399 return ahead == first_char ? 1u : 0u; | |
| 400 | |
| 401 // Harder cases. | |
| 402 switch (t) { | |
| 403 // Multiple alternatives, 1-char long: return immediately. | |
| 404 case kAscii: | |
| 405 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u; | |
| 406 case kLwsp: | |
| 407 return (ahead == ' ' || ahead == '\t') ? 1u : 0u; | |
| 408 | |
| 409 // Longer than 1 char: prepare work for later. | |
| 410 case kDashBoundary: { | |
| 411 const size_t length = dash_boundary_.size(); | |
| 412 // The cast below is safe, we know that the difference is not negative. | |
| 413 if (static_cast<size_t>(source_end_ - offset_) < length || | |
| 414 memcmp(dash_boundary_.c_str(), offset_, length) != 0) | |
| 415 return 0u; | |
| 416 return length; | |
| 417 } | |
| 418 case kAny: | |
| 419 // We are not supposed to be asked for kAny, but this is the right answer: | |
| 420 return 1u; | |
| 421 default: // We never get here -- the rest has already been handled above. | |
| 422 NOTREACHED(); | |
| 423 return 0u; | |
| 424 } | |
| 425 } | |
| 426 | |
| 427 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { | |
| 428 static const char kContentDisposition[] = "Content-Disposition:"; | |
| 429 if (memcmp(header.data(), kContentDisposition, | |
| 430 strlen(kContentDisposition) != 0)) | |
| 431 return false; | |
| 432 static const char kNameEquals[] = " name=\""; | |
| 433 static const char kFilenameEquals[] = " filename=\""; | |
| 434 | |
| 435 // Mandatory part: find the name and set it as |next_name_|. | |
| 436 StringPiece::size_type field_offset = header.find(kNameEquals); | |
| 437 if (field_offset == StringPiece::npos) | |
| 438 return false; | |
| 439 field_offset += strlen(kNameEquals); | |
| 440 StringPiece::size_type field_end = header.find('"', field_offset); | |
| 441 if (field_end == StringPiece::npos) | |
| 442 return false; | |
| 443 next_name_.set(header.data() + field_offset, field_end - field_offset); | |
| 444 | |
| 445 // Optional part: find the filename and set it as |next_value_|. | |
| 446 field_offset = header.find(kFilenameEquals); | |
| 447 if (field_offset == StringPiece::npos) | |
| 448 return true; // This was only optional | |
| 449 field_offset += strlen(kFilenameEquals); | |
| 450 field_end = header.find('"', field_offset); | |
| 451 if (field_end == StringPiece::npos) | |
| 452 return false; // This is a malformed header. | |
| 453 next_value_.set(header.data() + field_offset, field_end - field_offset); | |
| 454 return true; | |
| 455 } | |
| 456 | |
| 457 } // namespace extensions | |
| OLD | NEW |