Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" | |
| 6 | |
| 7 #include "base/string_util.h" | |
| 8 #include "base/values.h" | |
| 9 #include "net/base/escape.h" | |
| 10 #include "net/url_request/url_request.h" | |
| 11 | |
| 12 using base::DictionaryValue; | |
| 13 using base::ListValue; | |
| 14 using base::StringPiece; | |
| 15 | |
| 16 namespace extensions { | |
| 17 | |
| 18 // Implementation of FormDataParser and FormDataParser::Result. | |
| 19 | |
| // Both the constructor and the destructor of Result have empty bodies and | |
| // no initializer list; members are default-constructed. | |
| 20 FormDataParser::Result::Result() {} | |
| 21 FormDataParser::Result::~Result() {} | |
| 22 | |
| // Empties both the stored name and the stored value, so that the same | |
| // Result object can be filled again by a later GetNextNameValue() call. | |
| 23 void FormDataParser::Result::Reset() { | |
| 24 name_.erase(); | |
| 25 value_.erase(); | |
| 26 } | |
| 27 | |
| // The base-class destructor has an empty body. | |
| 28 FormDataParser::~FormDataParser() {} | |
| 29 | |
| 30 // static | |
| // Looks up the Content-Type header among the extra request headers of | |
| // the given request and delegates to the string-based overload of | |
| // Create(), passing NULL when the header is absent. | |
| 31 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 32 const net::URLRequest* request) { | |
| 33 std::string value; | |
| 34 const bool found = request->extra_request_headers().GetHeader( | |
| 35 net::HttpRequestHeaders::kContentType, &value); | |
| 36 return Create(found ? &value : NULL); | |
| 37 } | |
| 38 | |
| 39 // static | |
| // Chooses a parser from the value of the Content-Type header: | |
| // - a NULL header, or the media type "application/x-www-form-urlencoded", | |
| //   yields a FormDataParserUrlEncoded; | |
| // - the media type "multipart/form-data" with a non-empty "boundary=" | |
| //   parameter yields a FormDataParserMultipart for that boundary; | |
| // - everything else yields a NULL scoped_ptr. | |
| // The media type is the text before the first ';' and is compared | |
| // case-insensitively. | |
| 40 scoped_ptr<FormDataParser> FormDataParser::Create( | |
| 41 const std::string* content_type_header) { | |
| 42 enum ParserChoice {kUrlEncoded, kMultipart, kError}; | |
| 43 ParserChoice choice = kError; | |
| 44 std::string boundary; | |
| 45 | |
| 46 if (content_type_header == NULL) { | |
| 47 choice = kUrlEncoded; | |
| 48 } else { | |
| 49 const std::string content_type( | |
| 50 content_type_header->substr(0, content_type_header->find(';'))); | |
| 51 | |
| 52 if (base::strcasecmp( | |
| 53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { | |
| 54 choice = kUrlEncoded; | |
| 55 } else if (base::strcasecmp( | |
| 56 content_type.c_str(), "multipart/form-data") == 0) { | |
| 57 static const char kBoundaryString[] = "boundary="; | |
| 58 size_t offset = content_type_header->find(kBoundaryString); | |
| 59 if (offset == std::string::npos) { | |
| 60 // Malformed header. | |
| 61 return scoped_ptr<FormDataParser>(); | |
| 62 } | |
| 63 offset += strlen(kBoundaryString); | |
| // substr() takes a character count, not an end position, so the start | |
| // offset has to be subtracted. When no further ';' exists, find() returns | |
| // npos; the resulting count is still at least the remaining length and | |
| // substr() clamps it to the end of the header. | |
| // NOTE(review): a quoted boundary value would keep its quotes here -- | |
| // confirm whether such headers need to be supported. | |
| 64 boundary = content_type_header->substr( | |
| 65 offset, content_type_header->find(';', offset) - offset); | |
| 66 if (!boundary.empty()) | |
| 67 choice = kMultipart; | |
| 68 } | |
| 69 } | |
| 70 // Other cases are unparseable, including when |content_type| is "text/plain". | |
| 71 | |
| 72 switch (choice) { | |
| 73 case kUrlEncoded: | |
| 74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); | |
| 75 case kMultipart: | |
| 76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); | |
| 77 default: // In other words, case kError: | |
| 78 return scoped_ptr<FormDataParser>(); | |
| 79 } | |
| 80 } | |
| 81 | |
| // The base-class constructor has an empty body. | |
| 82 FormDataParser::FormDataParser() {} | |
| 83 | |
| 84 // Implementation of FormDataParserUrlEncoded. | |
| 85 | |
| // Starts with no source attached, not aborted, no '=' or '&' separators | |
| // counted yet, and with '=' as the next expected separator. The read | |
| // position offset_ is not set here; SetSource() assigns it. | |
| 86 FormDataParserUrlEncoded::FormDataParserUrlEncoded() | |
| 87 : source_(NULL), | |
| 88 aborted_(false), | |
| 89 equality_signs_(0), | |
| 90 amp_signs_(0), | |
| 91 expect_equality_(true) { | |
| 92 } | |
| 93 | |
| // The destructor has an empty body. | |
| 94 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} | |
| 95 | |
| // Returns true only when a source was set, parsing was not aborted, the | |
| // read position reached the end of the source, and exactly one more '=' | |
| // than '&' was counted (N name-value pairs are joined by N-1 '&'). | |
| 96 bool FormDataParserUrlEncoded::AllDataReadOK() { | |
| 97 return source_ != NULL && | |
| 98 !aborted_ && | |
| 99 offset_ == source_->end() && | |
| 100 equality_signs_ == amp_signs_ + 1; | |
| 101 } | |
| 102 | |
| // Extracts the next "name=value" pair from the source into the given | |
| // result; both parts are URL-unescaped and '+' is turned into a space. | |
| // The result is always reset first. Returns false when no source is set, | |
| // the parser was aborted, the source is exhausted, or the name is not | |
| // terminated by '=' (in which case the parser is aborted as well). | |
| 103 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { | |
| 104 result->Reset(); | |
| 105 if (source_ == NULL || aborted_) | |
| 106 return false; | |
| 107 if (offset_ == source_->end()) | |
| 108 return false; | |
| // Positions are kept as iterators instead of raw pointers, so that the | |
| // end() iterator is never dereferenced when a name or a value extends to | |
| // the very end of the source. This assumes offset_ is declared as a | |
| // std::vector<char>::const_iterator, as its use with source_ suggests. | |
| 109 const std::vector<char>::const_iterator name_start = offset_; | |
| 110 char c; | |
| 111 bool last_read_success = GetNextChar(&c); | |
| 112 while (last_read_success && c != '=') | |
| 113 last_read_success = GetNextChar(&c); | |
| 114 if (!last_read_success) { // This means the data is malformed. | |
| 115 Abort(); | |
| 116 return false; | |
| 117 } | |
| // The last character read was '=', which does not belong to the name. | |
| 118 const std::vector<char>::const_iterator name_end = offset_ - 1; | |
| 119 const std::string encoded_name(name_start, name_end); | |
| 120 const net::UnescapeRule::Type unescape_rules = | |
| 121 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | | |
| 122 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; | |
| 123 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); | |
| 124 | |
| 125 const std::vector<char>::const_iterator value_start = offset_; | |
| 126 last_read_success = GetNextChar(&c); | |
| 127 while (last_read_success && c != '&') | |
| 128 last_read_success = GetNextChar(&c); | |
| // A successful read stopped on '&', which is excluded from the value; | |
| // otherwise the value extends up to the current read position. | |
| 129 const std::vector<char>::const_iterator value_end = | |
| 130 last_read_success ? offset_ - 1 : offset_; | |
| 131 const std::string encoded_value(value_start, value_end); | |
| 132 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); | |
| 133 return true; | |
| 134 } | |
| 135 | |
| // Attaches the given buffer as the data to parse and moves the read | |
| // position to its beginning. Succeeds at most once: returns false when a | |
| // source is already attached, when the argument is NULL, or after an abort. | |
| 136 bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) { | |
| 137 if (source_ != NULL || source == NULL || aborted_) | |
| 138 return false; | |
| 139 source_ = source; | |
| 140 offset_ = source_->begin(); | |
| 141 return true; | |
| 142 } | |
| 143 | |
| // Reads one character into *c and advances the read position. Returns | |
| // false at the end of the source or when the parser is aborted. The source | |
| // pointer is dereferenced without a NULL check, so callers must have set it. | |
| // The function also enforces that '=' and '&' alternate, starting with | |
| // '=': a separator arriving out of turn aborts the parser and returns | |
| // false. Note that *c has already been written and the position already | |
| // advanced when that happens. | |
| 144 bool FormDataParserUrlEncoded::GetNextChar(char* c) { | |
| 145 if (offset_ == source_->end() || aborted_) | |
| 146 return false; | |
| 147 *c = *offset_; | |
| 148 ++offset_; | |
| 149 | |
| 150 if (*c == '=') { | |
| 151 if (expect_equality_) { | |
| 152 ++equality_signs_; | |
| 153 expect_equality_ = false; | |
| 154 } else { | |
| 155 Abort(); | |
| 156 return false; | |
| 157 } | |
| 158 } | |
| // A '&' that is the very last character of the source is neither counted | |
| // nor checked against the expected alternation. | |
| 159 if (*c == '&' && offset_ != source_->end()) { | |
| 160 if (!expect_equality_) { | |
| 161 ++amp_signs_; | |
| 162 expect_equality_ = true; | |
| 163 } else { | |
| 164 Abort(); | |
| 165 return false; | |
| 166 } | |
| 167 } | |
| 168 | |
| 169 return true; | |
| 170 } | |
| 171 | |
| // Marks the parser as aborted. Nothing in this file clears the flag again, | |
| // so all later reads and SetSource() calls on this parser fail. | |
| 172 void FormDataParserUrlEncoded::Abort() { | |
| 173 aborted_ = true; | |
| 174 } | |
| 175 | |
| 176 // Implementation of FormDataParserMultipart. | |
| 177 | |
| // Starts in state kStart with no source attached and no name-value pair | |
| // buffered. The stored dash-boundary is the given boundary separator | |
| // prefixed with "--". | |
| 178 FormDataParserMultipart::FormDataParserMultipart( | |
| 179 const std::string& boundary_separator) | |
| 180 : source_(NULL), | |
| 181 dash_boundary_("--" + boundary_separator), | |
| 182 state_(kStart), | |
| 183 value_name_present_(false) { | |
| 184 } | |
| 185 | |
| // The destructor has an empty body. | |
| 186 FormDataParserMultipart::~FormDataParserMultipart() {} | |
| 187 | |
| // Returns true when a source has been set and InFinalState() reports that | |
| // the state machine stopped in a final state. | |
| 188 bool FormDataParserMultipart::AllDataReadOK() { | |
| 189 return source_ != NULL && InFinalState(); | |
| 190 } | |
| 191 | |
| // Hands out the name-value pair that was read ahead into next_name_ and | |
| // next_value_, clears that buffer, and immediately tries to read the | |
| // following pair. Returns false, leaving the result untouched, when no | |
| // pair is buffered or the parser is in state kError. | |
| 192 bool FormDataParserMultipart::GetNextNameValue(Result* result) { | |
| 193 if (!value_name_present_ || state_ == kError) | |
| 194 return false; | |
| 195 result->set_name(next_name_); | |
| 196 result->set_value(next_value_); | |
| 197 next_name_.clear(); | |
| 198 next_value_.clear(); | |
| 199 value_name_present_ = ReadNextNameValue(); | |
| 200 return true; | |
| 201 } | |
| 202 | |
| // Switches the parser to a new source buffer and reads ahead the first | |
| // name-value pair from it. Returns false when the parser is in state | |
| // kError, when the argument is NULL, when a previously read pair has not | |
| // been fetched yet, or when the unread rest of the old source cannot be | |
| // consumed (see the note inside). | |
| 203 bool FormDataParserMultipart::SetSource(const std::vector<char>* source) { | |
| 204 if (state_ == kError || | |
| 205 source == NULL || | |
| 206 // Message part across a source split is also an error. | |
| 207 next_name_.data() != NULL || next_value_.data() != NULL) | |
| 208 return false; | |
| 209 if (source_ != NULL && offset_ != source_->end()){ | |
| 210 // Try to seek until the end. If no name-value pair is found, this is OK. | |
| // NOTE(review): the condition below returns false precisely when no pair | |
| // was found, which appears to contradict the comment above -- confirm | |
| // the intended behavior. | |
| 211 value_name_present_ = ReadNextNameValue(); | |
| 212 if (!value_name_present_ || offset_ != source_->end()) | |
| 213 return false; | |
| 214 } | |
| 215 source_ = source; | |
| 216 offset_ = source_->begin(); | |
| 217 value_name_present_ = ReadNextNameValue(); | |
| 218 return true; | |
| 219 } | |
| 220 | |
| 221 // static | |
| // Indexed by a Transition value. Holds the single character that the | |
| // transition matches, or 0 when LookUp() has to handle the transition | |
| // with dedicated code. | |
| 222 char FormDataParserMultipart::kTransitionToChar[] = { | |
| 223 '\n', // For kLF. | |
| 224 '\r', // For kCR. | |
| 225 0, // For kAscii. | |
| 226 0, // For kLwsp. | |
| 227 0, // For kDashBoundary. | |
| 228 ':', // For kColonT. | |
| 229 '-', // For kDash. | |
| 230 0, // For kAny. | |
| 231 }; | |
| 232 | |
| 233 // static | |
| // Flattened per-state lists of the transitions DoStep() tries, in order. | |
| // Every list ends with kAny, the default. The list of a state begins at | |
| // index kStateToTransition[state], and the entry at the same index in | |
| // kNextState is the state entered when that transition matches. | |
| 234 FormDataParserMultipart::Transition | |
| 235 FormDataParserMultipart::kAvailableTransitions[] = { | |
| 236 kDashBoundary, kCR, kAny, // For kStart. | |
| 237 kLF, kAny, // For kCR1. | |
| 238 kCR, kAny, // For kIgnorePreamble. | |
| 239 kLwsp, kCR, kAny, // For kDB1. | |
| 240 kLF, kAny, // For kCR2. | |
| 241 kAscii, kCR, kAny, // For kPart. | |
| 242 kAscii, kColonT, kAny, // For kName. | |
| 243 kLF, kCR, kAny, // For kColonS. | |
| 244 kCR, kAscii, kAny, // For kEnd1. | |
| 245 kLF, kCR, kAscii, kAny, // For kEnd2. | |
| 246 kLwsp, kCR, kAscii, kAny, // For kEnd3. | |
| 247 kLF, kAny, // For kCR3. | |
| 248 kDashBoundary, kCR, kAny, // For kPreData. | |
| 249 kLF, kAny, // For kCR4. | |
| 250 kCR, kAny, // For kData. | |
| 251 kDashBoundary, kAny, // For kData2. | |
| 252 kLwsp, kCR, kDash, kAny, // For kDB2. | |
| 253 kDash, kAny, // For kD. | |
| 254 kLwsp, kCR, kAny, // For kEnd. | |
| 255 kLF, kAny, // For kCR5. | |
| 256 kAny, // For kIgnoreEpilogue. | |
| 257 kAny // For kError. | |
| 258 }; | |
| 259 | |
| 260 // static | |
| // Parallel to kAvailableTransitions: the entry at index i is the state | |
| // DoStep() enters when the transition at index i of that array is taken. | |
| // The running totals in the comments equal the start index of the next | |
| // state's list, as recorded in kStateToTransition. | |
| 261 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { | |
| 262 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. | |
| 263 kStart, kIgnorePreamble, // For kCR1; 05. | |
| 264 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. | |
| 265 kDB1, kCR2, kError, // For kDB1; 10. | |
| 266 kPart, kError, // For kCR2; 12. | |
| 267 kName, kCR3, kError, // For kPart; 15. | |
| 268 kName, kColonS, kError, // For kName; 18. | |
| 269 kEnd1, kEnd2, kColonS, // For kColonS; 21. | |
| 270 kCR3, kName, kError, // For kEnd1; 24. | |
| 271 kEnd3, kCR3, kName, kError, // For kEnd2; 28. | |
| 272 kColonS, kCR3, kName, kError, // For kEnd3; 32. | |
| 273 kPreData, kError, // For kCR3; 34. | |
| 274 kDB2, kCR3, kData, // For kPreData; 37. | |
| 275 kData2, kData, // For kCR4; 39. | |
| 276 kCR4, kData, // For kData; 41. | |
| 277 kDB2, kCR4, // For kData2; 43. | |
| 278 kDB1, kCR2, kD, kError, // For kDB2; 47. | |
| 279 kEnd, kError, // For kD; 49. | |
| 280 kEnd, kCR5, kError, // For kEnd; 52. | |
| 281 kIgnoreEpilogue, kError, // For kCR5; 54. | |
| 282 kIgnoreEpilogue, // For kIgnoreEpilogue; 55. | |
| 283 kError // For kError; 56. | |
| 284 }; | |
| 285 | |
| 286 // static | |
| // Indexed by a State value. Holds the index at which the list of that | |
| // state begins in kAvailableTransitions and kNextState. | |
| 287 size_t FormDataParserMultipart::kStateToTransition[] = { | |
| 288 0u, // For kStart | |
| 289 3u, // For kCR1 | |
| 290 5u, // For kIgnorePreamble | |
| 291 7u, // For kDB1 | |
| 292 10u, // For kCR2 | |
| 293 12u, // For kPart | |
| 294 15u, // For kName | |
| 295 18u, // For kColonS | |
| 296 21u, // For kEnd1 | |
| 297 24u, // For kEnd2 | |
| 298 28u, // For kEnd3 | |
| 299 32u, // For kCR3 | |
| 300 34u, // For kPreData | |
| 301 37u, // For kCR4 | |
| 302 39u, // For kData | |
| 303 41u, // For kData2 | |
| 304 43u, // For kDB2 | |
| 305 47u, // For kD | |
| 306 49u, // For kEnd | |
| 307 52u, // For kCR5 | |
| 308 54u, // For kIgnoreEpilogue | |
| 309 55u, // For kError | |
| 310 }; | |
| 311 | |
| // Drives the state machine through the source until one message part has | |
| // been read. The part's headers are passed to ParseHeader(), which fills | |
| // next_name_ (and next_value_ with a file name, if any); a non-empty part | |
| // body is then stored in next_value_. Returns false when the parser is in | |
| // state kError, no source is set, a previously read pair is still | |
| // buffered, or DoStep() fails before the part's body is reached. | |
| // NOTE(review): &(*offset_) is evaluated in several places without first | |
| // comparing offset_ against source_->end() -- confirm that the end | |
| // iterator can never be dereferenced here. | |
| 312 bool FormDataParserMultipart::ReadNextNameValue() { | |
| 313 if (state_ == kError || source_ == NULL || | |
| 314 next_name_.data() != NULL || next_value_.data() != NULL) | |
| 315 return false; | |
| 316 | |
| 317 // Seek to the next part's headers. | |
| 318 while (state_ != kPart) | |

battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 319 if (!DoStep()) | |
| 320 return false; | |
| // Process the part's header lines one by one until the body is reached. | |
| 321 while (state_ != kPreData) { | |
| 322 const char* header = &(*offset_); | |
| // Skip over the header name, up to and including the colon. | |
| 323 while (state_ != kColonS) | |

battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 324 if (!DoStep()) | |
| 325 return false; | |
| 326 size_t header_length = 0u; | |
| // Read the header value. The length is refreshed whenever the machine is | |
| // in one of the kEnd1..kEnd3 states, before the next step is taken. | |
| 327 while (state_ != kPreData && state_ != kName) { | |
| 328 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) | |

battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 329 // The cast is safe, we know that offset only moves forward. | |
| 330 header_length = static_cast<size_t>(&(*offset_) - header); | |
| 331 if (!DoStep()) | |
| 332 return false; | |
| 333 } | |
| 334 if (ParseHeader(base::StringPiece(header, header_length))) { | |
| 335 // Found what we were looking for, just skip to the part's body. | |
| 336 while (state_ != kPreData) | |

battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 337 if (!DoStep()) | |
| 338 return false; | |
| 339 } | |
| 340 } | |
| 341 | |
| // Read the body until the dash-boundary has been consumed (state kDB2) or | |
| // the source ends. | |
| 342 const char* body = &(*offset_); | |
| 343 size_t body_length = 0; | |
| 344 while (state_ != kDB2 && offset_ != source_->end()) { | |
| 345 if (!DoStep()) | |
| 346 return false; | |
| 347 if (state_ == kCR4) | |

battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 348 // We are in the middle of what might be the CRLF starting the part | |
| 349 // separator (see the "delimiter" non-terminal from the grammar given | |
| 350 // in the header file). The cast is safe, we know that offset only moves | |
| 351 // forward and body was assigned at least 1 transition ago. | |
| // The "- 1" leaves the CR that was just consumed out of the body. | |
| 352 body_length = static_cast<size_t>(&(*offset_) - body - 1); | |
| 353 } | |
| // A non-empty body replaces whatever ParseHeader() stored in next_value_. | |
| 354 if (body_length > 0) | |
| 355 next_value_.set(body, body_length); | |
| 356 return true; | |
| 357 } | |
| 358 | |
| // Performs a single transition of the state machine. The transitions | |
| // listed for the current state are tried in order; the first one that | |
| // LookUp() matches decides how many characters are consumed and which | |
| // state is entered. If none matches, the default kAny entry consumes one | |
| // character. Returns false, without changing anything, when the parser is | |
| // in state kError, has no source, or is at the end of the source. A true | |
| // return does not rule out that the state just entered is kError. | |
| 359 bool FormDataParserMultipart::DoStep() { | |
| 360 if (state_ == kError || source_ == NULL || offset_ == source_->end()) | |
| 361 return false; | |
| 362 size_t transition_index = kStateToTransition[state_]; | |
| 363 Transition t = kAvailableTransitions[transition_index]; | |
| 364 while (t != kAny) { | |
| 365 const State s = kNextState[transition_index]; | |
| 366 const size_t length = LookUp(t); | |
| 367 if (length > 0) { | |
| 368 offset_ += length; | |
| 369 state_ = s; | |
| 370 return true; | |
| 371 } | |
| 372 t = kAvailableTransitions[++transition_index]; | |
| 373 } | |
| 374 // We have kAny, the default choice. Seek by one and switch the state. | |
| 375 ++offset_; | |
| 376 state_ = kNextState[transition_index]; | |
| 377 return true; | |
| 378 } | |
| 379 | |
| // Returns how many characters at the current read position the given | |
| // transition matches: 0 for no match, 1 for the single-character | |
| // transitions, and the length of the dash-boundary for kDashBoundary. | |
| 380 // Contract -- the following must be true: | |
| 381 // source_ != NULL && offset_ != source_->end() | |
| 382 // The idea is to check this only once in the caller (DoStep()), and not to | |
| 383 // repeat it here every time, as this can be called many times from one call | |
| 384 // to DoStep(). | |
| 385 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { | |
| 386 const char ahead = *offset_; | |
| 387 const char first_char = kTransitionToChar[t]; | |
| 388 | |
| 389 // Easy case: labels corresponding to a single char. | |
| 390 if (first_char != 0) | |
| 391 return ahead == first_char ? 1u : 0u; | |
| 392 | |
| 393 // Harder cases. | |
| 394 switch (t) { | |
| 395 // Multiple alternatives, 1-char long: return immediately. | |
| // kAscii accepts the codes 33..126 except ':'. | |
| 396 case kAscii: | |
| 397 return ahead >= 33 && ahead <= 126 && ahead != ':' ? 1u : 0u; | |
| 398 case kLwsp: | |
| 399 return ahead == ' ' || ahead == '\t' ? 1u : 0u; | |

battre
2012/08/16 19:18:03
nit: () around condition, also above
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 400 | |
| 401 // Longer than 1 char: compare the whole dash-boundary with the input. | |
| 402 case kDashBoundary: { | |
| 403 const size_t length = dash_boundary_.size(); | |
| 404 // The cast below is safe, we know that the difference is not negative. | |
| // The size check comes first, so memcmp never reads past the source. | |
| 405 if (static_cast<size_t>(source_->end() - offset_) < length || | |
| 406 memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0) | |

vabr (Chromium)
2012/08/16 08:00:59
Doing this each time is unnecessary expensive. It
| |
| 407 return 0u; | |
| 408 return length; | |
| 409 } | |
| 410 case kAny: | |
| 411 // We are not supposed to be asked for kAny, but this is the right answer: | |
| 412 return 1u; | |
| 413 default: // We never get here -- the rest has already been handled above. | |
| 414 NOTREACHED(); | |
| 415 return 0u; | |
| 416 } | |
| 417 } | |
| 418 | |
| // Examines one header of a message part. If it is a Content-Disposition | |
| // header carrying a quoted name parameter, stores that name in next_name_ | |
| // and returns true; a quoted filename parameter, when present, is stored | |
| // in next_value_. Returns false for any other header, for a header | |
| // without a name parameter, and for an unterminated quoted string. | |
| 419 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { | |
| 420 static const char kContentDisposition[] = "Content-Disposition:"; | |
| // Compare the whole prefix: the "!= 0" belongs outside of the memcmp() | |
| // call, otherwise it becomes the byte count and only one character is | |
| // compared. The size check keeps memcmp() from reading past a header | |
| // that is shorter than the prefix. | |
| 421 if (header.size() < strlen(kContentDisposition) || | |
| 422 memcmp(header.data(), kContentDisposition, strlen(kContentDisposition)) != 0) | |

battre
2012/08/16 19:18:03
I would move header.data(), kContentDisposition, t
vabr (Chromium)
2012/08/17 18:29:57
Done.
| |
| 423 return false; | |
| 424 static const char kNameEquals[] = " name=\""; | |
| 425 static const char kFilenameEquals[] = " filename=\""; | |
| 426 | |
| 427 // Mandatory part: find the name and set it as |next_name_|. | |
| 428 StringPiece::size_type field_offset = header.find(kNameEquals); | |
| 429 if (field_offset == StringPiece::npos) | |
| 430 return false; | |
| 431 field_offset += strlen(kNameEquals); | |
| 432 StringPiece::size_type field_end = header.find('"', field_offset); | |
| 433 if (field_end == StringPiece::npos) | |
| 434 return false; | |
| 435 next_name_.set(header.data() + field_offset, field_end - field_offset); | |
| 436 | |
| 437 // Optional part: find the filename and set it as |next_value_|. | |
| 438 field_offset = header.find(kFilenameEquals); | |
| 439 if (field_offset == StringPiece::npos) | |
| 440 return true; // This was only optional | |
| 441 field_offset += strlen(kFilenameEquals); | |
| 442 field_end = header.find('"', field_offset); | |
| 443 if (field_end == StringPiece::npos) | |
| 444 return false; // This is a malformed header. | |
| 445 next_value_.set(header.data() + field_offset, field_end - field_offset); | |
| 446 return true; | |
| 447 } | |
| 448 | |
| 449 } // namespace extensions | |
| OLD | NEW |