OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h" |
| 6 |
| 7 #include "base/string_util.h" |
| 8 #include "base/values.h" |
| 9 #include "net/base/escape.h" |
| 10 #include "net/url_request/url_request.h" |
| 11 |
| 12 using base::DictionaryValue; |
| 13 using base::ListValue; |
| 14 using base::StringPiece; |
| 15 |
| 16 namespace extensions { |
| 17 |
| 18 // Implementation of FormDataParser and FormDataParser::Result. |
| 19 |
| 20 FormDataParser::Result::Result() {} |
| 21 FormDataParser::Result::~Result() {} |
| 22 |
| 23 void FormDataParser::Result::Reset() { |
| 24 name_.erase(); |
| 25 value_.erase(); |
| 26 } |
| 27 |
| 28 FormDataParser::~FormDataParser() {} |
| 29 |
| 30 // static |
| 31 scoped_ptr<FormDataParser> FormDataParser::Create( |
| 32 const net::URLRequest* request) { |
| 33 std::string value; |
| 34 const bool found = request->extra_request_headers().GetHeader( |
| 35 net::HttpRequestHeaders::kContentType, &value); |
| 36 return Create(found ? &value : NULL); |
| 37 } |
| 38 |
| 39 // static |
| 40 scoped_ptr<FormDataParser> FormDataParser::Create( |
| 41 const std::string* content_type_header) { |
| 42 enum ParserChoice {kUrlEncoded, kMultipart, kError}; |
| 43 ParserChoice choice = kError; |
| 44 std::string boundary; |
| 45 |
| 46 if (content_type_header == NULL) { |
| 47 choice = kUrlEncoded; |
| 48 } else { |
| 49 const std::string content_type( |
| 50 content_type_header->substr(0, content_type_header->find(';'))); |
| 51 |
| 52 if (base::strcasecmp( |
| 53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) { |
| 54 choice = kUrlEncoded; |
| 55 } else if (base::strcasecmp( |
| 56 content_type.c_str(), "multipart/form-data") == 0) { |
| 57 static const char kBoundaryString[] = "boundary="; |
| 58 size_t offset = content_type_header->find(kBoundaryString); |
| 59 if (offset == std::string::npos) { |
| 60 // Malformed header. |
| 61 return scoped_ptr<FormDataParser>(); |
| 62 } |
| 63 offset += strlen(kBoundaryString); |
| 64 boundary = content_type_header->substr( |
| 65 offset, content_type_header->find(';', offset)); |
| 66 if (!boundary.empty()) |
| 67 choice = kMultipart; |
| 68 } |
| 69 } |
| 70 // Other cases are unparseable, including when |content_type| is "text/plain". |
| 71 |
| 72 switch (choice) { |
| 73 case kUrlEncoded: |
| 74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); |
| 75 case kMultipart: |
| 76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); |
| 77 default: // In other words, case kError: |
| 78 return scoped_ptr<FormDataParser>(); |
| 79 } |
| 80 } |
| 81 |
| 82 FormDataParser::FormDataParser() {} |
| 83 |
| 84 // Implementation of FormDataParserUrlEncoded. |
| 85 |
| 86 FormDataParserUrlEncoded::FormDataParserUrlEncoded() |
| 87 : source_(NULL), |
| 88 aborted_(false), |
| 89 equality_signs_(0), |
| 90 amp_signs_(0), |
| 91 expect_equality_(true) { |
| 92 } |
| 93 |
| 94 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} |
| 95 |
| 96 bool FormDataParserUrlEncoded::AllDataReadOK() { |
| 97 return source_ != NULL && |
| 98 !aborted_ && |
| 99 offset_ == source_->end() && |
| 100 equality_signs_ == amp_signs_ + 1; |
| 101 } |
| 102 |
| 103 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { |
| 104 result->Reset(); |
| 105 if (source_ == NULL || aborted_) |
| 106 return false; |
| 107 if (offset_ == source_->end()) |
| 108 return false; |
| 109 const char* const name_start = &(*offset_); |
| 110 char c; |
| 111 bool last_read_success = GetNextChar(&c); |
| 112 while (last_read_success && c != '=') |
| 113 last_read_success = GetNextChar(&c); |
| 114 if (!last_read_success) { // This means the data is malformed. |
| 115 Abort(); |
| 116 return false; |
| 117 } |
| 118 const char* const name_end = &(*(offset_ - 1)); |
| 119 const std::string encoded_name(name_start, name_end - name_start); |
| 120 const net::UnescapeRule::Type unescape_rules = |
| 121 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | |
| 122 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; |
| 123 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); |
| 124 |
| 125 const char* const value_start = &(*offset_); |
| 126 last_read_success = GetNextChar(&c); |
| 127 while (last_read_success && c != '&') |
| 128 last_read_success = GetNextChar(&c); |
| 129 const char* const value_end = |
| 130 last_read_success ? &(*(offset_ - 1)) : &(*offset_); |
| 131 const std::string encoded_value(value_start, value_end - value_start); |
| 132 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); |
| 133 return true; |
| 134 } |
| 135 |
| 136 bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) { |
| 137 if (source_ != NULL || source == NULL || aborted_) |
| 138 return false; |
| 139 source_ = source; |
| 140 offset_ = source_->begin(); |
| 141 return true; |
| 142 } |
| 143 |
| 144 bool FormDataParserUrlEncoded::GetNextChar(char* c) { |
| 145 if (offset_ == source_->end() || aborted_) |
| 146 return false; |
| 147 *c = *offset_; |
| 148 ++offset_; |
| 149 |
| 150 if (*c == '=') { |
| 151 if (expect_equality_) { |
| 152 ++equality_signs_; |
| 153 expect_equality_ = false; |
| 154 } else { |
| 155 Abort(); |
| 156 return false; |
| 157 } |
| 158 } |
| 159 if (*c == '&' && offset_ != source_->end()) { |
| 160 if (!expect_equality_) { |
| 161 ++amp_signs_; |
| 162 expect_equality_ = true; |
| 163 } else { |
| 164 Abort(); |
| 165 return false; |
| 166 } |
| 167 } |
| 168 |
| 169 return true; |
| 170 } |
| 171 |
| 172 void FormDataParserUrlEncoded::Abort() { |
| 173 aborted_ = true; |
| 174 } |
| 175 |
| 176 // Implementation of FormDataParserMultipart. |
| 177 |
| 178 FormDataParserMultipart::FormDataParserMultipart( |
| 179 const std::string& boundary_separator) |
| 180 : source_(NULL), |
| 181 dash_boundary_("--" + boundary_separator), |
| 182 state_(kStart), |
| 183 value_name_present_(false) { |
| 184 } |
| 185 |
| 186 FormDataParserMultipart::~FormDataParserMultipart() {} |
| 187 |
| 188 bool FormDataParserMultipart::AllDataReadOK() { |
| 189 return source_ != NULL && InFinalState(); |
| 190 } |
| 191 |
| 192 bool FormDataParserMultipart::GetNextNameValue(Result* result) { |
| 193 if (!value_name_present_ || state_ == kError) |
| 194 return false; |
| 195 result->set_name(next_name_); |
| 196 result->set_value(next_value_); |
| 197 next_name_.clear(); |
| 198 next_value_.clear(); |
| 199 value_name_present_ = ReadNextNameValue(); |
| 200 return true; |
| 201 } |
| 202 |
// Attaches a new chunk of data to parse and immediately reads ahead the first
// name-value pair from it (the outcome is stored in |value_name_present_|).
// Returns false without switching sources when the parser is in kError, when
// |source| is NULL, when a previously read name or value has not been handed
// out yet, or when the previous source could not be finished (see below).
| 203 bool FormDataParserMultipart::SetSource(const std::vector<char>* source) { |
| 204 if (state_ == kError || |
| 205 source == NULL || |
| 206 // Message part across a source split is also an error. |
| 207 next_name_.data() != NULL || next_value_.data() != NULL) |
| 208 return false; |
// A previous source exists and has unread data left: try to consume it.
| 209 if (source_ != NULL && offset_ != source_->end()){ |
| 210 // Try to seek until the end. If no name-value pair is found, this is OK. |
| 211 value_name_present_ = ReadNextNameValue(); |
// NOTE(review): the comment above says that not finding a pair is OK, but this
// condition returns false whenever ReadNextNameValue() returned false -- the
// intended behavior should be confirmed.
| 212 if (!value_name_present_ || offset_ != source_->end()) |
| 213 return false; |
| 214 } |
// Switch to the new source, start at its beginning and read ahead. Note that
// true is returned regardless of whether that read-ahead found a pair.
| 215 source_ = source; |
| 216 offset_ = source_->begin(); |
| 217 value_name_present_ = ReadNextNameValue(); |
| 218 return true; |
| 219 } |
| 220 |
| 221 // static |
// Indexed by a Transition value (see LookUp()). A non-zero entry is the single
// character that the transition matches; a zero entry means the transition is
// not a fixed single character and is handled by the switch in LookUp().
| 222 char FormDataParserMultipart::kTransitionToChar[] = { |
| 223 '\n', // For kLF. |
| 224 '\r', // For kCR. |
| 225 0, // For kAscii. |
| 226 0, // For kLwsp. |
| 227 0, // For kDashBoundary. |
| 228 ':', // For kColonT. |
| 229 '-', // For kDash. |
| 230 0, // For kAny. |
| 231 }; |
| 232 |
| 233 // static |
// Flattened list of the transitions available from each state, one run per
// state. DoStep() starts at index kStateToTransition[state_] and tries the
// entries in order until one matches or kAny is reached, so every run must end
// with kAny. Entries are parallel to kNextState: the transition at index i
// leads to the state kNextState[i].
| 234 FormDataParserMultipart::Transition |
| 235 FormDataParserMultipart::kAvailableTransitions[] = { |
| 236 kDashBoundary, kCR, kAny, // For kStart. |
| 237 kLF, kAny, // For kCR1. |
| 238 kCR, kAny, // For kIgnorePreamble. |
| 239 kLwsp, kCR, kAny, // For kDB1. |
| 240 kLF, kAny, // For kCR2. |
| 241 kAscii, kCR, kAny, // For kPart. |
| 242 kAscii, kColonT, kAny, // For kName. |
| 243 kLF, kCR, kAny, // For kColonS. |
| 244 kCR, kAscii, kAny, // For kEnd1. |
| 245 kLF, kCR, kAscii, kAny, // For kEnd2. |
| 246 kLwsp, kCR, kAscii, kAny, // For kEnd3. |
| 247 kLF, kAny, // For kCR3. |
| 248 kDashBoundary, kCR, kAny, // For kPreData. |
| 249 kLF, kAny, // For kCR4. |
| 250 kCR, kAny, // For kData. |
| 251 kDashBoundary, kAny, // For kData2. |
| 252 kLwsp, kCR, kDash, kAny, // For kDB2. |
| 253 kDash, kAny, // For kD. |
| 254 kLwsp, kCR, kAny, // For kEnd. |
| 255 kLF, kAny, // For kCR5. |
| 256 kAny, // For kIgnoreEpilogue. |
| 257 kAny // For kError. |
| 258 }; |
| 259 |
| 260 // static |
// Target states, parallel to kAvailableTransitions: when DoStep() takes the
// transition stored at index i there, it switches to kNextState[i]. The
// trailing number on each row is the running element count, which is the
// value stored in kStateToTransition for the following state.
| 261 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { |
| 262 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. |
| 263 kStart, kIgnorePreamble, // For kCR1; 05. |
| 264 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. |
| 265 kDB1, kCR2, kError, // For kDB1; 10. |
| 266 kPart, kError, // For kCR2; 12. |
| 267 kName, kCR3, kError, // For kPart; 15. |
| 268 kName, kColonS, kError, // For kName; 18. |
| 269 kEnd1, kEnd2, kColonS, // For kColonS; 21. |
| 270 kCR3, kName, kError, // For kEnd1; 24. |
| 271 kEnd3, kCR3, kName, kError, // For kEnd2; 28. |
| 272 kColonS, kCR3, kName, kError, // For kEnd3; 32. |
| 273 kPreData, kError, // For kCR3; 34. |
| 274 kDB2, kCR3, kData, // For kPreData; 37. |
| 275 kData2, kData, // For kCR4; 39. |
| 276 kCR4, kData, // For kData; 41. |
| 277 kDB2, kCR4, // For kData2; 43. |
| 278 kDB1, kCR2, kD, kError, // For kDB2; 47. |
| 279 kEnd, kError, // For kD; 49. |
| 280 kEnd, kCR5, kError, // For kEnd; 52. |
| 281 kIgnoreEpilogue, kError, // For kCR5; 54. |
| 282 kIgnoreEpilogue, // For kIgnoreEpilogue; 55. |
| 283 kError // For kError; 56. |
| 284 }; |
| 285 |
| 286 // static |
// Indexed by the current state (DoStep() reads kStateToTransition[state_]).
// Each entry is the index into kAvailableTransitions / kNextState at which the
// run of transitions for that state begins. These offsets must be kept in sync
// with the row lengths of those two tables.
| 287 size_t FormDataParserMultipart::kStateToTransition[] = { |
| 288 0u, // For kStart |
| 289 3u, // For kCR1 |
| 290 5u, // For kIgnorePreamble |
| 291 7u, // For kDB1 |
| 292 10u, // For kCR2 |
| 293 12u, // For kPart |
| 294 15u, // For kName |
| 295 18u, // For kColonS |
| 296 21u, // For kEnd1 |
| 297 24u, // For kEnd2 |
| 298 28u, // For kEnd3 |
| 299 32u, // For kCR3 |
| 300 34u, // For kPreData |
| 301 37u, // For kCR4 |
| 302 39u, // For kData |
| 303 41u, // For kData2 |
| 304 43u, // For kDB2 |
| 305 47u, // For kD |
| 306 49u, // For kEnd |
| 307 52u, // For kCR5 |
| 308 54u, // For kIgnoreEpilogue |
| 309 55u, // For kError |
| 310 }; |
| 311 |
// Drives the state machine forward to read one message part, storing what it
// finds in |next_name_| and |next_value_|. Returns false when the parser is in
// kError, has no source, still holds an unconsumed name or value, or when
// DoStep() fails before the part's body is reached. Returns true once the body
// has been scanned, either up to the next boundary (state kDB2) or to the end
// of the source.
| 312 bool FormDataParserMultipart::ReadNextNameValue() { |
| 313 if (state_ == kError || source_ == NULL || |
| 314 next_name_.data() != NULL || next_value_.data() != NULL) |
| 315 return false; |
| 316 |
| 317 // Seek to the next part's headers. |
| 318 while (state_ != kPart) { |
| 319 if (!DoStep()) |
| 320 return false; |
| 321 } |
// Process the part's headers one at a time until the body (kPreData) starts.
| 322 while (state_ != kPreData) { |
// Remember where this header starts in the source.
// NOTE(review): |offset_| is dereferenced here without checking it against
// source_->end() -- confirm this cannot happen at the end of the source.
| 323 const char* header = &(*offset_); |
// Advance over the header name, up to the state following the colon.
| 324 while (state_ != kColonS) { |
| 325 if (!DoStep()) |
| 326 return false; |
| 327 } |
| 328 size_t header_length = 0u; |
// Advance over the header value. The length is refreshed each time one of the
// kEnd1/kEnd2/kEnd3 states is observed, so it reflects the last such position
// before the next header (kName) or the body (kPreData) begins.
| 329 while (state_ != kPreData && state_ != kName) { |
| 330 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) { |
| 331 // The cast is safe, we know that offset only moves forward. |
| 332 header_length = static_cast<size_t>(&(*offset_) - header); |
| 333 } |
| 334 if (!DoStep()) |
| 335 return false; |
| 336 } |
// ParseHeader() sets |next_name_| (and possibly |next_value_|) on success.
| 337 if (ParseHeader(base::StringPiece(header, header_length))) { |
| 338 // Found what we were looking for, just skip to the part's body. |
| 339 while (state_ != kPreData) { |
| 340 if (!DoStep()) |
| 341 return false; |
| 342 } |
| 343 } |
| 344 } |
| 345 |
// Scan the body until the next boundary is matched or the source runs out.
| 346 const char* body = &(*offset_); |
| 347 size_t body_length = 0; |
| 348 while (state_ != kDB2 && offset_ != source_->end()) { |
| 349 if (!DoStep()) |
| 350 return false; |
| 351 if (state_ == kCR4) { |
| 352 // We are in the middle of which might be the CRLF starting the part |
| 353 // separator (see the "delimiter" non-terminal from the grammar given |
| 354 // in the header file). The cast is safe, we know that offset only moves |
| 355 // forward and body was assigned at least 1 transition ago. |
| 356 body_length = static_cast<size_t>(&(*offset_) - body - 1); |
| 357 } |
| 358 } |
// Only a non-empty body replaces |next_value_|; with an empty body, whatever
// ParseHeader() stored there is kept.
| 359 if (body_length > 0) |
| 360 next_value_.set(body, body_length); |
| 361 return true; |
| 362 } |
| 363 |
| 364 bool FormDataParserMultipart::DoStep() { |
| 365 if (state_ == kError || source_ == NULL || offset_ == source_->end()) |
| 366 return false; |
| 367 size_t transition_index = kStateToTransition[state_]; |
| 368 Transition t = kAvailableTransitions[transition_index]; |
| 369 while (t != kAny) { |
| 370 const State s = kNextState[transition_index]; |
| 371 const size_t length = LookUp(t); |
| 372 if (length > 0) { |
| 373 offset_ += length; |
| 374 state_ = s; |
| 375 return true; |
| 376 } |
| 377 t = kAvailableTransitions[++transition_index]; |
| 378 } |
| 379 // We have kAny, the default choice. Seek by one and switch the state. |
| 380 ++offset_; |
| 381 state_ = kNextState[transition_index]; |
| 382 return true; |
| 383 } |
| 384 |
| 385 // Contract -- the following must be true: |
| 386 // source_ != NULL && offset_ != source_->end() |
| 387 // The idea is to check this only once in the caller (DoStep()) and not to |
| 388 // repeat it here every time, as this can be called many times from one call |
| 389 // to DoStep(). |
| 390 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { |
| 391 const char ahead = *offset_; |
| 392 const char first_char = kTransitionToChar[t]; |
| 393 |
| 394 // Easy case: labels corresponding to a single char. |
| 395 if (first_char != 0) |
| 396 return ahead == first_char ? 1u : 0u; |
| 397 |
| 398 // Harder cases. |
| 399 switch (t) { |
| 400 // Multiple alternatives, 1-char long: return immediately. |
| 401 case kAscii: |
| 402 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u; |
| 403 case kLwsp: |
| 404 return (ahead == ' ' || ahead == '\t') ? 1u : 0u; |
| 405 |
| 406 // Longer than 1 char: prepare work for later. |
| 407 case kDashBoundary: { |
| 408 const size_t length = dash_boundary_.size(); |
| 409 // The cast below is safe, we know that the difference is not negative. |
| 410 if (static_cast<size_t>(source_->end() - offset_) < length || |
| 411 memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0) |
| 412 return 0u; |
| 413 return length; |
| 414 } |
| 415 case kAny: |
| 416 // We are not supposed to be asked for kAny, but this is the right answer: |
| 417 return 1u; |
| 418 default: // We never get here -- the rest has already been handled above. |
| 419 NOTREACHED(); |
| 420 return 0u; |
| 421 } |
| 422 } |
| 423 |
| 424 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { |
| 425 static const char kContentDisposition[] = "Content-Disposition:"; |
| 426 if (memcmp(header.data(), kContentDisposition, |
| 427 strlen(kContentDisposition) != 0)) |
| 428 return false; |
| 429 static const char kNameEquals[] = " name=\""; |
| 430 static const char kFilenameEquals[] = " filename=\""; |
| 431 |
| 432 // Mandatory part: find the name and set it as |next_name_|. |
| 433 StringPiece::size_type field_offset = header.find(kNameEquals); |
| 434 if (field_offset == StringPiece::npos) |
| 435 return false; |
| 436 field_offset += strlen(kNameEquals); |
| 437 StringPiece::size_type field_end = header.find('"', field_offset); |
| 438 if (field_end == StringPiece::npos) |
| 439 return false; |
| 440 next_name_.set(header.data() + field_offset, field_end - field_offset); |
| 441 |
| 442 // Optional part: find the filename and set it as |next_value_|. |
| 443 field_offset = header.find(kFilenameEquals); |
| 444 if (field_offset == StringPiece::npos) |
| 445 return true; // This was only optional |
| 446 field_offset += strlen(kFilenameEquals); |
| 447 field_end = header.find('"', field_offset); |
| 448 if (field_end == StringPiece::npos) |
| 449 return false; // This is a malformed header. |
| 450 next_value_.set(header.data() + field_offset, field_end - field_offset); |
| 451 return true; |
| 452 } |
| 453 |
| 454 } // namespace extensions |
OLD | NEW |