Index: chrome/browser/extensions/api/web_request/form_data_parser.cc |
diff --git a/chrome/browser/extensions/api/web_request/form_data_parser.cc b/chrome/browser/extensions/api/web_request/form_data_parser.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..66118e3e38c1a469d56947958608038355f3a7f0 |
--- /dev/null |
+++ b/chrome/browser/extensions/api/web_request/form_data_parser.cc |
@@ -0,0 +1,449 @@ |
+// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/browser/extensions/api/web_request/form_data_parser.h" |
+ |
+#include "base/string_util.h" |
+#include "base/values.h" |
+#include "net/base/escape.h" |
+#include "net/url_request/url_request.h" |
+ |
+using base::DictionaryValue; |
+using base::ListValue; |
+using base::StringPiece; |
+ |
+namespace extensions { |
+ |
+// Implementation of FormDataParser and FormDataParser::Result. |
+ |
+// Result needs no explicit setup or teardown; both bodies are empty. |
+FormDataParser::Result::Result() {} |
+FormDataParser::Result::~Result() {} |
+ |
+// Empties both the name and the value held by this Result. |
+void FormDataParser::Result::Reset() { |
+ name_.erase(); |
+ value_.erase(); |
+} |
+ |
+// Out-of-line definition of the destructor; nothing is released explicitly. |
+FormDataParser::~FormDataParser() {} |
+ |
+// static |
+// Reads the Content-Type header of |request| and delegates to the overload |
+// taking the header value. A missing header is passed on as NULL. |
+scoped_ptr<FormDataParser> FormDataParser::Create( |
+    const net::URLRequest* request) { |
+  std::string content_type; |
+  const std::string* header_value = NULL; |
+  if (request->extra_request_headers().GetHeader( |
+          net::HttpRequestHeaders::kContentType, &content_type)) { |
+    header_value = &content_type; |
+  } |
+  return Create(header_value); |
+} |
+ |
+// static |
+// Chooses a parser from the value of the Content-Type header: |
+//  - NULL header or "application/x-www-form-urlencoded": URL-encoded parser; |
+//  - "multipart/form-data" with a non-empty boundary: multipart parser; |
+//  - anything else (including "text/plain"): a NULL scoped_ptr. |
+// The media type comparison is case-insensitive. |
+scoped_ptr<FormDataParser> FormDataParser::Create( |
+    const std::string* content_type_header) { |
+  enum ParserChoice {kUrlEncoded, kMultipart, kError}; |
+  ParserChoice choice = kError; |
+  std::string boundary; |
+ |
+  if (content_type_header == NULL) { |
+    choice = kUrlEncoded; |
+  } else { |
+    // The media type is everything before the first ';' (or the whole header |
+    // if there are no parameters). |
+    const std::string content_type( |
+        content_type_header->substr(0, content_type_header->find(';'))); |
+ |
+    if (base::strcasecmp( |
+            content_type.c_str(), "application/x-www-form-urlencoded") == 0) { |
+      choice = kUrlEncoded; |
+    } else if (base::strcasecmp( |
+                   content_type.c_str(), "multipart/form-data") == 0) { |
+      static const char kBoundaryString[] = "boundary="; |
+      size_t offset = content_type_header->find(kBoundaryString); |
+      if (offset == std::string::npos) { |
+        // Malformed header. |
+        return scoped_ptr<FormDataParser>(); |
+      } |
+      offset += strlen(kBoundaryString); |
+      // The boundary runs up to the next ';' or to the end of the header. |
+      // substr() takes a length, not an end position, so convert explicitly; |
+      // otherwise parameters following the boundary would be included in it. |
+      const size_t boundary_end = content_type_header->find(';', offset); |
+      const size_t boundary_length = |
+          (boundary_end == std::string::npos) ? std::string::npos |
+                                              : boundary_end - offset; |
+      boundary = content_type_header->substr(offset, boundary_length); |
+      if (!boundary.empty()) |
+        choice = kMultipart; |
+    } |
+  } |
+  // Other cases are unparseable, including when |content_type| is "text/plain". |
+ |
+  switch (choice) { |
+    case kUrlEncoded: |
+      return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded()); |
+    case kMultipart: |
+      return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary)); |
+    default:  // In other words, case kError: |
+      return scoped_ptr<FormDataParser>(); |
+  } |
+} |
+ |
+// Nothing is initialized explicitly here. |
+FormDataParser::FormDataParser() {} |
+ |
+// Implementation of FormDataParserUrlEncoded. |
+ |
+// Starts with no source attached, not aborted, no '=' or '&' counted yet, and |
+// with '=' expected as the next separator. |
+FormDataParserUrlEncoded::FormDataParserUrlEncoded() |
+ : source_(NULL), |
+ aborted_(false), |
+ equality_signs_(0), |
+ amp_signs_(0), |
+ expect_equality_(true) { |
+} |
+ |
+// Empty destructor; |source_| is not deleted here. |
+FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {} |
+ |
+// Returns true only if a source was set, parsing was never aborted, the whole |
+// source has been consumed, and exactly one more '=' than '&' was counted. |
+bool FormDataParserUrlEncoded::AllDataReadOK() { |
+  if (source_ == NULL) |
+    return false; |
+  if (aborted_) |
+    return false; |
+  if (offset_ != source_->end()) |
+    return false; |
+  return equality_signs_ == amp_signs_ + 1; |
+} |
+ |
+// Reads the next "name=value" pair from the source, unescapes both parts and |
+// stores them in |result|. |result| is always reset first. |
+// Returns false if no source is set, parsing was aborted, the source is |
+// exhausted, or no '=' terminates the name (in which case parsing is |
+// aborted). If GetNextChar() aborts while the value is being scanned, the |
+// text read so far is still returned with true; the failure is then visible |
+// through AllDataReadOK() and through subsequent calls returning false. |
+bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { |
+  result->Reset(); |
+  if (source_ == NULL || aborted_) |
+    return false; |
+  if (offset_ == source_->end()) |
+    return false; |
+ |
+  // |source_| is non-empty here, so the address of its first element is |
+  // valid. All positions below are computed relative to it instead of by |
+  // dereferencing |offset_|, which may be equal to end() (for example when |
+  // the input ends right after '='). |
+  const char* const data = &(*source_->begin()); |
+  const char* const name_start = data + (offset_ - source_->begin()); |
+  char c = 0; |
+  bool last_read_success = GetNextChar(&c); |
+  while (last_read_success && c != '=') |
+    last_read_success = GetNextChar(&c); |
+  if (!last_read_success) {  // This means the data is malformed. |
+    Abort(); |
+    return false; |
+  } |
+  // |offset_| is now one past the '=' which terminated the name. |
+  const char* const name_end = data + (offset_ - source_->begin()) - 1; |
+  const std::string encoded_name(name_start, name_end - name_start); |
+  const net::UnescapeRule::Type unescape_rules = |
+      net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS | |
+      net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; |
+  result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules)); |
+ |
+  const char* const value_start = data + (offset_ - source_->begin()); |
+  last_read_success = GetNextChar(&c); |
+  while (last_read_success && c != '&') |
+    last_read_success = GetNextChar(&c); |
+  // If the scan stopped on '&', that character is not part of the value. |
+  const char* const value_end = |
+      data + (offset_ - source_->begin()) - (last_read_success ? 1 : 0); |
+  const std::string encoded_value(value_start, value_end - value_start); |
+  result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules)); |
+  return true; |
+} |
+ |
+// Attaches |source| and positions the read offset at its beginning. |
+// Fails (returns false) if a source is already attached, |source| is NULL, |
+// or parsing has been aborted. |
+bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) { |
+  const bool already_has_source = (source_ != NULL); |
+  if (already_has_source || source == NULL || aborted_) { |
+    return false; |
+  } |
+ |
+  source_ = source; |
+  offset_ = source_->begin(); |
+  return true; |
+} |
+ |
+// Copies the character at |offset_| into |*c| and advances |offset_|, while |
+// keeping track of the alternation of '=' and '&'. Returns false at the end |
+// of the source, after an abort, or when a separator appears out of turn (in |
+// which case parsing is aborted). An '&' that is the very last character of |
+// the source is not treated as a separator. |
+bool FormDataParserUrlEncoded::GetNextChar(char* c) { |
+  if (offset_ == source_->end() || aborted_) |
+    return false; |
+ |
+  const char current = *offset_; |
+  ++offset_; |
+  *c = current; |
+ |
+  if (current == '=') { |
+    if (!expect_equality_) { |
+      Abort(); |
+      return false; |
+    } |
+    ++equality_signs_; |
+    expect_equality_ = false; |
+  } else if (current == '&' && offset_ != source_->end()) { |
+    if (expect_equality_) { |
+      Abort(); |
+      return false; |
+    } |
+    ++amp_signs_; |
+    expect_equality_ = true; |
+  } |
+ |
+  return true; |
+} |
+ |
+// Marks the parser as aborted. Nothing in this class clears the flag again. |
+void FormDataParserUrlEncoded::Abort() { |
+ aborted_ = true; |
+} |
+ |
+// Implementation of FormDataParserMultipart. |
+ |
+// Stores |boundary_separator| prefixed with "--" in |dash_boundary_|. The |
+// parser starts in state kStart with no source attached and no name-value |
+// pair buffered. |
+FormDataParserMultipart::FormDataParserMultipart( |
+ const std::string& boundary_separator) |
+ : source_(NULL), |
+ dash_boundary_("--" + boundary_separator), |
+ state_(kStart), |
+ value_name_present_(false) { |
+} |
+ |
+// Empty destructor; |source_| is not deleted here. |
+FormDataParserMultipart::~FormDataParserMultipart() {} |
+ |
+// True if a source has been set and the state machine is in a final state. |
+bool FormDataParserMultipart::AllDataReadOK() { |
+  if (source_ == NULL) |
+    return false; |
+  return InFinalState(); |
+} |
+ |
+// Hands out the buffered name-value pair through |result| and then tries to |
+// pre-read the following pair. Returns false, leaving |result| untouched, if |
+// no pair is buffered or the parser is in the error state. |
+bool FormDataParserMultipart::GetNextNameValue(Result* result) { |
+  const bool pair_available = value_name_present_ && state_ != kError; |
+  if (!pair_available) { |
+    return false; |
+  } |
+ |
+  result->set_name(next_name_); |
+  result->set_value(next_value_); |
+ |
+  // The buffers are emptied before reading on: ReadNextNameValue() returns |
+  // false while either of them still has non-NULL data(). |
+  next_name_.clear(); |
+  next_value_.clear(); |
+  value_name_present_ = ReadNextNameValue(); |
+  return true; |
+} |
+ |
+// Attaches a new |source| and pre-reads its first name-value pair. |
+// Returns false if the parser is in the error state, |source| is NULL, or a |
+// previously read pair has not been consumed yet. If the current source has |
+// unread data, the parser first tries to seek to its end; the switch is |
+// refused if that uncovers another name-value pair (it would be lost), ends |
+// in the error state, or leaves data unread. |
+bool FormDataParserMultipart::SetSource(const std::vector<char>* source) { |
+  if (state_ == kError || |
+      source == NULL || |
+      // Message part across a source split is also an error. |
+      next_name_.data() != NULL || next_value_.data() != NULL) |
+    return false; |
+  if (source_ != NULL && offset_ != source_->end()) { |
+    // Try to seek until the end. If no name-value pair is found, this is OK. |
+    value_name_present_ = ReadNextNameValue(); |
+    // NOTE(review): the original tested !value_name_present_ here, which |
+    // rejected exactly the case the comment above calls OK and accepted the |
+    // case where a pending pair would be dropped. |
+    if (value_name_present_ || state_ == kError || offset_ != source_->end()) |
+      return false; |
+  } |
+  source_ = source; |
+  offset_ = source_->begin(); |
+  value_name_present_ = ReadNextNameValue(); |
+  return true; |
+} |
+ |
+// static |
+// Indexed by Transition: the single character which a transition label |
+// matches, or 0 if the label is not one fixed character. LookUp() handles the |
+// 0 entries in its switch statement. |
+char FormDataParserMultipart::kTransitionToChar[] = { |
+ '\n', // For kLF. |
+ '\r', // For kCR. |
+ 0, // For kAscii. |
+ 0, // For kLwsp. |
+ 0, // For kDashBoundary. |
+ ':', // For kColonT. |
+ '-', // For kDash. |
+ 0, // For kAny. |
+}; |
+ |
+// static |
+// Flattened per-state lists of transition labels, tried in order by DoStep(). |
+// Every state's list ends with kAny, the catch-all which DoStep() takes when |
+// no earlier label matches. kStateToTransition gives the index at which each |
+// state's list starts. |
+FormDataParserMultipart::Transition |
+ FormDataParserMultipart::kAvailableTransitions[] = { |
+ kDashBoundary, kCR, kAny, // For kStart. |
+ kLF, kAny, // For kCR1. |
+ kCR, kAny, // For kIgnorePreamble. |
+ kLwsp, kCR, kAny, // For kDB1. |
+ kLF, kAny, // For kCR2. |
+ kAscii, kCR, kAny, // For kPart. |
+ kAscii, kColonT, kAny, // For kName. |
+ kLF, kCR, kAny, // For kColonS. |
+ kCR, kAscii, kAny, // For kEnd1. |
+ kLF, kCR, kAscii, kAny, // For kEnd2. |
+ kLwsp, kCR, kAscii, kAny, // For kEnd3. |
+ kLF, kAny, // For kCR3. |
+ kDashBoundary, kCR, kAny, // For kPreData. |
+ kLF, kAny, // For kCR4. |
+ kCR, kAny, // For kData. |
+ kDashBoundary, kAny, // For kData2. |
+ kLwsp, kCR, kDash, kAny, // For kDB2. |
+ kDash, kAny, // For kD. |
+ kLwsp, kCR, kAny, // For kEnd. |
+ kLF, kAny, // For kCR5. |
+ kAny, // For kIgnoreEpilogue. |
+ kAny // For kError. |
+}; |
+ |
+// static |
+// Parallel to kAvailableTransitions: entry i is the state which DoStep() |
+// enters when it takes the transition at index i. The trailing numbers in the |
+// comments are running totals of entries, which match kStateToTransition. |
+FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = { |
+ kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03. |
+ kStart, kIgnorePreamble, // For kCR1; 05. |
+ kCR1, kIgnorePreamble, // For kIgnorePreamble; 07. |
+ kDB1, kCR2, kError, // For kDB1; 10. |
+ kPart, kError, // For kCR2; 12. |
+ kName, kCR3, kError, // For kPart; 15. |
+ kName, kColonS, kError, // For kName; 18. |
+ kEnd1, kEnd2, kColonS, // For kColonS; 21. |
+ kCR3, kName, kError, // For kEnd1; 24. |
+ kEnd3, kCR3, kName, kError, // For kEnd2; 28. |
+ kColonS, kCR3, kName, kError, // For kEnd3; 32. |
+ kPreData, kError, // For kCR3; 34. |
+ kDB2, kCR3, kData, // For kPreData; 37. |
+ kData2, kData, // For kCR4; 39. |
+ kCR4, kData, // For kData; 41. |
+ kDB2, kCR4, // For kData2; 43. |
+ kDB1, kCR2, kD, kError, // For kDB2; 47. |
+ kEnd, kError, // For kD; 49. |
+ kEnd, kCR5, kError, // For kEnd; 52. |
+ kIgnoreEpilogue, kError, // For kCR5; 54. |
+ kIgnoreEpilogue, // For kIgnoreEpilogue; 55. |
+ kError // For kError; 56. |
+}; |
+ |
+// static |
+// Indexed by State: the index of the first entry belonging to that state in |
+// both kAvailableTransitions and kNextState. Must be kept in sync with the |
+// number of entries per state in those two tables. |
+size_t FormDataParserMultipart::kStateToTransition[] = { |
+ 0u, // For kStart |
+ 3u, // For kCR1 |
+ 5u, // For kIgnorePreamble |
+ 7u, // For kDB1 |
+ 10u, // For kCR2 |
+ 12u, // For kPart |
+ 15u, // For kName |
+ 18u, // For kColonS |
+ 21u, // For kEnd1 |
+ 24u, // For kEnd2 |
+ 28u, // For kEnd3 |
+ 32u, // For kCR3 |
+ 34u, // For kPreData |
+ 37u, // For kCR4 |
+ 39u, // For kData |
+ 41u, // For kData2 |
+ 43u, // For kDB2 |
+ 47u, // For kD |
+ 49u, // For kEnd |
+ 52u, // For kCR5 |
+ 54u, // For kIgnoreEpilogue |
+ 55u, // For kError |
+}; |
+ |
+// Drives the state machine through the next message part. It first steps to |
+// the part's headers (state kPart), then passes each header to ParseHeader() |
+// until the part's body is reached (state kPreData), and finally scans the |
+// body up to the next boundary (state kDB2) or the end of the source. A |
+// non-empty body is stored in |next_value_|, replacing anything ParseHeader() |
+// stored there. |
+// Returns false if the parser is in the error state, no source is set, a |
+// previously read pair is still buffered, or DoStep() fails on the way. |
+bool FormDataParserMultipart::ReadNextNameValue() { |
+ if (state_ == kError || source_ == NULL || |
+ next_name_.data() != NULL || next_value_.data() != NULL) |
+ return false; |
+ |
+ // Seek to the next part's headers. |
+ while (state_ != kPart) |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ if (!DoStep()) |
+ return false; |
+ while (state_ != kPreData) { |
+ const char* header = &(*offset_); |
+ while (state_ != kColonS) |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ if (!DoStep()) |
+ return false; |
+ size_t header_length = 0u; |
+ while (state_ != kPreData && state_ != kName) { |
+ if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ // The cast is safe, we know that offset only moves forward. |
+ header_length = static_cast<size_t>(&(*offset_) - header); |
+ if (!DoStep()) |
+ return false; |
+ } |
+ if (ParseHeader(base::StringPiece(header, header_length))) { |
+ // Found what we were looking for, just skip to the part's body. |
+ while (state_ != kPreData) |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ if (!DoStep()) |
+ return false; |
+ } |
+ } |
+ |
+ const char* body = &(*offset_); |
+ size_t body_length = 0; |
+ while (state_ != kDB2 && offset_ != source_->end()) { |
+ if (!DoStep()) |
+ return false; |
+ if (state_ == kCR4) |
battre
2012/08/16 19:18:03
nit: {}
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ // We are in the middle of which might be the CRLF starting the part |
+ // separator (see the "delimiter" non-terminal from the grammar given |
+ // in the header file). The cast is safe, we know that offset only moves |
+ // forward and body was assigned at least 1 transition ago. |
+ body_length = static_cast<size_t>(&(*offset_) - body - 1); |
+ } |
+ if (body_length > 0) |
+ next_value_.set(body, body_length); |
+ return true; |
+} |
+ |
+// Performs one transition of the state machine. The labels available in the |
+// current state are tried in table order; the first one which LookUp() |
+// matches is taken, consuming as many characters as it matched. If none |
+// matches, the terminating kAny entry is taken and one character is consumed. |
+// Returns false, without changing anything, in the error state, without a |
+// source, or at the end of the source. |
+bool FormDataParserMultipart::DoStep() { |
+  if (state_ == kError || source_ == NULL || offset_ == source_->end()) |
+    return false; |
+ |
+  for (size_t index = kStateToTransition[state_]; ; ++index) { |
+    const Transition label = kAvailableTransitions[index]; |
+    if (label == kAny) { |
+      // We have kAny, the default choice. Seek by one and switch the state. |
+      ++offset_; |
+      state_ = kNextState[index]; |
+      return true; |
+    } |
+    const size_t matched = LookUp(label); |
+    if (matched > 0) { |
+      offset_ += matched; |
+      state_ = kNextState[index]; |
+      return true; |
+    } |
+  } |
+} |
+ |
+// Returns the number of characters at |offset_| which transition |t| matches, |
+// or 0 if |t| does not match there. Neither |offset_| nor |state_| is changed. |
+// Contract -- the following must be true: |
+// source_ != NULL && offset_ != source.end() |
+// The idea is to check this only once in the caller (DoStep()), and do not |
+// repeat it here every time, as this can be called many times from one call |
+// to DoStep(). |
+size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) { |
+ const char ahead = *offset_; |
+ const char first_char = kTransitionToChar[t]; |
+ |
+ // Easy case: labels corresponding to a single char. |
+ if (first_char != 0) |
+ return ahead == first_char ? 1u : 0u; |
+ |
+ // Harder cases. |
+ switch (t) { |
+ // Multiple alternatives, 1-char long: return immediately. |
+ case kAscii: |
+ return ahead >= 33 && ahead <= 126 && ahead != ':' ? 1u : 0u; |
+ case kLwsp: |
+ return ahead == ' ' || ahead == '\t' ? 1u : 0u; |
battre
2012/08/16 19:18:03
nit: () around condition, also above
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+ |
+ // Longer than 1 char: prepare work for later. |
+ case kDashBoundary: { |
+ const size_t length = dash_boundary_.size(); |
+ // The cast below is safe, we know that the difference is not negative. |
+ if (static_cast<size_t>(source_->end() - offset_) < length || |
+ memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0) |
vabr (Chromium)
2012/08/16 08:00:59
Doing this each time is unnecessary expensive. It
|
+ return 0u; |
+ return length; |
+ } |
+ case kAny: |
+ // We are not supposed to be asked for kAny, but this is the right answer: |
+ return 1u; |
+ default: // We never get here -- the rest has already been handled above. |
+ NOTREACHED(); |
+ return 0u; |
+ } |
+} |
+ |
+// Parses one part header. If |header| starts with "Content-Disposition:" and |
+// contains a quoted name parameter, |next_name_| is pointed at the name and, |
+// if a quoted filename parameter is present as well, |next_value_| is pointed |
+// at the filename. Both refer to the memory behind |header|; nothing is |
+// copied. Returns false for any other header, or if a closing quote is |
+// missing. |
+bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) { |
+  static const char kContentDisposition[] = "Content-Disposition:"; |
+  const size_t content_disposition_length = strlen(kContentDisposition); |
+  // Compare the whole prefix, and only if |header| is long enough to hold it. |
+  // (The "!= 0" used to sit inside the memcmp() call, which turned the length |
+  // argument into 1, so that only the first character was compared.) |
+  if (header.size() < content_disposition_length || |
+      memcmp(header.data(), kContentDisposition, |
+             content_disposition_length) != 0) |
battre
2012/08/16 19:18:03
I would move header.data(), kContentDisposition, t
vabr (Chromium)
2012/08/17 18:29:57
Done.
|
+    return false; |
+  static const char kNameEquals[] = " name=\""; |
+  static const char kFilenameEquals[] = " filename=\""; |
+ |
+  // Mandatory part: find the name and set it as |next_name_|. |
+  StringPiece::size_type field_offset = header.find(kNameEquals); |
+  if (field_offset == StringPiece::npos) |
+    return false; |
+  field_offset += strlen(kNameEquals); |
+  StringPiece::size_type field_end = header.find('"', field_offset); |
+  if (field_end == StringPiece::npos) |
+    return false; |
+  next_name_.set(header.data() + field_offset, field_end - field_offset); |
+ |
+  // Optional part: find the filename and set it as |next_value_|. |
+  field_offset = header.find(kFilenameEquals); |
+  if (field_offset == StringPiece::npos) |
+    return true;  // This was only optional. |
+  field_offset += strlen(kFilenameEquals); |
+  field_end = header.find('"', field_offset); |
+  if (field_end == StringPiece::npos) |
+    return false;  // This is a malformed header. |
+  next_value_.set(header.data() + field_offset, field_end - field_offset); |
+  return true; |
+} |
+ |
+} // namespace extensions |