Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1610)

Unified Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Dominic's comments + adjusting to the recent move of UploadElement out of UploadData. Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/extensions/api/web_request/form_data_parser.cc
diff --git a/chrome/browser/extensions/api/web_request/form_data_parser.cc b/chrome/browser/extensions/api/web_request/form_data_parser.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4b8fb139dfa36e6176551390b2292dbfe9812249
--- /dev/null
+++ b/chrome/browser/extensions/api/web_request/form_data_parser.cc
@@ -0,0 +1,457 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/extensions/api/web_request/form_data_parser.h"
+
+#include "base/string_util.h"
+#include "base/values.h"
+#include "net/base/escape.h"
+#include "net/url_request/url_request.h"
+
+using base::DictionaryValue;
+using base::ListValue;
+using base::StringPiece;
+
+namespace extensions {
+
+// Implementation of FormDataParser and FormDataParser::Result.
+
+// The constructor and destructor bodies are empty; members are
+// default-constructed and destroyed implicitly.
+FormDataParser::Result::Result() {}
+FormDataParser::Result::~Result() {}
+
+// Empties both the stored name and the stored value.
+void FormDataParser::Result::Reset() {
+  name_.clear();
+  value_.clear();
+}
+
+FormDataParser::~FormDataParser() {}  // Empty body; no clean-up is done here.
+
+// static
+// Creates a parser based on the Content-Type header of |request|. The header
+// value is looked up among the extra request headers and handed to the
+// string-based overload; NULL is passed when the header is absent.
+scoped_ptr<FormDataParser> FormDataParser::Create(
+    const net::URLRequest* request) {
+  std::string content_type;
+  const std::string* header = NULL;
+  if (request->extra_request_headers().GetHeader(
+          net::HttpRequestHeaders::kContentType, &content_type)) {
+    header = &content_type;
+  }
+  return Create(header);
+}
+
+// static
+// Picks the parser matching the value of the Content-Type header.
+// |content_type_header| may be NULL, which is treated like URL-encoded data.
+// Returns a NULL scoped_ptr when the header names an unsupported media type,
+// or when a multipart header lacks a non-empty boundary parameter.
+scoped_ptr<FormDataParser> FormDataParser::Create(
+    const std::string* content_type_header) {
+  enum ParserChoice {kUrlEncoded, kMultipart, kError};
+  ParserChoice choice = kError;
+  std::string boundary;
+
+  if (content_type_header == NULL) {
+    choice = kUrlEncoded;
+  } else {
+    // The media type is everything before the first ';', or the whole header
+    // if there are no parameters.
+    const std::string content_type(
+        content_type_header->substr(0, content_type_header->find(';')));
+
+    if (base::strcasecmp(
+            content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
+      choice = kUrlEncoded;
+    } else if (base::strcasecmp(
+                   content_type.c_str(), "multipart/form-data") == 0) {
+      static const char kBoundaryString[] = "boundary=";
+      size_t offset = content_type_header->find(kBoundaryString);
+      if (offset == std::string::npos) {
+        // Malformed header.
+        return scoped_ptr<FormDataParser>();
+      }
+      offset += strlen(kBoundaryString);
+      // std::string::substr() takes a character count, not an end position,
+      // so the position of the terminating ';' (if any) has to be converted
+      // into a length relative to |offset|.
+      const size_t boundary_end = content_type_header->find(';', offset);
+      const size_t boundary_length = boundary_end == std::string::npos ?
+          std::string::npos : boundary_end - offset;
+      boundary = content_type_header->substr(offset, boundary_length);
+      if (!boundary.empty())
+        choice = kMultipart;
+    }
+  }
+  // Other cases are unparseable, including when |content_type| is "text/plain".
tkent 2012/08/24 14:26:50 Why text/plain is not supported?
vabr (Chromium) 2012/08/24 16:16:59 This encoding is ambiguous. Nice description from
+
+  switch (choice) {
+    case kUrlEncoded:
+      return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
+    case kMultipart:
+      return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
+    default:  // In other words, case kError:
+      return scoped_ptr<FormDataParser>();
+  }
+}
+
+FormDataParser::FormDataParser() {}  // Empty body; nothing is set up here.
+
+// Implementation of FormDataParserUrlEncoded.
+
+FormDataParserUrlEncoded::FormDataParserUrlEncoded()
+ : source_end_(NULL),  // Assigned by SetSource().
+ aborted_(false),
+ offset_(NULL),  // Read position; assigned by SetSource().
+ equality_signs_(0),  // Number of '=' accepted by GetNextChar().
+ amp_signs_(0),  // Number of '&' separators accepted by GetNextChar().
+ expect_equality_(true) {  // The first delimiter has to be '=', not '&'.
+}
+
+FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
+
+// Returns true only if a source was set, parsing was never aborted, the whole
+// source has been consumed, and exactly one more '=' than '&' was counted.
+bool FormDataParserUrlEncoded::AllDataReadOK() {
+  if (source_.data() == NULL || aborted_)
+    return false;
+  if (offset_ != source_end_)
+    return false;
+  return equality_signs_ == amp_signs_ + 1;
+}
+
+// Extracts the next "name=value" pair from the source into |result|, with
+// both parts URL-unescaped. |result| is reset first. Returns false when no
+// source is set, the parser has aborted, the input is exhausted, or no '='
+// terminates the name (in which case the parser is aborted as well).
+bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
+  result->Reset();
+  const bool unusable = source_.data() == NULL || aborted_;
+  if (unusable || offset_ == source_end_)
+    return false;
+
+  // Consume the name: everything up to and including the next '='.
+  const char* const name_begin = offset_;
+  char current;
+  bool got_char;
+  do {
+    got_char = GetNextChar(&current);
+  } while (got_char && current != '=');
+  if (!got_char) {  // No '=' was reached, so the data is malformed.
+    Abort();
+    return false;
+  }
+  // |offset_| is one past the '=', which does not belong to the name.
+  const char* const name_finish = offset_ - 1;
+  const std::string encoded_name(name_begin, name_finish - name_begin);
+  const net::UnescapeRule::Type unescape_rules =
+      net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
+      net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
+  result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));
+
+  // Consume the value: everything up to the next '&' or until reading stops.
+  const char* const value_begin = offset_;
+  do {
+    got_char = GetNextChar(&current);
+  } while (got_char && current != '&');
+  // If the loop stopped on '&', that separator was consumed too and has to be
+  // left out of the value.
+  const char* const value_finish = got_char ? offset_ - 1 : offset_;
+  const std::string encoded_value(value_begin, value_finish - value_begin);
+  result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));
+  return true;
+}
+
+// Sets |source| as the data to be parsed. Only a single source is accepted:
+// returns false if a source has already been set, if |source| has no data
+// (NULL data pointer), or if the parser has been aborted. Returns true and
+// positions the read offset at the start of |source| otherwise.
+bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {
+  // Test the data pointer instead of comparing |source_| with NULL: the
+  // comparison would match string contents against an empty StringPiece, so
+  // an already-set but empty source would not be noticed.
+  if (source_.data() != NULL || source.data() == NULL || aborted_)
+    return false;
+  source_ = source;
+  source_end_ = source_.data() + source_.size();
+  offset_ = source_.data();
+  return true;
+}
+
+// Copies the character at |offset_| into |*c| and advances |offset_|. Also
+// checks that '=' and '&' delimiters alternate, starting with '='; a
+// violation aborts the parser. An '&' which is the very last character of the
+// source is not counted. Returns false at the end of the source, when the
+// parser is (or becomes) aborted, and true otherwise.
+bool FormDataParserUrlEncoded::GetNextChar(char* c) {
+  if (offset_ == source_end_ || aborted_)
+    return false;
+  *c = *offset_;
+  ++offset_;
+
+  switch (*c) {
+    case '=':
+      if (!expect_equality_) {
+        Abort();
+        return false;
+      }
+      ++equality_signs_;
+      expect_equality_ = false;
+      break;
+    case '&':
+      if (offset_ == source_end_)
+        break;  // A trailing '&' is neither counted nor validated.
+      if (expect_equality_) {
+        Abort();
+        return false;
+      }
+      ++amp_signs_;
+      expect_equality_ = true;
+      break;
+    default:
+      break;
+  }
+
+  return true;
+}
+
+void FormDataParserUrlEncoded::Abort() {
+ aborted_ = true;  // Once set, the reading and SetSource() methods return false.
+}
+
+// Implementation of FormDataParserMultipart.
+
+FormDataParserMultipart::FormDataParserMultipart(
+ const std::string& boundary_separator)
+ : source_end_(NULL),  // Assigned by SetSource().
+ offset_(NULL),  // Read position; assigned by SetSource().
+ dash_boundary_("--" + boundary_separator),  // The boundary prefixed by "--".
+ state_(kStart),
+ value_name_present_(false) {  // No name-value pair has been read ahead yet.
+}
+
+FormDataParserMultipart::~FormDataParserMultipart() {}
+
+// Returns true if a source has been set and InFinalState() reports that the
+// parser stopped in a final state.
+bool FormDataParserMultipart::AllDataReadOK() {
+  if (source_.data() == NULL)
+    return false;
+  return InFinalState();
+}
+
+// Hands out, via |result|, the name-value pair which was read ahead earlier,
+// and then reads ahead the following one. Returns false, leaving |result|
+// untouched, if the parser is in the error state or no pair is pending.
+bool FormDataParserMultipart::GetNextNameValue(Result* result) {
+  if (state_ == kError)
+    return false;
+  if (!value_name_present_)
+    return false;
+
+  result->set_name(next_name_);
+  result->set_value(next_value_);
+
+  // The look-ahead buffers have to be emptied before reading on, because
+  // ReadNextNameValue() refuses to run while either still holds data.
+  next_name_.clear();
+  next_value_.clear();
+  value_name_present_ = ReadNextNameValue();
+  return true;
+}
+
+// Switches the parser to a new chunk of input, |source|, and reads ahead the
+// first name-value pair from it. Returns false if the parser is in the error
+// state, |source| has no data, a read-ahead pair has not been handed out yet,
+// or the previous source could not be read to its end.
+bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {
+  if (state_ == kError || source.data() == NULL)
+    return false;
+  // Message part across a source split is also an error.
+  if (next_name_.data() != NULL || next_value_.data() != NULL)
+    return false;
+
+  const bool previous_source_unfinished =
+      source_.data() != NULL && offset_ != source_end_;
+  if (previous_source_unfinished) {
+    // Try to seek until the end of the previous source.
+    // NOTE(review): the original comment here said that finding no name-value
+    // pair is OK, yet a failed read makes this function return false --
+    // confirm which behavior is intended.
+    value_name_present_ = ReadNextNameValue();
+    if (!(value_name_present_ && offset_ == source_end_))
+      return false;
+  }
+
+  source_ = source;
+  offset_ = source_.data();
+  source_end_ = offset_ + source_.size();
+  value_name_present_ = ReadNextNameValue();
+  return true;
+}
+
+// static. The single char matching each Transition; 0 if LookUp() decides.
+char FormDataParserMultipart::kTransitionToChar[] = {
+ '\n', // For kLF.
+ '\r', // For kCR.
+ 0, // For kAscii.
+ 0, // For kLwsp.
+ 0, // For kDashBoundary.
+ ':', // For kColonT.
+ '-', // For kDash.
+ 0, // For kAny.
+};
+
+// static. Per state, transitions tried in order by DoStep(); kAny ends a run.
+FormDataParserMultipart::Transition
+ FormDataParserMultipart::kAvailableTransitions[] = {
+ kDashBoundary, kCR, kAny, // For kStart.
+ kLF, kAny, // For kCR1.
+ kCR, kAny, // For kIgnorePreamble.
+ kLwsp, kCR, kAny, // For kDB1.
+ kLF, kAny, // For kCR2.
+ kAscii, kCR, kAny, // For kPart.
+ kAscii, kColonT, kAny, // For kName.
+ kLF, kCR, kAny, // For kColonS.
+ kCR, kAscii, kAny, // For kEnd1.
+ kLF, kCR, kAscii, kAny, // For kEnd2.
+ kLwsp, kCR, kAscii, kAny, // For kEnd3.
+ kLF, kAny, // For kCR3.
+ kDashBoundary, kCR, kAny, // For kPreData.
+ kLF, kAny, // For kCR4.
+ kCR, kAny, // For kData.
+ kDashBoundary, kAny, // For kData2.
+ kLwsp, kCR, kDash, kAny, // For kDB2.
+ kDash, kAny, // For kD.
+ kLwsp, kCR, kAny, // For kEnd.
+ kLF, kAny, // For kCR5.
+ kAny, // For kIgnoreEpilogue.
+ kAny // For kError.
+};
+
+// static. Target states; DoStep() indexes this like kAvailableTransitions.
+FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {
+ kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.
+ kStart, kIgnorePreamble, // For kCR1; 05.
+ kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.
+ kDB1, kCR2, kError, // For kDB1; 10.
+ kPart, kError, // For kCR2; 12.
+ kName, kCR3, kError, // For kPart; 15.
+ kName, kColonS, kError, // For kName; 18.
+ kEnd1, kEnd2, kColonS, // For kColonS; 21.
+ kCR3, kName, kError, // For kEnd1; 24.
+ kEnd3, kCR3, kName, kError, // For kEnd2; 28.
+ kColonS, kCR3, kName, kError, // For kEnd3; 32.
+ kPreData, kError, // For kCR3; 34.
+ kDB2, kCR3, kData, // For kPreData; 37.
+ kData2, kData, // For kCR4; 39.
+ kCR4, kData, // For kData; 41.
+ kDB2, kCR4, // For kData2; 43.
+ kDB1, kCR2, kD, kError, // For kDB2; 47.
+ kEnd, kError, // For kD; 49.
+ kEnd, kCR5, kError, // For kEnd; 52.
+ kIgnoreEpilogue, kError, // For kCR5; 54.
+ kIgnoreEpilogue, // For kIgnoreEpilogue; 55.
+ kError // For kError; 56.
+};
+
+// static. Index at which DoStep() starts scanning the tables for each state.
+size_t FormDataParserMultipart::kStateToTransition[] = {
+ 0u, // For kStart
+ 3u, // For kCR1
+ 5u, // For kIgnorePreamble
+ 7u, // For kDB1
+ 10u, // For kCR2
+ 12u, // For kPart
+ 15u, // For kName
+ 18u, // For kColonS
+ 21u, // For kEnd1
+ 24u, // For kEnd2
+ 28u, // For kEnd3
+ 32u, // For kCR3
+ 34u, // For kPreData
+ 37u, // For kCR4
+ 39u, // For kData
+ 41u, // For kData2
+ 43u, // For kDB2
+ 47u, // For kD
+ 49u, // For kEnd
+ 52u, // For kCR5
+ 54u, // For kIgnoreEpilogue
+ 55u, // For kError
+};
+
+bool FormDataParserMultipart::ReadNextNameValue() {  // Reads ahead one pair.
+ if (state_ == kError || source_.data() == NULL ||
+ next_name_.data() != NULL || next_value_.data() != NULL)
+ return false;  // Unusable parser, or the previous pair is still pending.
+ // Every DoStep() failure below makes this function return false at once.
+ // Seek to the next part's headers.
+ while (state_ != kPart) {
+ if (!DoStep())
+ return false;
+ }
+ while (state_ != kPreData) {  // Process headers until the body is reached.
+ const char* header = offset_;  // Where the current header begins.
+ while (state_ != kColonS) {  // kColonS is entered from kName on a ':'.
+ if (!DoStep())
+ return false;
+ }
+ size_t header_length = 0u;
+ while (state_ != kPreData && state_ != kName) {
+ if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) {
+ // The cast is safe, we know that offset only moves forward.
+ header_length = static_cast<size_t>(offset_ - header);
+ }
+ if (!DoStep())
+ return false;
+ }
+ if (ParseHeader(base::StringPiece(header, header_length))) {
+ // Found what we were looking for, just skip to the part's body.
+ while (state_ != kPreData) {
+ if (!DoStep())
+ return false;
+ }
+ }
+ }
+ // Read the body until a boundary is matched (kDB2) or the source ends.
+ const char* body = offset_;
+ size_t body_length = 0;
+ while (state_ != kDB2 && offset_ != source_end_) {
+ if (!DoStep())
+ return false;
+ if (state_ == kCR4) {
+ // We are in the middle of which might be the CRLF starting the part
+ // separator (see the "delimiter" non-terminal from the grammar given
+ // in the header file). The cast is safe, we know that offset only moves
+ // forward and body was assigned at least 1 transition ago.
+ body_length = static_cast<size_t>(offset_ - body - 1);
+ }
+ }
+ if (body_length > 0)  // Otherwise keep whatever ParseHeader() has stored.
+ next_value_.set(body, body_length);
+ return true;
+}
+
+// Performs one transition of the state machine. The transitions available in
+// the current state are tried in table order; the first one matching the
+// input at |offset_| is taken, consuming the matched characters. If none
+// matches, the kAny entry ending the state's run consumes a single character.
+// Returns false, without changing anything, in the error state or at the end
+// of the source; returns true otherwise.
+bool FormDataParserMultipart::DoStep() {
+  if (state_ == kError || offset_ == source_end_)
+    return false;
+
+  for (size_t index = kStateToTransition[state_]; ; ++index) {
+    const Transition candidate = kAvailableTransitions[index];
+    if (candidate == kAny) {
+      // The default choice: seek by one and switch the state.
+      ++offset_;
+      state_ = kNextState[index];
+      return true;
+    }
+    const size_t matched_length = LookUp(candidate);
+    if (matched_length > 0) {
+      offset_ += matched_length;
+      state_ = kNextState[index];
+      return true;
+    }
+  }
+}
+
+// Returns the number of characters at |offset_| which match the transition
+// |t|, or 0 if the input does not match.
+// Contract: the caller guarantees that offset_ != source_end_. DoStep() checks
+// this once, so that the check need not be repeated on each of the possibly
+// many calls made here from a single DoStep().
+size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {
+  const char next = *offset_;
+  const char expected = kTransitionToChar[t];
+
+  // Transitions labelled by one fixed character are settled by the table.
+  if (expected != 0)
+    return next == expected ? 1u : 0u;
+
+  // Character classes, still a single character long.
+  if (t == kAscii) {
+    const bool in_range = next >= 33 && next <= 126;
+    return (in_range && next != ':') ? 1u : 0u;
+  }
+  if (t == kLwsp)
+    return (next == ' ' || next == '\t') ? 1u : 0u;
+
+  // The only label longer than one character.
+  if (t == kDashBoundary) {
+    const size_t length = dash_boundary_.size();
+    // The cast below is safe, we know that the difference is not negative.
+    const size_t remaining = static_cast<size_t>(source_end_ - offset_);
+    if (remaining >= length &&
+        memcmp(dash_boundary_.c_str(), offset_, length) == 0) {
+      return length;
+    }
+    return 0u;
+  }
+
+  // We are not supposed to be asked for kAny, but this is the right answer:
+  if (t == kAny)
+    return 1u;
+
+  // We never get here -- the rest has already been handled above.
+  NOTREACHED();
+  return 0u;
+}
+
+// Parses a single part header. If |header| is a Content-Disposition header
+// with a name="..." parameter, the name is stored in |next_name_|; if a
+// filename="..." parameter is present as well, the file name is stored in
+// |next_value_|. Both are views into |header|'s data, no copy is made.
+// Returns true on success, and false for any other header, for a missing or
+// unterminated name, and for an unterminated file name.
+bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {
+  static const char kContentDisposition[] = "Content-Disposition:";
+  const size_t content_disposition_length = strlen(kContentDisposition);
+  // Compare the complete prefix (the length used to be the result of
+  // "strlen(...) != 0", i.e. 1), and only after making sure that |header| is
+  // long enough for memcmp() not to read past its end.
+  if (header.size() < content_disposition_length ||
+      memcmp(header.data(), kContentDisposition,
+             content_disposition_length) != 0) {
+    return false;
+  }
+  static const char kNameEquals[] = " name=\"";
+  static const char kFilenameEquals[] = " filename=\"";
+
+  // Mandatory part: find the name and set it as |next_name_|.
+  StringPiece::size_type field_offset = header.find(kNameEquals);
+  if (field_offset == StringPiece::npos)
+    return false;
+  field_offset += strlen(kNameEquals);
+  StringPiece::size_type field_end = header.find('"', field_offset);
+  if (field_end == StringPiece::npos)
+    return false;
+  next_name_.set(header.data() + field_offset, field_end - field_offset);
+
+  // Optional part: find the filename and set it as |next_value_|.
+  field_offset = header.find(kFilenameEquals);
+  if (field_offset == StringPiece::npos)
+    return true;  // This was only optional.
+  field_offset += strlen(kFilenameEquals);
+  field_end = header.find('"', field_offset);
+  if (field_end == StringPiece::npos)
+    return false;  // This is a malformed header.
+  next_value_.set(header.data() + field_offset, field_end - field_offset);
+  return true;
+}
+
+} // namespace extensions

Powered by Google App Engine
This is Rietveld 408576698