chrome/browser/extensions/api/web_request/post_data_parser.cc - Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest

Unified Diff: chrome/browser/extensions/api/web_request/post_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Rebased + some corrections Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/extensions/api/web_request/post_data_parser.h ('K') | « chrome/browser/extensions/api/web_request/post_data_parser.h ('k') | chrome/browser/extensions/api/web_request/post_data_parser_unittest.cc » ('j') | chrome/browser/extensions/api/web_request/web_request_api.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/extensions/api/web_request/post_data_parser.cc

diff --git a/chrome/browser/extensions/api/web_request/post_data_parser.cc b/chrome/browser/extensions/api/web_request/post_data_parser.cc

new file mode 100644

index 0000000000000000000000000000000000000000..e1474dd0334bb7e1fb77388d7da294eebb7bd2f1

--- /dev/null

+++ b/chrome/browser/extensions/api/web_request/post_data_parser.cc

@@ -0,0 +1,529 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/extensions/api/web_request/post_data_parser.h"

+#include "base/base64.h"

+#include "base/file_path.h"

+#include "base/string_piece.h"

+#include "base/string_util.h"

+#include "base/values.h"

+#include "net/base/escape.h"

+#include "net/url_request/url_request.h"

+namespace {

+const char kContentDisposition[] = "Content-Disposition:";

+// Takes |dictionary| of <string, list of strings> pairs, and gets the list

+// for |key|, creating it if necessary.

+ListValue* GetOrCreateList(DictionaryValue* dictionary,

+ const std::string& key) {

+ ListValue* list = NULL;

+ if (!dictionary->GetList(key, &list)) {

+ list = new ListValue();

+ dictionary->Set(key, list);

+ }

+ return list;

+} // namespace

+namespace extensions {

+// Implementation of FormDataParser and FormDataParser::Result.

+FormDataParser::Result::Result() {}

+FormDataParser::Result::~Result() {}

+void FormDataParser::Result::Reset() {

+ name_.erase();

+ value_.erase();

+void FormDataParser::Result::set_name(const base::StringPiece& str) {

+ str.CopyToString(&name_);

+void FormDataParser::Result::set_value(const base::StringPiece& str) {

+ str.CopyToString(&value_);

+void FormDataParser::Result::set_name(const std::string& str) {

+ name_ = str;

+void FormDataParser::Result::set_value(const std::string& str) {

+ value_ = str;

+FormDataParser::~FormDataParser() {}

+// static

+scoped_ptr<FormDataParser> FormDataParser::Create(

+ const net::URLRequest* request) {

+ std::string value;

+ const bool found = request->extra_request_headers().GetHeader(

+ net::HttpRequestHeaders::kContentType, &value);

+ return Create(found ? &value : NULL);

+// static
+// Chooses a parser based on the value of the Content-Type request header.
+// |content_type_header| may be NULL, meaning the header is absent; in that
+// case a URL-encoded parser is returned. Returns a NULL scoped_ptr when the
+// header is malformed or names a type this file cannot parse.
+scoped_ptr<FormDataParser> FormDataParser::Create(
+    const std::string* content_type_header) {
+  enum ParserChoice {kUrlEncoded, kMultipart, kError};
+  ParserChoice choice = kError;
+  std::string boundary;
+  if (content_type_header == NULL) {
+    choice = kUrlEncoded;
+  } else {
+    // The media type is everything up to the first ';'; parameters follow it.
+    const std::string content_type(
+        content_type_header->substr(0, content_type_header->find(';')));
+    if (base::strcasecmp(
+        content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
+      choice = kUrlEncoded;
+    } else if (base::strcasecmp(
+        content_type.c_str(), "multipart/form-data") == 0) {
+      static const char kBoundaryString[] = "boundary=";
+      size_t offset = content_type_header->find(kBoundaryString);
+      if (offset == std::string::npos) {
+        // Malformed header.
+        return scoped_ptr<FormDataParser>();
+      }
+      offset += strlen(kBoundaryString);
+      // std::string::substr() takes a length, not an end position. Passing
+      // the position of the next ';' directly would drag any parameters that
+      // follow the boundary (e.g. "; charset=utf-8") into |boundary|.
+      const size_t boundary_end = content_type_header->find(';', offset);
+      const size_t boundary_length = (boundary_end == std::string::npos) ?
+          std::string::npos : boundary_end - offset;
+      boundary = content_type_header->substr(offset, boundary_length);
+      if (!boundary.empty())
+        choice = kMultipart;
+    }
+  }
+  // Other cases are unparseable, including when |content_type| is "text/plain".
+  switch (choice) {
+    case kUrlEncoded:
+      return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
+    case kMultipart:
+      return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
+    default:  // In other words, case kError:
+      return scoped_ptr<FormDataParser>();
+  }
+}

+FormDataParser::FormDataParser() {}

+// Implementation of FormDataParserUrlEncoded.

+FormDataParserUrlEncoded::FormDataParserUrlEncoded()

+ : source_(NULL),

+ aborted_(false),

+ equality_signs_(0),

+ amp_signs_(0),

+ expect_equality_(true) {

+FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}

+bool FormDataParserUrlEncoded::AllDataReadOK() {

+ return source_ != NULL &&

+ !aborted_ &&

+ offset_ == source_->end() &&

+ equality_signs_ == amp_signs_ + 1;

+bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {

+ result->Reset();

+ if (source_ == NULL || aborted_)

+ return false;

+ if (offset_ == source_->end())

+ return false;

+ const char* const name_start = &(*offset_);

+ char c;

+ bool last_read_success = GetNextChar(&c);

+ while (last_read_success && c != '=')

+ last_read_success = GetNextChar(&c);

wtc 2012/08/09 23:39:40 Nit: this can be a do-while loop: bool last_rea

vabr (Chromium) 2012/08/10 17:12:55 I prefer the while-loop because: * it is shorter,

+ if (!last_read_success) { // This means the data is malformed.

+ Abort();

+ return false;

+ }

+ const char* const name_end = &(*(offset_ - 1));

+ const std::string encoded_name(name_start, name_end - name_start);

+ const net::UnescapeRule::Type unescape_rules =

+ net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |

+ net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;

+ result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));

+ const char* const value_start = &(*offset_);

+ last_read_success = GetNextChar(&c);

+ while (last_read_success && c != '&')

+ last_read_success = GetNextChar(&c);

+ const char* const value_end =

+ last_read_success ? &(*(offset_ - 1)) : &(*offset_);

+ const std::string encoded_value(value_start, value_end - value_start);

+ result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));

+ return true;

+bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) {

+ if (source_ != NULL || aborted_)

+ return false;

+ source_ = source;

+ offset_ = source_->begin();

+ return true;

+bool FormDataParserUrlEncoded::GetNextChar(char* c) {

+ if (offset_ == source_->end() || aborted_)

+ return false;

+ *c = *offset_;

+ ++offset_;

+ if (*c == '=') {

+ if (expect_equality_) {

+ ++equality_signs_;

+ expect_equality_ = false;

+ } else {

+ Abort();

+ return false;

+ }

+ if (*c == '&' && offset_ != source_->end()) {

+ if (!expect_equality_) {

+ ++amp_signs_;

+ expect_equality_ = true;

+ } else {

+ Abort();

+ return false;

+ }

+ return true;

+void FormDataParserUrlEncoded::Abort() {

+ aborted_ = true;

+// Implementation of FormDataParserMultipart.

+FormDataParserMultipart::FormDataParserMultipart(

+ const std::string& boundary_separator)

+ : source_(NULL),

+ length_(0), // Dummy value.

+ line_start_(0), // Dummy value.

+ line_end_(0), // Dummy value.

+ next_line_(0), // Dummy value.

+ boundary_("--" + boundary_separator),

+ end_boundary_(boundary_ + "--"),

+ state_(kInit),

+ line_type_(kEmpty) { // Dummy value.

+FormDataParserMultipart::~FormDataParserMultipart() {}

+bool FormDataParserMultipart::AllDataReadOK() {

+ return source_ != NULL && next_line_ >= length_ && state_ == kFinal;

+// This function reads one block of the data, between two boundaries.

+// First it reads the header to learn the name, and possibly also the

+// value, if this block is for a file input element.

+// Otherwise it then reads the value from the body.

+bool FormDataParserMultipart::GetNextNameValue(Result* result) {

+ result->Reset();

+ if (state_ == kError)

+ return false;

+ while (state_ != kHeaderRead) {

+ if (!DoStep())

+ return false;

+ }

+ bool value_extracted = false;

+ bool name_parsed = ParseHeader(result, &value_extracted);

+ while (state_ != kBody) {

+ if (!DoStep())

+ return false;

+ }

+ size_t value_start;

+ size_t value_end = 0; // Dummy value, replaced below, see (*).

+ // There may not be more to read from |source_| if the current result comes

+ // from a "file" input element. But then |result| is complete already.

+ if (!DoStep())

+ return value_extracted;

+ value_start = line_start_;

+ // (*) Now state_ == kBody, so value_end gets updated below.

wtc 2012/08/09 23:39:40 Are you sure state_ == kBody here? At line 249, s

vabr (Chromium) 2012/08/10 17:12:55 Thanks for catching this. I was missing a corner c

+ while (state_ != kHeaderStart && state_ != kFinal) {

+ value_end = line_end_;

+ if (!DoStep()) break;

+ }

+ if (name_parsed && !value_extracted) {

+ result->set_value(

+ base::StringPiece(source_ + value_start, value_end - value_start));

+ }

+ return name_parsed;

+bool FormDataParserMultipart::SetSource(const std::vector<char>* source) {

+ if (state_ == kError)

+ return false;

+ if (source_ != NULL && next_line_ < length_)

+ return false;

+ source_ = &(source->front());

+ length_ = source->size();

+ next_line_ = 0;

+ return true;

+bool FormDataParserMultipart::DoStep() {

+ if (!SeekNextLine())

+ return false;

+ switch (state_) {

+ case kInit:

+ if (line_type_ == kBoundary)

+ state_ = kHeaderStart;

+ else

+ state_ = kError;

+ break;

+ case kHeaderStart:

+ if (line_type_ == kDisposition)

+ state_ = kHeaderRead;

+ else

+ state_ = kHeader;

+ break;

+ case kHeader:

+ if (line_type_ == kDisposition)

+ state_ = kHeaderRead;

+ break;

+ case kHeaderRead:

+ if (line_type_ == kEmpty)

+ state_ = kBody;

+ break;

+ case kBody:

+ if (line_type_ == kBoundary)

+ state_ = kHeaderStart;

+ else if (line_type_ == kEndBoundary)

+ state_ = kFinal;

+ break;

+ case kFinal:

+ if (line_type_ != kEmpty)

+ state_ = kError;

+ break;

+ case kError:

+ break;

+ }

+ return true;

+FormDataParserMultipart::LineType FormDataParserMultipart::GetLineType() {

+ const size_t line_length = line_end_ - line_start_;

wtc 2012/08/09 23:39:40 If empty lines are common, you can do this optimiz

vabr (Chromium) 2012/08/10 17:12:55 Good point, done.

+ const base::StringPiece line(source_ + line_start_, line_length);

+ if (line == boundary_)

+ return kBoundary;

+ else if (line == end_boundary_)

+ return kEndBoundary;

+ else if (line.starts_with(kContentDisposition))

+ return kDisposition;

+ else if (line_start_ == line_end_)

+ return kEmpty;

+ else

+ return kOther;

+// Contract: only to be called from DoStep().

+bool FormDataParserMultipart::SeekNextLine() {

+ if (source_ == NULL || state_ == kError)

+ return false;

+ if (next_line_ >= length_)

+ return false;

+ line_start_ = next_line_;

+ size_t seek = line_start_;

+ while (seek < length_ && *(source_ + seek) != '\r')

+ ++seek;

+ line_end_ = seek;

+ line_type_ = GetLineType();

+ next_line_ = seek;

+ if (seek + 1 < length_ && strncmp(source_ + seek, "\r\n", 2) == 0) {

wtc 2012/08/09 23:39:40 Nit: since you have already checked the length, yo

vabr (Chromium) 2012/08/10 17:12:55 Done.

+ next_line_ += 2;

+ return true;

+ } else if (seek == length_) {

wtc 2012/08/09 23:39:40 Nit: don't use "else" after a return statement. S

vabr (Chromium) 2012/08/10 17:12:55 Done. (Not quite sure, though: what is wrong with

+ return true;

+ } else {

+ // Neither at the end, nor ending with a CRLF -- abort.

wtc 2012/08/09 23:39:40 Can you find out if we need to support a line endi

vabr (Chromium) 2012/08/10 17:12:55 RFC 2388 (Returning Values from Forms: multipart/

+ state_ = kError;

+ return false;

+ }

+// Contract: line_type_ == kDisposition.

+bool FormDataParserMultipart::ParseHeader(

+ Result* result, bool* value_extracted) {

+ DCHECK_EQ(kDisposition, line_type_);

+ base::StringPiece line(source_ + line_start_, line_end_ - line_start_);

+ const char kNameEquals[] = " name=\"";

+ const char kFilenameEquals[] = " filename=\"";

wtc 2012/08/09 23:39:40 Nit: these two const char arrays can be 'static'.

vabr (Chromium) 2012/08/10 17:12:55 Done.

+ size_t name_offset = line.find(kNameEquals);

+ if (name_offset == base::StringPiece::npos)

+ return false;

+ name_offset += strlen(kNameEquals);

wtc 2012/08/09 23:39:40 Nit: strlen(kNameEquals) can be replaced by sizeof

vabr (Chromium) 2012/08/10 17:12:55 Actually, at least in GCC, strlen called on a comp

+ result->set_name(base::StringPiece(

+ source_ + line_start_ + name_offset,

+ line.find('"', name_offset) - name_offset));

wtc 2012/08/09 23:39:40 If line.find('"', name_offset) does not find the '

vabr (Chromium) 2012/08/10 17:12:55 Check added. Thanks for catching this.

+ size_t value_offset = line.find(kFilenameEquals);

+ if (value_offset == std::string::npos) {

+ *value_extracted = false;

+ } else {

+ *value_extracted = true;

+ value_offset += strlen(kFilenameEquals);

+ result->set_value(base::StringPiece(

+ source_ + line_start_ + value_offset,

+ line.find('"', value_offset) - value_offset));

+ }

+ return true;

+// Implementation of RequestDataRepresentationProducer.

+RequestDataRepresentationProducer::~RequestDataRepresentationProducer() {}

+// Implementation of ChunkedErrorProducer.

+ChunkedErrorProducer::ChunkedErrorProducer(const net::URLRequest* request)

+ : chunks_found_(TransferEncodingChunked(request)) {

+ChunkedErrorProducer::~ChunkedErrorProducer() {}

+// static

+bool ChunkedErrorProducer::TransferEncodingChunked(

+ const net::URLRequest* request){

+ std::string transfer_encoding;

+ if (!request->extra_request_headers().GetHeader(

+ net::HttpRequestHeaders::kTransferEncoding, &transfer_encoding))

+ return false;

+ return base::strcasecmp(transfer_encoding.c_str(), "chunked") == 0;

+void ChunkedErrorProducer::FeedNext(const net::UploadData::Element& element) {

+ if (chunks_found_)

+ return; // We already found a reason to report an error.

+ if (element.type() == net::UploadData::TYPE_CHUNK)

+ chunks_found_ = true;

+bool ChunkedErrorProducer::Succeeded() {

+ return chunks_found_;

+scoped_ptr<Value> ChunkedErrorProducer::Result() {

+ if (!chunks_found_)

+ return scoped_ptr<Value>();

+ scoped_ptr<StringValue> error_string(new StringValue(

+ "Not supported: data is uploaded chunked."));

+ return error_string.PassAs<Value>();

+// Implementation of RawDataProducer.

+RawDataProducer::RawDataProducer() : success_(true) {}

+RawDataProducer::~RawDataProducer() {}

+void RawDataProducer::FeedNext(const net::UploadData::Element& element) {

+ if (!success_)

+ return;

+ if (element.type() == net::UploadData::TYPE_BYTES) {

+ data_.insert(data_.end(), element.bytes().begin(), element.bytes().end());

+ } else if (element.type() == net::UploadData::TYPE_CHUNK) {

+ Abort(); // Chunks are not supported (yet).

+ } else if (element.type() == net::UploadData::TYPE_FILE) {

+ // Insert the file path instead of the contents, which may be too large.

+ const char kFileEntryPrefix[] = "FILE_PATH=[";

vabr (Chromium) 2012/08/05 18:54:47 This might not be optimal to distinguish the file

Matt Perry 2012/08/06 21:06:45 So, our raw data is still not quite "raw". Maybe w

vabr (Chromium) 2012/08/10 17:12:55 Great idea, done. Now I'm returning an array (List

+ const char kFileEntrySuffix[] = "]";

+ data_.insert(data_.end(),

+ kFileEntryPrefix,

+ kFileEntryPrefix + arraysize(kFileEntryPrefix));

wtc 2012/08/09 23:39:40 NOTE: there may be an off-by-one error here. arra

vabr (Chromium) 2012/08/10 17:12:55 Indeed, this was an error. However, the code was r

+ const std::string& path = element.file_path().AsUTF8Unsafe();

+ data_.insert(data_.end(), path.begin(), path.end());

+ data_.insert(data_.end(),

+ kFileEntrySuffix,

+ kFileEntrySuffix + arraysize(kFileEntrySuffix));

+ } // TYPE_BLOB is silently ignored.

+// Reports whether the raw data was collected without an abort and, if so,
+// stores its Base64 encoding in |data_string_| for Result() to return.
+bool RawDataProducer::Succeeded() {
+  if (!success_)
+    return false;
+  // Ideally, we would pass data_ as a BinaryValue. But those cannot be
+  // serialized via JSON, so we pass it as a Base64-encoded string.
+  // |data_| stays empty if FeedNext() never saw a TYPE_BYTES or TYPE_FILE
+  // element; indexing an empty vector is undefined, so encode an empty piece
+  // in that case instead of taking &data_[0].
+  const base::StringPiece data_raw = data_.empty() ?
+      base::StringPiece() : base::StringPiece(&(data_[0]), data_.size());
+  return base::Base64Encode(data_raw, &data_string_);
+}

+scoped_ptr<Value> RawDataProducer::Result() {

+ if (!success_)

+ return scoped_ptr<Value>();

+ return scoped_ptr<Value>(new StringValue(data_string_));

+void RawDataProducer::Abort() {

+ success_ = false;

+ data_.clear();

+// Implementation of ParsedDataProducer.

+ParsedDataProducer::ParsedDataProducer(const net::URLRequest* request)

+ : parser_(FormDataParser::Create(request).release()),

+ success_(parser_.get() != NULL),

+ dictionary_(success_ ? new DictionaryValue() : NULL) {

+ParsedDataProducer::~ParsedDataProducer() {}

+void ParsedDataProducer::FeedNext(const net::UploadData::Element& element) {

+ if (!success_)

+ return;

+ if (element.type() != net::UploadData::TYPE_BYTES) {

+ if (element.type() != net::UploadData::TYPE_FILE) {

+ Abort(); // We do not handle blobs nor chunks.

+ }

+ return; // But we just ignore files.

+ }

+ if (!parser_->SetSource(&(element.bytes()))) {

+ Abort();

+ return;

+ }

+ FormDataParser::Result result;

+ while (parser_->GetNextNameValue(&result)) {

+ GetOrCreateList(dictionary_.get(), result.name())->Append(

+ new StringValue(result.value()));

+ }

+bool ParsedDataProducer::Succeeded() {

+ if (success_ && !parser_->AllDataReadOK())

+ Abort();

+ return success_;

+scoped_ptr<Value> ParsedDataProducer::Result() {

+ if (success_)

+ return dictionary_.PassAs<Value>();

+ else

+ return scoped_ptr<Value>();

+void ParsedDataProducer::Abort() {

+ success_ = false;

+ dictionary_.reset();

+ parser_.reset();

+} // namespace extensions