chrome/browser/extensions/api/web_request/post_data_parser.cc - Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest

Unified Diff: chrome/browser/extensions/api/web_request/post_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Windows, what's your problem with scoped_ptr? Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/extensions/api/web_request/post_data_parser.h ('K') | « chrome/browser/extensions/api/web_request/post_data_parser.h ('k') | chrome/browser/extensions/api/web_request/post_data_parser_unittest.cc » ('j') | chrome/browser/extensions/api/web_request/web_request_api.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/extensions/api/web_request/post_data_parser.cc

diff --git a/chrome/browser/extensions/api/web_request/post_data_parser.cc b/chrome/browser/extensions/api/web_request/post_data_parser.cc

new file mode 100644

index 0000000000000000000000000000000000000000..7d4ef8e17122a21e5556c76c80443f3c6b1621f4

--- /dev/null

+++ b/chrome/browser/extensions/api/web_request/post_data_parser.cc

@@ -0,0 +1,354 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/extensions/api/web_request/post_data_parser.h"

+#include "base/values.h"

+#include "net/base/escape.h"

+#include "net/base/upload_data.h"

+#include "net/url_request/url_request.h"

+namespace {

+const char kContentDisposition[] = "Content-Disposition:";

+// Takes |dictionary| of <string, list of strings> pairs, and gets the list

+// for |key|, creating it if necessary.

+ListValue* GetOrCreateList(DictionaryValue* dictionary,

+ const std::string& key) {

+ ListValue* list = NULL;

+ if (!dictionary->GetList(key, &list)) {

+ list = new ListValue();

+ dictionary->Set(key, list);

+ }

+ return list;

wtc 2012/08/03 00:57:19 Nit: add a //namespace comment, like this: } /

vabr (Chromium) 2012/08/05 18:54:46 Done.

+namespace extensions {

+// Implementation of PostDataParser and PostDataParser::Result .

+PostDataParser::Result::Result() {}

+PostDataParser::Result::~Result() {}

+void PostDataParser::Result::Reset() {

+ key_.erase();

+ val_.erase();

+void PostDataParser::Result::SetKey(const base::StringPiece& str) {

+ key_.replace(0, std::string::npos, str.data(), str.size());

+void PostDataParser::Result::SetVal(const base::StringPiece& str) {

+ val_.replace(0, std::string::npos, str.data(), str.size());

wtc 2012/08/03 00:57:19 I think you can use base::CopyToString (declared i

vabr (Chromium) 2012/08/05 18:54:46 Thanks, that's much more readable!

+void PostDataParser::Result::SetKey(const std::string& str) {

+ key_ = str;

+void PostDataParser::Result::SetVal(const std::string& str) {

+ val_ = str;

wtc 2012/08/03 00:57:19 I think these four methods should be named set_key

vabr (Chromium) 2012/08/05 18:54:46 Done. (For the record, if somebody asks me later:

Matt Perry 2012/08/06 21:06:45 Interesting, I didn't know that! Thanks!

+PostDataParser::~PostDataParser() {}

+// static

+scoped_ptr<PostDataParser> PostDataParser::CreatePostDataParser(

+ const net::URLRequest* request) {

+ std::string value;

+ const bool found = request->extra_request_headers().GetHeader(

+ net::HttpRequestHeaders::kContentType, &value);

+ return CreatePostDataParser(found ? &value : NULL);

+// static

+scoped_ptr<PostDataParser> PostDataParser::CreatePostDataParser(

+ const std::string* content_type_header) {

wtc 2012/08/03 00:57:19 Nit: this argument is named |content_type| in the

vabr (Chromium) 2012/08/05 18:54:46 Done (corrected the header).

+ enum ParserChoice {kUrlEncoded, kMultipart, kError};

+ ParserChoice choice = kError;

+ std::string boundary;

+ if (content_type_header == NULL) {

+ choice = kUrlEncoded;

+ } else {

+ const std::string content_type(

+ content_type_header->substr(0, content_type_header->find(';')));

+ if (content_type == "application/x-www-form-urlencoded") {

+ choice = kUrlEncoded;

+ } else if (content_type == "multipart/form-data") {

wtc 2012/08/03 00:57:19 Should we do case-insensitive string comparison in

vabr (Chromium) 2012/08/05 18:54:46 That's a good point. RFC 2388 does not specify cas

+ const char kBoundaryString[] = "boundary=";

wtc 2012/08/03 00:57:19 Nit: add 'static'

vabr (Chromium) 2012/08/05 18:54:46 Done.

+ size_t offset = content_type_header->find(kBoundaryString);

+ if (offset == std::string::npos) {

+ // Malformed header.

+ return scoped_ptr<PostDataParser>();

+ }

+ offset += strlen(kBoundaryString);

+ boundary = content_type_header->substr(

+ offset, content_type_header->find(';', offset));

wtc 2012/08/03 00:57:19 I think we should also return scoped_ptr<PostDataP

vabr (Chromium) 2012/08/05 18:54:46 Indeed, thanks for catching this.

+ choice = kMultipart;

+ }

+ // Other cases are unparseable, including when |content_type| is "text/plain".

+ switch (choice) {

+ case kUrlEncoded:

+ return scoped_ptr<PostDataParser>(new PostDataParserUrlEncoded());

+ case kMultipart:

+ return scoped_ptr<PostDataParser>(new PostDataParserMultipart(boundary));

+ default: // In other words, case kError:

+ return scoped_ptr<PostDataParser>();

+ }

+// static

+scoped_ptr<base::DictionaryValue> PostDataParser::ParseURLRequestData(

+ const net::URLRequest* request) {

+ if (request->method() != "POST")

+ return scoped_ptr<base::DictionaryValue>();

+ const std::vector<net::UploadData::Element>* elements =

+ request->get_upload()->elements();

+ scoped_ptr<PostDataParser> parser = CreatePostDataParser(request);

+ if (parser.get() == NULL) {

wtc 2012/08/03 00:57:19 Nit: just do if (!parser) {

vabr (Chromium) 2012/08/05 18:54:46 Function deleted in the meantime.

+ // No parser means most probably unsupported form encoding.

+ return scoped_ptr<base::DictionaryValue>();

+ }

+ scoped_ptr<base::DictionaryValue> form_data(new base::DictionaryValue);

+ std::vector<net::UploadData::Element>::const_iterator element;

+ bool data_valid = true;

+ for (element = elements->begin();

+ data_valid && element != elements->end(); ++element) {

+ if (element->type() != net::UploadData::TYPE_BYTES) {

+ // We do not handle data including blobs or chunks.

+ if (element->type() != net::UploadData::TYPE_FILE)

+ data_valid = false;

wtc 2012/08/03 00:57:19 If we simply return scoped_ptr<base::DictionaryVal

vabr (Chromium) 2012/08/05 18:54:46 Function, and |data_valid| deleted in the meantime

+ continue;

+ }

+ if (!parser->SetSource(&(element->bytes())))

+ continue;

+ Result result;

+ while (parser->GetNextPair(&result)) {

+ GetOrCreateList(form_data.get(), result.key())->Append(

+ new StringValue(result.val()));

+ }

+ if (data_valid && parser->AllDataReadOK())

+ return form_data.Pass();

+ else

+ return scoped_ptr<base::DictionaryValue>();

+// Implementation of PostDataParserUrlEncoded.

+PostDataParserUrlEncoded::PostDataParserUrlEncoded() : source_(NULL) {}

+PostDataParserUrlEncoded::~PostDataParserUrlEncoded() {}

+bool PostDataParserUrlEncoded::AllDataReadOK() {

+ return source_ != NULL && offset_ == source_->end();

+bool PostDataParserUrlEncoded::GetNextPair(Result* result) {

+ result->Reset();

+ if (source_ == NULL)

+ return false;

+ if (offset_ == source_->end())

+ return false;

+ std::vector<char>::const_iterator seek = offset_;

+ // (*) Now we have |seek| >= |offset_| until the end of this function:

+ while (seek != source_->end() && *seek != '=')

+ ++seek;

+ if (seek == source_->end()) {

+ // This means the data is malformed.

+ offset_ = seek;

wtc 2012/08/03 00:57:19 BUG: if you set offset_ to seek here, it'll cause

vabr (Chromium) 2012/08/05 18:54:46 Indeed, very nicely spotted, thanks! However, thi

+ return false;

+ }

+ std::string encoded_key(&(*offset_), seek - offset_); // Safe, see (*).

wtc 2012/08/03 00:57:19 Is it necessary to do &(*offset_) ? I guess offse

vabr (Chromium) 2012/08/05 18:54:46 I'm afraid it is necessary. offset_ is a iterator,

+ const net::UnescapeRule::Type unescape_rules =

+ net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |

+ net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;

+ result->SetKey(net::UnescapeURLComponent(encoded_key, unescape_rules));

+ offset_ = ++seek;

+ while (seek != source_->end() && *seek != '&')

+ ++seek;

+ std::string encoded_val(&(*offset_), seek - offset_); // Safe, see (*).

+ result->SetVal(net::UnescapeURLComponent(encoded_val, unescape_rules));

+ offset_ = (seek == source_->end()) ? seek : seek + 1;

+ return true;

+bool PostDataParserUrlEncoded::SetSource(const std::vector<char>* source) {

+ if (source_ != NULL)

+ return false;

wtc 2012/08/03 00:57:19 This allows SetSource() to be called only once. W

vabr (Chromium) 2012/08/05 18:54:46 I don't think Chrome/WebKit splits URLEncoded form

wtc 2012/08/09 22:02:50 Thank you for the explanation. I was wondering if

vabr (Chromium) 2012/08/10 17:12:55 I can confirm that the each instance of PostDataPa

+ source_ = source;

+ offset_ = source_->begin();

+ return true;

+// Implementation of PostDataParserMultipart.

+PostDataParserMultipart::PostDataParserMultipart(

+ const std::string& boundary_separator)

+ : source_(NULL),

+ length_(0), // Dummy value.

+ line_start_(0), // Dummy value.

+ line_end_(0), // Dummy value.

+ next_line_(0), // Dummy value.

+ boundary_("--" + boundary_separator),

+ final_boundary_(boundary_ + "--"),

+ state_(kInit),

+ line_type_(kEmpty) // Dummy value.

+{}

wtc 2012/08/03 00:57:19 Nit: the opening curly brace '{' probably should b

vabr (Chromium) 2012/08/05 18:54:46 Done.

+PostDataParserMultipart::~PostDataParserMultipart() {}

+bool PostDataParserMultipart::AllDataReadOK() {

+ return source_ != NULL && next_line_ >= length_ && state_ == kFinal;

+// This function reads one block of the data, between two boundaries.

+// First it reads the header to learn the key, and possibly also the

+// value, if this block is for a file input element.

+// Otherwise it then reads the value from the body.

+bool PostDataParserMultipart::GetNextPair(Result* result) {

+ result->Reset();

+ if (state_ == kError)

+ return false;

+ while (state_ != kHeadRead) {

+ if (!DoStep())

+ return false;

+ }

+ bool val_extracted = false;

+ bool name_parsed = ParseHead(result, &val_extracted);

+ while (state_ != kBody) {

+ if (!DoStep())

+ return false;

+ }

+ size_t val_start;

+ size_t val_end = 0; // Dummy value, replaced below, see (*).

+ // There may not be more to read from |source_| if the current result comes

+ // from a "file" input element. But then |result| is complete already.

+ if (!DoStep())

+ return val_extracted;

+ val_start = line_start_;

+ // (*) Now state_ == kBody, so val_end gets updated below.

+ while (state_ != kHeadStart && state_ != kFinal) {

+ val_end = line_end_;

+ if (!DoStep()) break;

+ }

+ if (name_parsed && !val_extracted) {

+ result->SetVal(base::StringPiece(source_ + val_start, val_end - val_start));

+ }

+ return name_parsed;

+bool PostDataParserMultipart::SetSource(const std::vector<char>* source) {

+ if (state_ == kError)

+ return false;

+ if (source_ != NULL && next_line_ < length_)

+ return false;

+ source_ = &(source->front());

+ length_ = source->size();

+ next_line_ = 0;

wtc 2012/08/03 00:57:19 Should we also set line_start_ and line_end_ to 0?

vabr (Chromium) 2012/08/05 18:54:46 We do not need to, they will be set accordingly as

wtc 2012/08/09 22:02:50 No, I don't. My previous suggestion was based on

vabr (Chromium) 2012/08/10 17:12:55 Just to be clear, PostDataParserMultipart::SetSour

+ return true;

+bool PostDataParserMultipart::DoStep() {

+ if (!SeekNextLine())

+ return false;

+ switch (state_) {

+ case kInit:

+ if (line_type_ == kBoundary)

+ state_ = kHeadStart;

+ else

+ state_ = kError;

+ break;

+ case kHeadStart:

+ if (line_type_ == kDisposition)

+ state_ = kHeadRead;

+ else

+ state_ = kHead;

+ break;

+ case kHead:

+ if (line_type_ == kDisposition)

+ state_ = kHeadRead;

+ break;

+ case kHeadRead:

+ if (line_type_ == kEmpty)

+ state_ = kBody;

+ break;

+ case kBody:

+ if (line_type_ == kBoundary)

+ state_ = kHeadStart;

+ else if (line_type_ == kEndBoundary)

+ state_ = kFinal;

+ break;

+ case kFinal:

+ if (line_type_ != kEmpty)

+ state_ = kError;

wtc 2012/08/03 00:57:19 Add a break statement.

vabr (Chromium) 2012/08/05 18:54:46 Done.

+ case kError:

+ break;

+ }

+ return true;

+PostDataParserMultipart::LineType PostDataParserMultipart::GetLineType() {

+ const size_t line_length = line_end_ - line_start_;

+ const base::StringPiece line(source_ + line_start_, line_length);

+ if (line == boundary_)

+ return kBoundary;

+ else if (line == final_boundary_)

+ return kEndBoundary;

wtc 2012/08/03 00:57:19 Nit: I suggest making "final boundary" and "end bo

vabr (Chromium) 2012/08/05 18:54:46 Done.

+ else if (line.starts_with(kContentDisposition))

+ return kDisposition;

+ else if (line_start_ == line_end_)

+ return kEmpty;

+ else

+ return kOther;

+// Contract: only to be called from DoStep().

+bool PostDataParserMultipart::SeekNextLine() {

+ if (source_ == NULL || state_ == kError)

+ return false;

+ if (next_line_ >= length_)

+ return false;

+ line_start_ = next_line_;

+ size_t seek = line_start_;

+ while (seek < length_ && *(source_ + seek) != '\r')

+ ++seek;

+ line_end_ = seek;

+ line_type_ = GetLineType();

+ if ((seek+1) < length_ && strncmp(source_ + seek, "\r\n", 2) != 0)

+ return false;

+ next_line_ = seek + 2;

wtc 2012/08/03 00:57:19 If the data does not end with a final "\r\n", this

vabr (Chromium) 2012/08/05 18:54:46 Due to adding a check for garbage at the end, this

+ return true;

+// Contract: line_type_ == kDisposition.

+bool PostDataParserMultipart::ParseHead(Result* result, bool* val_extracted) {

+ DCHECK_EQ(kDisposition, line_type_);

+ base::StringPiece line(source_ + line_start_, line_end_ - line_start_);

+ const char kNameEquals[] = " name=\"";

+ const char kFilenameEquals[] = " filename=\"";

+ size_t key_offset = line.find(kNameEquals);

+ if (key_offset == base::StringPiece::npos)

+ return false;

+ key_offset += strlen(kNameEquals);

+ result->SetKey(base::StringPiece(source_ + line_start_ + key_offset,

+ line.find('"', key_offset) - key_offset));

+ size_t val_offset = line.find(kFilenameEquals);

+ if (val_offset == std::string::npos) {

+ *val_extracted = false;

+ } else {

+ *val_extracted = true;

+ val_offset += strlen(kFilenameEquals);

+ result->SetVal(base::StringPiece(source_ + line_start_ + val_offset,

+ line.find('"', val_offset) - val_offset));

+ }

+ return true;

+} // namespace extensions