chrome/browser/extensions/api/web_request/form_data_parser.cc - Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest

Side by Side Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Dominic's comments + adjusting to the recent move of UploadElement out of UploadData. Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« chrome/browser/extensions/api/web_request/form_data_parser.h ('K') | « chrome/browser/extensions/api/web_request/form_data_parser.h ('k') | chrome/browser/extensions/api/web_request/form_data_parser_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"

	6

	7 #include "base/string_util.h"

	8 #include "base/values.h"

	9 #include "net/base/escape.h"

	10 #include "net/url_request/url_request.h"

	11

	12 using base::DictionaryValue;

	13 using base::ListValue;

	14 using base::StringPiece;

	15

	16 namespace extensions {

	17

	18 // Implementation of FormDataParser and FormDataParser::Result .

	19

	20 FormDataParser::Result::Result() {}

	21 FormDataParser::Result::~Result() {}

	22

	23 void FormDataParser::Result::Reset() {

	24 name_.erase();

	25 value_.erase();

	26 }

	27

	28 FormDataParser::~FormDataParser() {}

	29

	30 // static

	31 scoped_ptr<FormDataParser> FormDataParser::Create(

	32 const net::URLRequest* request) {

	33 std::string value;

	34 const bool found = request->extra_request_headers().GetHeader(

	35 net::HttpRequestHeaders::kContentType, &value);

	36 return Create(found ? &value : NULL);

	37 }

	38

	39 // static

	40 scoped_ptr<FormDataParser> FormDataParser::Create(

	41 const std::string* content_type_header) {

	42 enum ParserChoice {kUrlEncoded, kMultipart, kError};

	43 ParserChoice choice = kError;

	44 std::string boundary;

	45

	46 if (content_type_header == NULL) {

	47 choice = kUrlEncoded;

	48 } else {

	49 const std::string content_type(

	50 content_type_header->substr(0, content_type_header->find(';')));

	51

	52 if (base::strcasecmp(

	53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) {

	54 choice = kUrlEncoded;

	55 } else if (base::strcasecmp(

	56 content_type.c_str(), "multipart/form-data") == 0) {

	57 static const char kBoundaryString[] = "boundary=";

	58 size_t offset = content_type_header->find(kBoundaryString);

	59 if (offset == std::string::npos) {

	60 // Malformed header.

	61 return scoped_ptr<FormDataParser>();

	62 }

	63 offset += strlen(kBoundaryString);

	64 boundary = content_type_header->substr(

	65 offset, content_type_header->find(';', offset));

	66 if (!boundary.empty())

	67 choice = kMultipart;

	68 }

	69 }

	70 // Other cases are unparseable, including when \|content_type\| is "text/plain".
	tkent 2012/08/24 14:26:50 Why text/plain is not supported? Why text/plain is not supported? vabr (Chromium) 2012/08/24 16:16:59 This encoding is ambiguous. Nice description from Show quoted text On 2012/08/24 14:26:50, Kent Tamura wrote: > Why text/plain is not supported? This encoding is ambiguous. Nice description from http://stackoverflow.com/questions/7628249/method-post-enctype-text-plain-are... has this example: <form method="post" enctype="text/plain" action="..."> <textarea name="input1">abc input2=def</textarea> <input name="input2" value="ghi" /> <input type="submit"> </form> results in input1=abc input2=def input2=ghi But the first "input2" has nothing to do with the field named "input2".
	71

	72 switch (choice) {

	73 case kUrlEncoded:

	74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());

	75 case kMultipart:

	76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));

	77 default: // In other words, case kError:

	78 return scoped_ptr<FormDataParser>();

	79 }

	80 }

	81

	82 FormDataParser::FormDataParser() {}

	83

	84 // Implementation of FormDataParserUrlEncoded.

	85

	86 FormDataParserUrlEncoded::FormDataParserUrlEncoded()

	87 : source_end_(NULL),

	88 aborted_(false),

	89 offset_(NULL),

	90 equality_signs_(0),

	91 amp_signs_(0),

	92 expect_equality_(true) {

	93 }

	94

	95 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}

	96

	97 bool FormDataParserUrlEncoded::AllDataReadOK() {

	98 return source_.data() != NULL &&

	99 !aborted_ &&

	100 offset_ == source_end_ &&

	101 equality_signs_ == amp_signs_ + 1;

	102 }

	103

	104 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {

	105 result->Reset();

	106 if (source_.data() == NULL \|\| aborted_)

	107 return false;

	108 if (offset_ == source_end_)

	109 return false;

	110 const char* const name_start = &(*offset_);

	111 char c;

	112 bool last_read_success = GetNextChar(&c);

	113 while (last_read_success && c != '=')

	114 last_read_success = GetNextChar(&c);

	115 if (!last_read_success) { // This means the data is malformed.

	116 Abort();

	117 return false;

	118 }

	119 const char* const name_end = offset_ - 1;

	120 const std::string encoded_name(name_start, name_end - name_start);

	121 const net::UnescapeRule::Type unescape_rules =

	122 net::UnescapeRule::URL_SPECIAL_CHARS \| net::UnescapeRule::CONTROL_CHARS \|

	123 net::UnescapeRule::SPACES \| net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;

	124 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));

	125

	126 const char* const value_start = offset_;

	127 last_read_success = GetNextChar(&c);

	128 while (last_read_success && c != '&')

	129 last_read_success = GetNextChar(&c);

	130 const char* const value_end =

	131 last_read_success ? offset_ - 1 : offset_;

	132 const std::string encoded_value(value_start, value_end - value_start);

	133 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));

	134 return true;

	135 }

	136

	137 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {

	138 if (source_ != NULL \|\| source.data() == NULL \|\| aborted_)

	139 return false;

	140 source_ = source;

	141 source_end_ = source_.data() + source_.size();

	142 offset_ = source_.data();

	143 return true;

	144 }

	145

	146 bool FormDataParserUrlEncoded::GetNextChar(char* c) {

	147 if (offset_ == source_end_ \|\| aborted_)

	148 return false;

	149 c = offset_;

	150 ++offset_;

	151

	152 if (*c == '=') {

	153 if (expect_equality_) {

	154 ++equality_signs_;

	155 expect_equality_ = false;

	156 } else {

	157 Abort();

	158 return false;

	159 }

	160 }

	161 if (*c == '&' && offset_ != source_end_) {

	162 if (!expect_equality_) {

	163 ++amp_signs_;

	164 expect_equality_ = true;

	165 } else {

	166 Abort();

	167 return false;

	168 }

	169 }

	170

	171 return true;

	172 }

	173

	174 void FormDataParserUrlEncoded::Abort() {

	175 aborted_ = true;

	176 }

	177

	178 // Implementation of FormDataParserMultipart.

	179

	180 FormDataParserMultipart::FormDataParserMultipart(

	181 const std::string& boundary_separator)

	182 : source_end_(NULL),

	183 offset_(NULL),

	184 dash_boundary_("--" + boundary_separator),

	185 state_(kStart),

	186 value_name_present_(false) {

	187 }

	188

	189 FormDataParserMultipart::~FormDataParserMultipart() {}

	190

	191 bool FormDataParserMultipart::AllDataReadOK() {

	192 return source_.data() != NULL && InFinalState();

	193 }

	194

	195 bool FormDataParserMultipart::GetNextNameValue(Result* result) {

	196 if (!value_name_present_ \|\| state_ == kError)

	197 return false;

	198 result->set_name(next_name_);

	199 result->set_value(next_value_);

	200 next_name_.clear();

	201 next_value_.clear();

	202 value_name_present_ = ReadNextNameValue();

	203 return true;

	204 }

	205

	206 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {

	207 if (state_ == kError \|\|

	208 source.data() == NULL \|\|

	209 // Message part across a source split is also an error.

	210 next_name_.data() != NULL \|\| next_value_.data() != NULL)

	211 return false;

	212 if (source_.data() != NULL && offset_ != source_end_){

	213 // Try to seek until the end. If no name-value pair is found, this is OK.

	214 value_name_present_ = ReadNextNameValue();

	215 if (!value_name_present_ \|\| offset_ != source_end_)

	216 return false;

	217 }

	218 source_ = source;

	219 source_end_ = source_.data() + source_.size();

	220 offset_ = source_.data();

	221 value_name_present_ = ReadNextNameValue();

	222 return true;

	223 }

	224

	225 // static

	226 char FormDataParserMultipart::kTransitionToChar[] = {

	227 '\n', // For kLF.

	228 '\r', // For kCR.

	229 0, // For kAscii.

	230 0, // For kLwsp.

	231 0, // For kDashBoundary.

	232 ':', // For kColonT.

	233 '-', // For kDash.

	234 0, // For kAny.

	235 };

	236

	237 // static

	238 FormDataParserMultipart::Transition

	239 FormDataParserMultipart::kAvailableTransitions[] = {

	240 kDashBoundary, kCR, kAny, // For kStart.

	241 kLF, kAny, // For kCR1.

	242 kCR, kAny, // For kIgnorePreamble.

	243 kLwsp, kCR, kAny, // For kDB1.

	244 kLF, kAny, // For kCR2.

	245 kAscii, kCR, kAny, // For kPart.

	246 kAscii, kColonT, kAny, // For kName.

	247 kLF, kCR, kAny, // For kColonS.

	248 kCR, kAscii, kAny, // For kEnd1.

	249 kLF, kCR, kAscii, kAny, // For kEnd2.

	250 kLwsp, kCR, kAscii, kAny, // For kEnd3.

	251 kLF, kAny, // For kCR3.

	252 kDashBoundary, kCR, kAny, // For kPreData.

	253 kLF, kAny, // For kCR4.

	254 kCR, kAny, // For kData.

	255 kDashBoundary, kAny, // For kData2.

	256 kLwsp, kCR, kDash, kAny, // For kDB2.

	257 kDash, kAny, // For kD.

	258 kLwsp, kCR, kAny, // For kEnd.

	259 kLF, kAny, // For kCR5.

	260 kAny, // For kIgnoreEpilogue.

	261 kAny // For kError.

	262 };

	263

	264 // static

	265 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {

	266 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.

	267 kStart, kIgnorePreamble, // For kCR1; 05.

	268 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.

	269 kDB1, kCR2, kError, // For kDB1; 10.

	270 kPart, kError, // For kCR2; 12.

	271 kName, kCR3, kError, // For kPart; 15.

	272 kName, kColonS, kError, // For kName; 18.

	273 kEnd1, kEnd2, kColonS, // For kColonS; 21.

	274 kCR3, kName, kError, // For kEnd1; 24.

	275 kEnd3, kCR3, kName, kError, // For kEnd2; 28.

	276 kColonS, kCR3, kName, kError, // For kEnd3; 32.

	277 kPreData, kError, // For kCR3; 34.

	278 kDB2, kCR3, kData, // For kPreData; 37.

	279 kData2, kData, // For kCR4; 39.

	280 kCR4, kData, // For kData; 41.

	281 kDB2, kCR4, // For kData2; 43.

	282 kDB1, kCR2, kD, kError, // For kDB2; 47.

	283 kEnd, kError, // For kD; 49.

	284 kEnd, kCR5, kError, // For kEnd; 52.

	285 kIgnoreEpilogue, kError, // For kCR5; 54.

	286 kIgnoreEpilogue, // For kIgnoreEpilogue; 55.

	287 kError // For kError; 56.

	288 };

	289

	290 // static

	291 size_t FormDataParserMultipart::kStateToTransition[] = {

	292 0u, // For kStart

	293 3u, // For kCR1

	294 5u, // For kIgnorePreamble

	295 7u, // For kDB1

	296 10u, // For kCR2

	297 12u, // For kPart

	298 15u, // For kName

	299 18u, // For kColonS

	300 21u, // For kEnd1

	301 24u, // For kEnd2

	302 28u, // For kEnd3

	303 32u, // For kCR3

	304 34u, // For kPreData

	305 37u, // For kCR4

	306 39u, // For kData

	307 41u, // For kData2

	308 43u, // For kDB2

	309 47u, // For kD

	310 49u, // For kEnd

	311 52u, // For kCR5

	312 54u, // For kIgnoreEpilogue

	313 55u, // For kError

	314 };

	315

	316 bool FormDataParserMultipart::ReadNextNameValue() {

	317 if (state_ == kError \|\| source_.data() == NULL \|\|

	318 next_name_.data() != NULL \|\| next_value_.data() != NULL)

	319 return false;

	320

	321 // Seek to the next part's headers.

	322 while (state_ != kPart) {

	323 if (!DoStep())

	324 return false;

	325 }

	326 while (state_ != kPreData) {

	327 const char* header = offset_;

	328 while (state_ != kColonS) {

	329 if (!DoStep())

	330 return false;

	331 }

	332 size_t header_length = 0u;

	333 while (state_ != kPreData && state_ != kName) {

	334 if (state_ == kEnd1 \|\| state_ == kEnd2 \|\| state_ == kEnd3) {

	335 // The cast is safe, we know that offset only moves forward.

	336 header_length = static_cast<size_t>(offset_ - header);

	337 }

	338 if (!DoStep())

	339 return false;

	340 }

	341 if (ParseHeader(base::StringPiece(header, header_length))) {

	342 // Found what we were looking for, just skip to the part's body.

	343 while (state_ != kPreData) {

	344 if (!DoStep())

	345 return false;

	346 }

	347 }

	348 }

	349

	350 const char* body = offset_;

	351 size_t body_length = 0;

	352 while (state_ != kDB2 && offset_ != source_end_) {

	353 if (!DoStep())

	354 return false;

	355 if (state_ == kCR4) {

	356 // We are in the middle of which might be the CRLF starting the part

	357 // separator (see the "delimiter" non-terminal from the grammar given

	358 // in the header file). The cast is safe, we know that offset only moves

	359 // forward and body was assigned at least 1 transition ago.

	360 body_length = static_cast<size_t>(offset_ - body - 1);

	361 }

	362 }

	363 if (body_length > 0)

	364 next_value_.set(body, body_length);

	365 return true;

	366 }

	367

	368 bool FormDataParserMultipart::DoStep() {

	369 if (state_ == kError \|\| offset_ == source_end_)

	370 return false;

	371 size_t transition_index = kStateToTransition[state_];

	372 Transition t = kAvailableTransitions[transition_index];

	373 while (t != kAny) {

	374 const State s = kNextState[transition_index];

	375 const size_t length = LookUp(t);

	376 if (length > 0) {

	377 offset_ += length;

	378 state_ = s;

	379 return true;

	380 }

	381 t = kAvailableTransitions[++transition_index];

	382 }

	383 // We have kAny, the default choice. Seek by one and switch the state.

	384 ++offset_;

	385 state_ = kNextState[transition_index];

	386 return true;

	387 }

	388

	389 // Contract -- the following must be true: offset_ != source_end_ .

	390 // The idea is to check this only once in the caller (DoStep()), and do not

	391 // repeat it here every time, as this can be called many times from one call

	392 // to DoStep().

	393 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {

	394 const char ahead = *offset_;

	395 const char first_char = kTransitionToChar[t];

	396

	397 // Easy case: labels corresponding to a single char.

	398 if (first_char != 0)

	399 return ahead == first_char ? 1u : 0u;

	400

	401 // Harder cases.

	402 switch (t) {

	403 // Multiple alternatives, 1-char long: return immediately.

	404 case kAscii:

	405 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u;

	406 case kLwsp:

	407 return (ahead == ' ' \|\| ahead == '\t') ? 1u : 0u;

	408

	409 // Longer than 1 char: prepare work for later.

	410 case kDashBoundary: {

	411 const size_t length = dash_boundary_.size();

	412 // The cast below is safe, we know that the difference is not negative.

	413 if (static_cast<size_t>(source_end_ - offset_) < length \|\|

	414 memcmp(dash_boundary_.c_str(), offset_, length) != 0)

	415 return 0u;

	416 return length;

	417 }

	418 case kAny:

	419 // We are not supposed to be asked for kAny, but this is the right answer:

	420 return 1u;

	421 default: // We never get here -- the rest has already been handled above.

	422 NOTREACHED();

	423 return 0u;

	424 }

	425 }

	426

	427 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {

	428 static const char kContentDisposition[] = "Content-Disposition:";

	429 if (memcmp(header.data(), kContentDisposition,

	430 strlen(kContentDisposition) != 0))

	431 return false;

	432 static const char kNameEquals[] = " name=\"";

	433 static const char kFilenameEquals[] = " filename=\"";

	434

	435 // Mandatory part: find the name and set it as \|next_name_\|.

	436 StringPiece::size_type field_offset = header.find(kNameEquals);

	437 if (field_offset == StringPiece::npos)

	438 return false;

	439 field_offset += strlen(kNameEquals);

	440 StringPiece::size_type field_end = header.find('"', field_offset);

	441 if (field_end == StringPiece::npos)

	442 return false;

	443 next_name_.set(header.data() + field_offset, field_end - field_offset);

	444

	445 // Optional part: find the filename and set it as \|next_value_\|.

	446 field_offset = header.find(kFilenameEquals);

	447 if (field_offset == StringPiece::npos)

	448 return true; // This was only optional

	449 field_offset += strlen(kFilenameEquals);

	450 field_end = header.find('"', field_offset);

	451 if (field_end == StringPiece::npos)

	452 return false; // This is a malformed header.

	453 next_value_.set(header.data() + field_offset, field_end - field_offset);

	454 return true;

	455 }

	456

	457 } // namespace extensions

OLD	NEW