chrome/browser/extensions/api/web_request/form_data_parser.cc - Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest

Side by Side Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Dominic's comments Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « chrome/browser/extensions/api/web_request/form_data_parser.h ('k') | chrome/browser/extensions/api/web_request/form_data_parser_unittest.cc » ('j') | chrome/browser/extensions/api/web_request/web_request_api_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"

	6

	7 #include "base/string_util.h"

	8 #include "base/values.h"

	9 #include "net/base/escape.h"

	10 #include "net/url_request/url_request.h"

	11

	12 using base::DictionaryValue;

	13 using base::ListValue;

	14 using base::StringPiece;

	15

	16 namespace extensions {

	17

	18 // Implementation of FormDataParser and FormDataParser::Result.

	19

	20 FormDataParser::Result::Result() {}

	21 FormDataParser::Result::~Result() {}

	22

	23 void FormDataParser::Result::Reset() {

	24 name_.erase();

	25 value_.erase();

	26 }

	27

	28 FormDataParser::~FormDataParser() {}

	29

	30 // static

	31 scoped_ptr<FormDataParser> FormDataParser::Create(

	32 const net::URLRequest* request) {

	33 std::string value;

	34 const bool found = request->extra_request_headers().GetHeader(

	35 net::HttpRequestHeaders::kContentType, &value);

	36 return Create(found ? &value : NULL);

	37 }

	38

	39 // static

	40 scoped_ptr<FormDataParser> FormDataParser::Create(

	41 const std::string* content_type_header) {

	42 enum ParserChoice {kUrlEncoded, kMultipart, kError};

	43 ParserChoice choice = kError;

	44 std::string boundary;

	45

	46 if (content_type_header == NULL) {

	47 choice = kUrlEncoded;

	48 } else {

	49 const std::string content_type(

	50 content_type_header->substr(0, content_type_header->find(';')));

	51

	52 if (base::strcasecmp(

	53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) {

	54 choice = kUrlEncoded;

	55 } else if (base::strcasecmp(

	56 content_type.c_str(), "multipart/form-data") == 0) {

	57 static const char kBoundaryString[] = "boundary=";

	58 size_t offset = content_type_header->find(kBoundaryString);

	59 if (offset == std::string::npos) {

	60 // Malformed header.

	61 return scoped_ptr<FormDataParser>();

	62 }

	63 offset += strlen(kBoundaryString);

	64 boundary = content_type_header->substr(

	65 offset, content_type_header->find(';', offset));

	66 if (!boundary.empty())

	67 choice = kMultipart;

	68 }

	69 }

	70 // Other cases are unparseable, including when \|content_type\| is "text/plain".

	71

	72 switch (choice) {

	73 case kUrlEncoded:

	74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());

	75 case kMultipart:

	76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));

	77 default: // In other words, case kError:

	78 return scoped_ptr<FormDataParser>();

	79 }

	80 }

	81

	82 FormDataParser::FormDataParser() {}

	83

	84 // Implementation of FormDataParserUrlEncoded.

	85

	86 FormDataParserUrlEncoded::FormDataParserUrlEncoded()

	87 : source_(NULL),

	88 aborted_(false),

	89 equality_signs_(0),

	90 amp_signs_(0),

	91 expect_equality_(true) {

	92 }

	93

	94 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}

	95

	96 bool FormDataParserUrlEncoded::AllDataReadOK() {

	97 return source_ != NULL &&

	98 !aborted_ &&

	99 offset_ == source_->end() &&

	100 equality_signs_ == amp_signs_ + 1;

	101 }

	102

	103 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {

	104 result->Reset();

	105 if (source_ == NULL \|\| aborted_)

	106 return false;

	107 if (offset_ == source_->end())

	108 return false;

	109 const char* const name_start = &(*offset_);

	110 char c;

	111 bool last_read_success = GetNextChar(&c);

	112 while (last_read_success && c != '=')

	113 last_read_success = GetNextChar(&c);

	114 if (!last_read_success) { // This means the data is malformed.

	115 Abort();

	116 return false;

	117 }

	118 const char* const name_end = &(*(offset_ - 1));

	119 const std::string encoded_name(name_start, name_end - name_start);

	120 const net::UnescapeRule::Type unescape_rules =

	121 net::UnescapeRule::URL_SPECIAL_CHARS \| net::UnescapeRule::CONTROL_CHARS \|

	122 net::UnescapeRule::SPACES \| net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;

	123 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));

	124

	125 const char* const value_start = &(*offset_);

	126 last_read_success = GetNextChar(&c);

	127 while (last_read_success && c != '&')

	128 last_read_success = GetNextChar(&c);

	129 const char* const value_end =

	130 last_read_success ? &((offset_ - 1)) : &(offset_);

	131 const std::string encoded_value(value_start, value_end - value_start);

	132 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));

	133 return true;

	134 }

	135

	136 bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) {

	137 if (source_ != NULL \|\| source == NULL \|\| aborted_)

	138 return false;

	139 source_ = source;

	140 offset_ = source_->begin();

	141 return true;

	142 }

	143

	144 bool FormDataParserUrlEncoded::GetNextChar(char* c) {

	145 if (offset_ == source_->end() \|\| aborted_)

	146 return false;

	147 c = offset_;

	148 ++offset_;

	149

	150 if (*c == '=') {

	151 if (expect_equality_) {

	152 ++equality_signs_;

	153 expect_equality_ = false;

	154 } else {

	155 Abort();

	156 return false;

	157 }

	158 }

	159 if (*c == '&' && offset_ != source_->end()) {

	160 if (!expect_equality_) {

	161 ++amp_signs_;

	162 expect_equality_ = true;

	163 } else {

	164 Abort();

	165 return false;

	166 }

	167 }

	168

	169 return true;

	170 }

	171

	172 void FormDataParserUrlEncoded::Abort() {

	173 aborted_ = true;

	174 }

	175

	176 // Implementation of FormDataParserMultipart.

	177

	178 FormDataParserMultipart::FormDataParserMultipart(

	179 const std::string& boundary_separator)

	180 : source_(NULL),

	181 dash_boundary_("--" + boundary_separator),

	182 state_(kStart),

	183 value_name_present_(false) {

	184 }

	185

	186 FormDataParserMultipart::~FormDataParserMultipart() {}

	187

	188 bool FormDataParserMultipart::AllDataReadOK() {

	189 return source_ != NULL && InFinalState();

	190 }

	191

	192 bool FormDataParserMultipart::GetNextNameValue(Result* result) {

	193 if (!value_name_present_ \|\| state_ == kError)

	194 return false;

	195 result->set_name(next_name_);

	196 result->set_value(next_value_);

	197 next_name_.clear();

	198 next_value_.clear();

	199 value_name_present_ = ReadNextNameValue();

	200 return true;

	201 }

	202

	203 bool FormDataParserMultipart::SetSource(const std::vector<char>* source) {

	204 if (state_ == kError \|\|

	205 source == NULL \|\|

	206 // Message part across a source split is also an error.

	207 next_name_.data() != NULL \|\| next_value_.data() != NULL)

	208 return false;

	209 if (source_ != NULL && offset_ != source_->end()){

	210 // Try to seek until the end. If no name-value pair is found, this is OK.

	211 value_name_present_ = ReadNextNameValue();

	212 if (!value_name_present_ \|\| offset_ != source_->end())

	213 return false;

	214 }

	215 source_ = source;

	216 offset_ = source_->begin();

	217 value_name_present_ = ReadNextNameValue();

	218 return true;

	219 }

	220

	221 // static

	222 char FormDataParserMultipart::kTransitionToChar[] = {

	223 '\n', // For kLF.

	224 '\r', // For kCR.

	225 0, // For kAscii.

	226 0, // For kLwsp.

	227 0, // For kDashBoundary.

	228 ':', // For kColonT.

	229 '-', // For kDash.

	230 0, // For kAny.

	231 };

	232

	233 // static

	234 FormDataParserMultipart::Transition

	235 FormDataParserMultipart::kAvailableTransitions[] = {

	236 kDashBoundary, kCR, kAny, // For kStart.

	237 kLF, kAny, // For kCR1.

	238 kCR, kAny, // For kIgnorePreamble.

	239 kLwsp, kCR, kAny, // For kDB1.

	240 kLF, kAny, // For kCR2.

	241 kAscii, kCR, kAny, // For kPart.

	242 kAscii, kColonT, kAny, // For kName.

	243 kLF, kCR, kAny, // For kColonS.

	244 kCR, kAscii, kAny, // For kEnd1.

	245 kLF, kCR, kAscii, kAny, // For kEnd2.

	246 kLwsp, kCR, kAscii, kAny, // For kEnd3.

	247 kLF, kAny, // For kCR3.

	248 kDashBoundary, kCR, kAny, // For kPreData.

	249 kLF, kAny, // For kCR4.

	250 kCR, kAny, // For kData.

	251 kDashBoundary, kAny, // For kData2.

	252 kLwsp, kCR, kDash, kAny, // For kDB2.

	253 kDash, kAny, // For kD.

	254 kLwsp, kCR, kAny, // For kEnd.

	255 kLF, kAny, // For kCR5.

	256 kAny, // For kIgnoreEpilogue.

	257 kAny // For kError.

	258 };

	259

	260 // static

	261 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {

	262 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.

	263 kStart, kIgnorePreamble, // For kCR1; 05.

	264 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.

	265 kDB1, kCR2, kError, // For kDB1; 10.

	266 kPart, kError, // For kCR2; 12.

	267 kName, kCR3, kError, // For kPart; 15.

	268 kName, kColonS, kError, // For kName; 18.

	269 kEnd1, kEnd2, kColonS, // For kColonS; 21.

	270 kCR3, kName, kError, // For kEnd1; 24.

	271 kEnd3, kCR3, kName, kError, // For kEnd2; 28.

	272 kColonS, kCR3, kName, kError, // For kEnd3; 32.

	273 kPreData, kError, // For kCR3; 34.

	274 kDB2, kCR3, kData, // For kPreData; 37.

	275 kData2, kData, // For kCR4; 39.

	276 kCR4, kData, // For kData; 41.

	277 kDB2, kCR4, // For kData2; 43.

	278 kDB1, kCR2, kD, kError, // For kDB2; 47.

	279 kEnd, kError, // For kD; 49.

	280 kEnd, kCR5, kError, // For kEnd; 52.

	281 kIgnoreEpilogue, kError, // For kCR5; 54.

	282 kIgnoreEpilogue, // For kIgnoreEpilogue; 55.

	283 kError // For kError; 56.

	284 };

	285

	286 // static

	287 size_t FormDataParserMultipart::kStateToTransition[] = {

	288 0u, // For kStart

	289 3u, // For kCR1

	290 5u, // For kIgnorePreamble

	291 7u, // For kDB1

	292 10u, // For kCR2

	293 12u, // For kPart

	294 15u, // For kName

	295 18u, // For kColonS

	296 21u, // For kEnd1

	297 24u, // For kEnd2

	298 28u, // For kEnd3

	299 32u, // For kCR3

	300 34u, // For kPreData

	301 37u, // For kCR4

	302 39u, // For kData

	303 41u, // For kData2

	304 43u, // For kDB2

	305 47u, // For kD

	306 49u, // For kEnd

	307 52u, // For kCR5

	308 54u, // For kIgnoreEpilogue

	309 55u, // For kError

	310 };

	311

	312 bool FormDataParserMultipart::ReadNextNameValue() {

	313 if (state_ == kError \|\| source_ == NULL \|\|

	314 next_name_.data() != NULL \|\| next_value_.data() != NULL)

	315 return false;

	316

	317 // Seek to the next part's headers.

	318 while (state_ != kPart) {

	319 if (!DoStep())

	320 return false;

	321 }

	322 while (state_ != kPreData) {

	323 const char* header = &(*offset_);

	324 while (state_ != kColonS) {

	325 if (!DoStep())

	326 return false;

	327 }

	328 size_t header_length = 0u;

	329 while (state_ != kPreData && state_ != kName) {

	330 if (state_ == kEnd1 \|\| state_ == kEnd2 \|\| state_ == kEnd3) {

	331 // The cast is safe, we know that offset only moves forward.

	332 header_length = static_cast<size_t>(&(*offset_) - header);

	333 }

	334 if (!DoStep())

	335 return false;

	336 }

	337 if (ParseHeader(base::StringPiece(header, header_length))) {

	338 // Found what we were looking for, just skip to the part's body.

	339 while (state_ != kPreData) {

	340 if (!DoStep())

	341 return false;

	342 }

	343 }

	344 }

	345

	346 const char* body = &(*offset_);

	347 size_t body_length = 0;

	348 while (state_ != kDB2 && offset_ != source_->end()) {

	349 if (!DoStep())

	350 return false;

	351 if (state_ == kCR4) {

	352 // We are in the middle of which might be the CRLF starting the part

	353 // separator (see the "delimiter" non-terminal from the grammar given

	354 // in the header file). The cast is safe, we know that offset only moves

	355 // forward and body was assigned at least 1 transition ago.

	356 body_length = static_cast<size_t>(&(*offset_) - body - 1);

	357 }

	358 }

	359 if (body_length > 0)

	360 next_value_.set(body, body_length);

	361 return true;

	362 }

	363

	364 bool FormDataParserMultipart::DoStep() {

	365 if (state_ == kError \|\| source_ == NULL \|\| offset_ == source_->end())

	366 return false;

	367 size_t transition_index = kStateToTransition[state_];

	368 Transition t = kAvailableTransitions[transition_index];

	369 while (t != kAny) {

	370 const State s = kNextState[transition_index];

	371 const size_t length = LookUp(t);

	372 if (length > 0) {

	373 offset_ += length;

	374 state_ = s;

	375 return true;

	376 }

	377 t = kAvailableTransitions[++transition_index];

	378 }

	379 // We have kAny, the default choice. Seek by one and switch the state.

	380 ++offset_;

	381 state_ = kNextState[transition_index];

	382 return true;

	383 }

	384

	385 // Contract -- the following must be true:

	386 // source_ != NULL && offset_ != source.end()

	387 // The idea is to check this only once in the caller (DoStep()), and do not

	388 // repeat it here every time, as this can be called many times from one call

	389 // to DoStep().

	390 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {

	391 const char ahead = *offset_;

	392 const char first_char = kTransitionToChar[t];

	393

	394 // Easy case: labels corresponding to a single char.

	395 if (first_char != 0)

	396 return ahead == first_char ? 1u : 0u;

	397

	398 // Harder cases.

	399 switch (t) {

	400 // Multiple alternatives, 1-char long: return immediately.

	401 case kAscii:

	402 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u;

	403 case kLwsp:

	404 return (ahead == ' ' \|\| ahead == '\t') ? 1u : 0u;

	405

	406 // Longer than 1 char: prepare work for later.

	407 case kDashBoundary: {

	408 const size_t length = dash_boundary_.size();

	409 // The cast below is safe, we know that the difference is not negative.

	410 if (static_cast<size_t>(source_->end() - offset_) < length \|\|

	411 memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0)

	412 return 0u;

	413 return length;

	414 }

	415 case kAny:

	416 // We are not supposed to be asked for kAny, but this is the right answer:

	417 return 1u;

	418 default: // We never get here -- the rest has already been handled above.

	419 NOTREACHED();

	420 return 0u;

	421 }

	422 }

	423

	424 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {

	425 static const char kContentDisposition[] = "Content-Disposition:";

	426 if (memcmp(header.data(), kContentDisposition,

	427 strlen(kContentDisposition) != 0))

	428 return false;

	429 static const char kNameEquals[] = " name=\"";

	430 static const char kFilenameEquals[] = " filename=\"";

	431

	432 // Mandatory part: find the name and set it as \|next_name_\|.

	433 StringPiece::size_type field_offset = header.find(kNameEquals);

	434 if (field_offset == StringPiece::npos)

	435 return false;

	436 field_offset += strlen(kNameEquals);

	437 StringPiece::size_type field_end = header.find('"', field_offset);

	438 if (field_end == StringPiece::npos)

	439 return false;

	440 next_name_.set(header.data() + field_offset, field_end - field_offset);

	441

	442 // Optional part: find the filename and set it as \|next_value_\|.

	443 field_offset = header.find(kFilenameEquals);

	444 if (field_offset == StringPiece::npos)

	445 return true; // This was only optional

	446 field_offset += strlen(kFilenameEquals);

	447 field_end = header.find('"', field_offset);

	448 if (field_end == StringPiece::npos)

	449 return false; // This is a malformed header.

	450 next_value_.set(header.data() + field_offset, field_end - field_offset);

	451 return true;

	452 }

	453

	454 } // namespace extensions

OLD	NEW