chrome/browser/extensions/api/web_request/form_data_parser.cc - Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest

Unified Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Kent's first comments Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/extensions/api/web_request/form_data_parser.h ('k') | chrome/browser/extensions/api/web_request/form_data_parser_unittest.cc » ('j') | chrome/common/extensions/api/web_request.json » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/extensions/api/web_request/form_data_parser.cc

diff --git a/chrome/browser/extensions/api/web_request/form_data_parser.cc b/chrome/browser/extensions/api/web_request/form_data_parser.cc

new file mode 100644

index 0000000000000000000000000000000000000000..0ea100cf856a4919b653c06aabd20150b98d95de

--- /dev/null

+++ b/chrome/browser/extensions/api/web_request/form_data_parser.cc

@@ -0,0 +1,730 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/extensions/api/web_request/form_data_parser.h"

+#include <vector>

+#include "base/string_util.h"

+#include "base/values.h"

+#include "net/base/escape.h"

+#include "net/url_request/url_request.h"

+using base::DictionaryValue;

+using base::ListValue;

+using base::StringPiece;

+namespace extensions {

+// Parses URLencoded forms, see

+// http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .

+class FormDataParserUrlEncoded : public FormDataParser {

+ public:

+ FormDataParserUrlEncoded();

+ virtual ~FormDataParserUrlEncoded();

+ // Implementation of FormDataParser.

+ virtual bool AllDataReadOK() OVERRIDE;

+ virtual bool GetNextNameValue(Result* result) OVERRIDE;

+ virtual bool SetSource(const base::StringPiece& source) OVERRIDE;

+ private:

+ // Gets next char from |source_|, seeks, and does book-keeping of = and &.

+ // Returns false if end of |source_| was reached, otherwise true.

+ bool GetNextChar(char* c);

+ // Once called the parser gives up and claims any results so far invalid.

+ void Abort();

+ base::StringPiece source_;

+ const char* source_end_;

+ bool aborted_;

+ // Variables from this block are only to be written to by GetNextChar.

+ const char* offset_; // Next char to be read.

+ size_t equality_signs_; // How many '=' were read so far.

+ size_t amp_signs_; // How many '&' were read so far.

+ bool expect_equality_; // Is the next trailing sign '=' (as opposed to '&')?

+ DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);

+};

+// The following class, FormDataParserMultipart, parses forms encoded as

+// multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart

+// encoding) and 5322 (MIME-headers; obsoletes RFC 822).

tkent 2012/08/27 07:09:17 Please do not refer to RFC 822, which was obsolete

vabr (Chromium) 2012/08/29 19:57:07 Done. Thanks for making me aware of this.

+//

+// Implementation details

+//

+// The original grammar from RFC 2046 is this, "multipart-body" being the root

+// non-terminal:

+//

+// boundary := 0*69<bchars> bcharsnospace

+// bchars := bcharsnospace / " "

+// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / ","

+// / "-" / "." / "/" / ":" / "=" / "?"

+// dash-boundary := "--" boundary

+// multipart-body := [preamble CRLF]

+// dash-boundary transport-padding CRLF

+// body-part *encapsulation

+// close-delimiter transport-padding

+// [CRLF epilogue]

+// transport-padding := *LWSP-char

+// encapsulation := delimiter transport-padding CRLF body-part

+// delimiter := CRLF dash-boundary

+// close-delimiter := delimiter "--"

+// preamble := discard-text

+// epilogue := discard-text

+// discard-text := *(*text CRLF) *text

+// body-part := MIME-part-headers [CRLF *OCTET]

+// OCTET := <any 0-255 octet value>

+//

+// Here, CRLF, DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters

+// of the English alphabet, respectively.

+// The non-terminal "text" is presumably just any text, excluding line breaks.

+// The non-terminal "LWSP-char" is not directly defined in the original grammar

+// but it means "linear whitespace", which is a space or a horizontal tab.

+// The non-terminal "MIME-part-headers" is not discussed in RFC 2046, but is in

+// English defined in RFC 5322 (which obsoletes RFC 822), and can be presented

+// as follows:

+//

+// MIME-part-headers := *MIME-part-header

+// MIME-part-header := name ':' *(text / whitespace) linebreak

+// linebreak := '\r' / '\n' / CRLF

+// whitespace := LWSP-char / CRLF LWSP-char

+// name := namechar *namechar

+// namechar := <ASCII char between 33 and 126, excluding ':'>

+//

+// These sets of rules together compose a grammar, with the root non-terminal

+// "multipart-body". This grammar defines a regular language. Indeed, if the

+// non-terminals are ordered in this way:

+// namechar < name < CRLF < DIGIT < ALPHA < text < LWSP-char < whitespace <

+// linebreak < MIME-part-header < MIME-part-headers < bcharsnospace <

+// bchars < boundary < dash-boundary < delimiter < close-delimiter <

+// discard-text < transport-padding < OCTET < body-part < encapsulation <

+// multipart-body

+// then it is easy to verify that whenever A<B then no grammar rule with head

+// A contains B in the body. By induction on the above order, each non-terminal

+// defines a regular language: a non-terminal C is defined by a rule C := exp,

+// where "exp" is an expression composed from character constants, non-terminals

+// less than C, and the following closure operations of regular languages:

+// concatenation, union and Kleene-star. By induction, all the lesser

+// non-terminals represent regular languages, thus "exp" also represents a

+// regular language. In particular, the root non-terminal (and thus the grammar)

+// defines a regular language.

+//

+// The FormDataParserMultipart class uses a finite automaton to represent this

+// language. It is easiest to view it in an extended form, with longer words

+// allowed to label a single transition to keep the number of states low.

+// Important states have full-word names, unimportant states (always with only

+// one incoming label) have names abbreviating the incoming label, possibly

+// with an index.

+//

+// Automaton for "multipart-body":

+// Initial state = Start

+// Final states = {End, IgnoreEpilogue}

+// Implicit state (when a transition is missing) = Error

+// Transition table ('*' is a label matching everything not matched by other

+// labels leaving the same state):

+// FROM LABEL TO

+// Start dash-boundary DB1

+// CR CR1

+// * IgnorePreamble

+// CR1 LF Start

+// * IgnorePreamble

+// IgnorePreamble CR CR1

+// * IgnorePreamble

+// DB1 LWSP-char DB1

+// CR CR2

+// CR2 LF Part

+// Part <ASCII 33-126, excluding ':'> Name

+// CR CR3

+// Name <ASCII 33-126, excluding ':'> Name

+// ':' Colon

+// Colon LF End1

+// CR End2

+// * Colon

+// End1 CR CR3

+// <ASCII 33-126, excluding ':'> Name

+// End2 LF End3

+// CR CR3

+// <ASCII 33-126, excluding ':'> Name

+// End3 LWSP-char Colon

+// CR CR3

+// <ASCII 33-126, excluding ':'> Name

+// CR3 LF PreData

+// PreData dash-boundary DB2

+// CR CR4

+// * Data

+// CR4 LF Data2

+// * Data

+// Data CR CR4

+// * Data

+// Data2 dash-boundary DB2

+// * CR4

+// DB2 LWSP-char DB1

+// CR CR2

+// '-' D

+// D '-' End

+// End LWSP-char End

+// CR CR5

+// CR5 LF IgnoreEpilogue

+// IgnoreEpilogue * IgnoreEpilogue

+//

+// The automaton itself only allows to check that the input is a well-formed

+// multipart encoding of a form. To also extract the data, additional logic is

+// added:

+// * The header "Content-Disposition" (read between Part and PreData) contains

+// the elements name=... and optionally filename=... The former is the name

+// of the corresponding field of a form. The latter is only present if that

+// field was a file-upload, and contains the path to the uploaded file.

+// * The data of a message part is read between PreData and DB2, excluding the

+// last CR LF dash-boundary.

+//

+// IMPORTANT NOTE

+// This parser supports multiple sources, i.e., SetSource can be called multiple

+// times if the input is spread over several byte blocks. However, the split

+// must not occur in the middle of a transition of the above described automaton,

+// e.g., if there is a transition StateA --dash-boundary--> StateB, then the

+// whole string with the dash-boundary must be contained in the first source,

tkent 2012/08/27 07:09:17 bust -> must?

vabr (Chromium) 2012/08/29 19:57:07 Rewritten in the meantime.

+// or in the other. Also, the split must not occur in the middle of a header,

+// or a part body data. A message part from one source must be read via

+// GetNextNameValue before setting up a new source.

+class FormDataParserMultipart : public FormDataParser {

+ public:

+ explicit FormDataParserMultipart(const std::string& boundary_separator);

+ virtual ~FormDataParserMultipart();

+ // Implementation of FormDataParser.

+ virtual bool AllDataReadOK() OVERRIDE;

+ virtual bool GetNextNameValue(Result* result) OVERRIDE;

+ virtual bool SetSource(const base::StringPiece& source) OVERRIDE;

+ private:

+ // State and Transition are numbered to make sure they form a continuous block

+ // of numbers for array indexing in lookup tables. If changing State or

+ // Transition, don't forget to update k*Size and the lookup tables.

+ enum State {

+ kStart = 0,

tkent 2012/08/27 07:09:17 See http://www.chromium.org/developers/coding-styl

vabr (Chromium) 2012/08/29 19:57:07 Added STATE_ prefix to states, transitions disappe

+ kCR1 = 1,

+ kIgnorePreamble = 2,

+ kDB1 = 3,

+ kCR2 = 4,

+ kPart = 5,

+ kName = 6,

+ kColonS = 7, // "S" to distinguish it from the transition kColonT.

+ kEnd1 = 8,

+ kEnd2 = 9,

+ kEnd3 = 10,

+ kCR3 = 11,

+ kPreData = 12,

+ kCR4 = 13,

+ kData = 14,

+ kData2 = 15,

+ kDB2 = 16,

+ kD = 17,

+ kEnd = 18,

+ kCR5 = 19,

+ kIgnoreEpilogue = 20,

+ kError = 21

+ };

+ enum Transition {

+ kLF = 0,

+ kCR = 1,

+ kAscii = 2, // A "shorthand" for ASCII 33-126 without ':'.

+ kLwsp = 3,

+ kDashBoundary = 4,

+ kColonT = 5, // "T" to distinguish it from the state kColonS.

+ kDash = 6, // Meaning '-', not "--".

+ kAny = 7 // To represent '*'.

+ };

+ static const size_t kStateSize = 22;

+ static const size_t kTransitionSize = 8;

+ // Lookup tables:

+ // Maps transitions with one-character label to that character (else to 0).

+ static char kTransitionToChar[];

+ // Indices of transitions available in state |s| in |kAvailableTransitions|

+ // start at kStateToTransition[s] and the last transition for |s| is always

+ // kAny. The target state corresponding to transition kAvailableTransitions[i]

+ // is kNextState[i].

+ static Transition kAvailableTransitions[];

+ static State kNextState[];

+ static size_t kStateToTransition[];

+ // Reads the source until the next name-value pair is read. Returns true if

+ // |next_name_| and |next_value_| were successfully updated.

+ bool ReadNextNameValue();

+ // One step of the automaton, based on |state_| and the input from |source_|

+ // to be read. Updates the |offset_| iterator. Returns true on success.

+ bool DoStep();

+ // Tests whether the input pointed to by |offset_| allows to read transition

+ // |t|. It returns the number of bytes to be read, or 0 if |t| cannot be read.

+ size_t LookUp(Transition t);

+ // Extracts "name" and possibly "value" from a Content-Disposition header.

+ // Writes directly into |next_name_| and |next_value_|. Returns true on

+ // success and false otherwise.

+ bool ParseHeader(const base::StringPiece& header);

+ bool InFinalState() {

+ return state_ == kEnd || state_ == kIgnoreEpilogue;

+ }

+ // The parsed message can be split into multiple sources which we read

+ // sequentially.

+ base::StringPiece source_;

+ const char* source_end_;

+ const char* offset_;

tkent 2012/08/27 07:09:17 The name "offset_" is confusing. It's not an offs

vabr (Chromium) 2012/08/29 19:57:07 You're right. This disappeared after rewriting.

+ // The dash-boundary string is used for all sources.

+ const std::string dash_boundary_;

+ State state_;

+ // The next result to be returned by GetNextNameValue. It is stored as a pair

+ // of StringPieces instead of a Result, to avoid one copy of the data (note

+ // that Result stores a copy of the data in std::string, whereas StringPiece

+ // is just a pointer to the data in |source_|).

+ base::StringPiece next_name_;

+ base::StringPiece next_value_;

+ bool value_name_present_;

+ DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);

+};

+// Implementation of FormDataParser and FormDataParser::Result .

+FormDataParser::Result::Result() {}

+FormDataParser::Result::~Result() {}

+void FormDataParser::Result::Reset() {

+ name_.erase();

+ value_.erase();

+FormDataParser::~FormDataParser() {}

+// static

+scoped_ptr<FormDataParser> FormDataParser::Create(

+ const net::URLRequest* request) {

+ std::string value;

+ const bool found = request->extra_request_headers().GetHeader(

+ net::HttpRequestHeaders::kContentType, &value);

+ return Create(found ? &value : NULL);

+// static

+// Chooses a parser based on the value of the Content-Type header passed in
+// |content_type_header|. Returns a URL-encoded parser if the header is absent
+// (NULL) or its media type is "application/x-www-form-urlencoded", a multipart
+// parser if the media type is "multipart/form-data" with a non-empty boundary
+// parameter, and a NULL scoped_ptr in all other cases.
+scoped_ptr<FormDataParser> FormDataParser::Create(
+    const std::string* content_type_header) {
+  enum ParserChoice {kUrlEncoded, kMultipart, kError};
+  ParserChoice choice = kError;
+  std::string boundary;
+
+  if (content_type_header == NULL) {
+    choice = kUrlEncoded;
+  } else {
+    // The media type is everything in front of the first ';' (if any).
+    const std::string content_type(
+        content_type_header->substr(0, content_type_header->find(';')));
+
+    if (base::strcasecmp(
+            content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
+      choice = kUrlEncoded;
+    } else if (base::strcasecmp(
+                   content_type.c_str(), "multipart/form-data") == 0) {
+      static const char kBoundaryString[] = "boundary=";
+      size_t offset = content_type_header->find(kBoundaryString);
+      if (offset == std::string::npos) {
+        // Malformed header.
+        return scoped_ptr<FormDataParser>();
+      }
+      offset += strlen(kBoundaryString);
+      // std::string::substr() takes a length, not an end position, so the
+      // position of the terminating ';' (if any) has to be converted first.
+      // Otherwise parameters following the boundary would leak into it.
+      const size_t boundary_end = content_type_header->find(';', offset);
+      const size_t boundary_length = boundary_end == std::string::npos ?
+          std::string::npos : boundary_end - offset;
+      boundary = content_type_header->substr(offset, boundary_length);
+      if (!boundary.empty())
+        choice = kMultipart;
+    }
+  }
+  // Other cases are unparseable, including when |content_type| is "text/plain".
+
+  switch (choice) {
+    case kUrlEncoded:
+      return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
+    case kMultipart:
+      return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
+    default:  // In other words, case kError:
+      return scoped_ptr<FormDataParser>();
+  }
+}

+FormDataParser::FormDataParser() {}

+// Implementation of FormDataParserUrlEncoded.

+FormDataParserUrlEncoded::FormDataParserUrlEncoded()

+ : source_end_(NULL),

+ aborted_(false),

+ offset_(NULL),

+ equality_signs_(0),

+ amp_signs_(0),

+ expect_equality_(true) {

+FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}

+bool FormDataParserUrlEncoded::AllDataReadOK() {

+ return source_.data() != NULL &&

+ !aborted_ &&

+ offset_ == source_end_ &&

+ equality_signs_ == amp_signs_ + 1;

tkent 2012/08/27 07:09:17 Why do we need to check the number of = and & ? eq

vabr (Chromium) 2012/08/29 19:57:07 Now the parser uses a regexp which eliminates such

+bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {

+ result->Reset();

+ if (source_.data() == NULL || aborted_)

+ return false;

+ if (offset_ == source_end_)

+ return false;

+ const char* const name_start = &(*offset_);

+ char c;

+ bool last_read_success = GetNextChar(&c);

+ while (last_read_success && c != '=')

+ last_read_success = GetNextChar(&c);

+ if (!last_read_success) { // This means the data is malformed.

+ Abort();

+ return false;

+ }

+ const char* const name_end = offset_ - 1;

+ const std::string encoded_name(name_start, name_end - name_start);

+ const net::UnescapeRule::Type unescape_rules =

+ net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |

+ net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;

+ result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));

+ const char* const value_start = offset_;

+ last_read_success = GetNextChar(&c);

+ while (last_read_success && c != '&')

+ last_read_success = GetNextChar(&c);

+ const char* const value_end =

+ last_read_success ? offset_ - 1 : offset_;

+ const std::string encoded_value(value_start, value_end - value_start);

+ result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));

+ return true;

+// Sets |source| as the data to be parsed. Returns false, leaving the parser
+// untouched, if a source has already been set, if |source| has no data, or if
+// the parser has aborted. Otherwise stores |source|, points |offset_| at its
+// first char and |source_end_| just past its last char, and returns true.
+bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {
+  // Test data() against NULL, as AllDataReadOK() and GetNextNameValue() do.
+  // Comparing the StringPiece itself with NULL is presumably a comparison of
+  // contents, which would not notice an already set but empty source.
+  if (source_.data() != NULL || source.data() == NULL || aborted_)
+    return false;
+  source_ = source;
+  source_end_ = source_.data() + source_.size();
+  offset_ = source_.data();
+  return true;
+}

+bool FormDataParserUrlEncoded::GetNextChar(char* c) {

+ if (offset_ == source_end_ || aborted_)

+ return false;

+ *c = *offset_;

+ ++offset_;

+ if (*c == '=') {

+ if (expect_equality_) {

+ ++equality_signs_;

+ expect_equality_ = false;

+ } else {

+ Abort();

+ return false;

+ }

+ if (*c == '&' && offset_ != source_end_) {

+ if (!expect_equality_) {

+ ++amp_signs_;

+ expect_equality_ = true;

+ } else {

+ Abort();

+ return false;

+ }

+ return true;

+void FormDataParserUrlEncoded::Abort() {

+ aborted_ = true;

+// Implementation of FormDataParserMultipart.

+FormDataParserMultipart::FormDataParserMultipart(

+ const std::string& boundary_separator)

+ : source_end_(NULL),

+ offset_(NULL),

+ dash_boundary_("--" + boundary_separator),

+ state_(kStart),

+ value_name_present_(false) {

+FormDataParserMultipart::~FormDataParserMultipart() {}

+bool FormDataParserMultipart::AllDataReadOK() {

+ return source_.data() != NULL && InFinalState();

+bool FormDataParserMultipart::GetNextNameValue(Result* result) {

+ if (!value_name_present_ || state_ == kError)

+ return false;

+ result->set_name(next_name_);

+ result->set_value(next_value_);

+ next_name_.clear();

+ next_value_.clear();

+ value_name_present_ = ReadNextNameValue();

+ return true;

+// Sets |source| as the next block of data to be parsed and tries to read the
+// first name-value pair from it. Returns false if the parser is in the error
+// state, if |source| has no data, if a name-value pair read from the previous
+// source has not been retrieved yet, or if the previous source cannot be read
+// until its end without yielding another pair. Returns true otherwise.
+bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {
+  if (state_ == kError ||
+      source.data() == NULL ||
+      // Message part across a source split is also an error.
+      next_name_.data() != NULL || next_value_.data() != NULL)
+    return false;
+  if (source_.data() != NULL && offset_ != source_end_) {
+    // Try to seek until the end. If no name-value pair is found, this is OK.
+    // Finding one is not: it points into the old source and still has to be
+    // retrieved via GetNextNameValue before the source is replaced.
+    value_name_present_ = ReadNextNameValue();
+    if (value_name_present_ || offset_ != source_end_)
+      return false;
+  }
+  source_ = source;
+  source_end_ = source_.data() + source_.size();
+  offset_ = source_.data();
+  value_name_present_ = ReadNextNameValue();
+  return true;
+}

+// static

+char FormDataParserMultipart::kTransitionToChar[] = {

+ '\n', // For kLF.

+ '\r', // For kCR.

+ 0, // For kAscii.

+ 0, // For kLwsp.

+ 0, // For kDashBoundary.

+ ':', // For kColonT.

+ '-', // For kDash.

+ 0, // For kAny.

+};

+// static

+FormDataParserMultipart::Transition

+ FormDataParserMultipart::kAvailableTransitions[] = {

+ kDashBoundary, kCR, kAny, // For kStart.

+ kLF, kAny, // For kCR1.

+ kCR, kAny, // For kIgnorePreamble.

+ kLwsp, kCR, kAny, // For kDB1.

+ kLF, kAny, // For kCR2.

+ kAscii, kCR, kAny, // For kPart.

+ kAscii, kColonT, kAny, // For kName.

+ kLF, kCR, kAny, // For kColonS.

+ kCR, kAscii, kAny, // For kEnd1.

+ kLF, kCR, kAscii, kAny, // For kEnd2.

+ kLwsp, kCR, kAscii, kAny, // For kEnd3.

+ kLF, kAny, // For kCR3.

+ kDashBoundary, kCR, kAny, // For kPreData.

+ kLF, kAny, // For kCR4.

+ kCR, kAny, // For kData.

+ kDashBoundary, kAny, // For kData2.

+ kLwsp, kCR, kDash, kAny, // For kDB2.

+ kDash, kAny, // For kD.

+ kLwsp, kCR, kAny, // For kEnd.

+ kLF, kAny, // For kCR5.

+ kAny, // For kIgnoreEpilogue.

+ kAny // For kError.

+};

+// static

+// Target states of the automaton: kNextState[i] is the state entered when the
+// transition kAvailableTransitions[i] is taken. The rows below must stay in
+// sync with kAvailableTransitions and kStateToTransition, and with the
+// transition table in the comment above the class declaration.
+FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {

tkent 2012/08/27 07:09:17 kAvailableTransitions and kNextState should be mer

vabr (Chromium) 2012/08/29 19:57:07 Disappeared after rewriting.

+  kDB1, kCR1, kIgnorePreamble,  // For kStart; size so far: 03.
+  kStart, kIgnorePreamble,      // For kCR1; 05.
+  kCR1, kIgnorePreamble,        // For kIgnorePreamble; 07.
+  kDB1, kCR2, kError,           // For kDB1; 10.
+  kPart, kError,                // For kCR2; 12.
+  kName, kCR3, kError,          // For kPart; 15.
+  kName, kColonS, kError,       // For kName; 18.
+  kEnd1, kEnd2, kColonS,        // For kColonS; 21.
+  kCR3, kName, kError,          // For kEnd1; 24.
+  kEnd3, kCR3, kName, kError,   // For kEnd2; 28.
+  kColonS, kCR3, kName, kError, // For kEnd3; 32.
+  kPreData, kError,             // For kCR3; 34.
+  // A CR read in kPreData is already part of the body (or of the delimiter
+  // ending it), so it leads to kCR4 as in the documented table, not to kCR3.
+  kDB2, kCR4, kData,            // For kPreData; 37.
+  kData2, kData,                // For kCR4; 39.
+  kCR4, kData,                  // For kData; 41.
+  kDB2, kCR4,                   // For kData2; 43.
+  kDB1, kCR2, kD, kError,       // For kDB2; 47.
+  kEnd, kError,                 // For kD; 49.
+  kEnd, kCR5, kError,           // For kEnd; 52.
+  kIgnoreEpilogue, kError,      // For kCR5; 54.
+  kIgnoreEpilogue,              // For kIgnoreEpilogue; 55.
+  kError                        // For kError; 56.
+};

+// static

+size_t FormDataParserMultipart::kStateToTransition[] = {

+ 0u, // For kStart

+ 3u, // For kCR1

+ 5u, // For kIgnorePreamble

+ 7u, // For kDB1

+ 10u, // For kCR2

+ 12u, // For kPart

+ 15u, // For kName

+ 18u, // For kColonS

+ 21u, // For kEnd1

+ 24u, // For kEnd2

+ 28u, // For kEnd3

+ 32u, // For kCR3

+ 34u, // For kPreData

+ 37u, // For kCR4

+ 39u, // For kData

+ 41u, // For kData2

+ 43u, // For kDB2

+ 47u, // For kD

+ 49u, // For kEnd

+ 52u, // For kCR5

+ 54u, // For kIgnoreEpilogue

+ 55u, // For kError

+};

+bool FormDataParserMultipart::ReadNextNameValue() {

+ if (state_ == kError || source_.data() == NULL ||

+ next_name_.data() != NULL || next_value_.data() != NULL)

+ return false;

+ // Seek to the next part's headers.

+ while (state_ != kPart) {

+ if (!DoStep())

+ return false;

+ }

tkent 2012/08/27 07:09:17 We had better have a function DoStepsUntil(State).

vabr (Chromium) 2012/08/29 19:57:07 Disappeared after the rewrite.

+ while (state_ != kPreData) {

+ const char* header = offset_;

+ while (state_ != kColonS) {

+ if (!DoStep())

+ return false;

+ }

+ size_t header_length = 0u;

+ while (state_ != kPreData && state_ != kName) {

+ if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) {

+ // The cast is safe, we know that offset only moves forward.

+ header_length = static_cast<size_t>(offset_ - header);

+ }

+ if (!DoStep())

+ return false;

+ }

+ if (ParseHeader(base::StringPiece(header, header_length))) {

+ // Found what we were looking for, just skip to the part's body.

+ while (state_ != kPreData) {

+ if (!DoStep())

+ return false;

+ }

+ const char* body = offset_;

+ size_t body_length = 0;

+ while (state_ != kDB2 && offset_ != source_end_) {

+ if (!DoStep())

+ return false;

+ if (state_ == kCR4) {

+ // We are in the middle of which might be the CRLF starting the part

+ // separator (see the "delimiter" non-terminal from the grammar given

+ // in the header file). The cast is safe, we know that offset only moves

+ // forward and body was assigned at least 1 transition ago.

+ body_length = static_cast<size_t>(offset_ - body - 1);

+ }

+ if (body_length > 0)

+ next_value_.set(body, body_length);

+ return true;

+bool FormDataParserMultipart::DoStep() {

+ if (state_ == kError || offset_ == source_end_)

+ return false;

+ size_t transition_index = kStateToTransition[state_];

+ Transition t = kAvailableTransitions[transition_index];

+ while (t != kAny) {

+ const State s = kNextState[transition_index];

+ const size_t length = LookUp(t);

+ if (length > 0) {

+ offset_ += length;

+ state_ = s;

+ return true;

+ }

+ t = kAvailableTransitions[++transition_index];

+ }

+ // We have kAny, the default choice. Seek by one and switch the state.

+ ++offset_;

+ state_ = kNextState[transition_index];

+ return true;

+// Contract -- the following must be true: offset_ != source_end_ .

+// The idea is to check this only once in the caller (DoStep()), and do not

+// repeat it here every time, as this can be called many times from one call

+// to DoStep().

+size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {

+ const char ahead = *offset_;

+ const char first_char = kTransitionToChar[t];

+ // Easy case: labels corresponding to a single char.

+ if (first_char != 0)

+ return ahead == first_char ? 1u : 0u;

+ // Harder cases.

+ switch (t) {

+ // Multiple alternatives, 1-char long: return immediately.

+ case kAscii:

+ return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u;

+ case kLwsp:

+ return (ahead == ' ' || ahead == '\t') ? 1u : 0u;

+ // Longer than 1 char: prepare work for later.

+ case kDashBoundary: {

+ const size_t length = dash_boundary_.size();

+ // The cast below is safe, we know that the difference is not negative.

+ if (static_cast<size_t>(source_end_ - offset_) < length ||

+ memcmp(dash_boundary_.c_str(), offset_, length) != 0)

+ return 0u;

+ return length;

+ }

+ case kAny:

+ // We are not supposed to be asked for kAny, but this is the right answer:

+ return 1u;

+ default: // We never get here -- the rest has already been handled above.

+ NOTREACHED();

+ return 0u;

+ }

+// Extracts the form field name and, if present, the file name from |header|
+// and stores them in |next_name_| and |next_value_|, respectively. Both then
+// point into the data of |header|. Returns false if |header| does not start
+// with "Content-Disposition:", if it contains no name="..." element, or if a
+// quoted value is not terminated by '"'. Returns true otherwise.
+bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {
+  static const char kContentDisposition[] = "Content-Disposition:";
+  // The length has to be an argument of memcmp(), not part of the comparison
+  // with 0, so that the whole prefix gets compared. |header| also has to be
+  // long enough to contain the prefix.
+  const size_t content_disposition_length = strlen(kContentDisposition);
+  if (header.size() < content_disposition_length ||
+      memcmp(header.data(), kContentDisposition,
+             content_disposition_length) != 0)

tkent 2012/08/27 07:09:17 strlen is not needed. The length of kContentDispo

vabr (Chromium) 2012/08/29 19:57:07 Correct. Although this particular instance and tho

+    return false;
+
+  static const char kNameEquals[] = " name=\"";
+  static const char kFilenameEquals[] = " filename=\"";
+
+  // Mandatory part: find the name and set it as |next_name_|.
+  StringPiece::size_type field_offset = header.find(kNameEquals);
+  if (field_offset == StringPiece::npos)
+    return false;
+  field_offset += strlen(kNameEquals);

tkent 2012/08/27 07:09:17 ditto.

+  StringPiece::size_type field_end = header.find('"', field_offset);
+  if (field_end == StringPiece::npos)
+    return false;
+  next_name_.set(header.data() + field_offset, field_end - field_offset);

tkent 2012/08/27 07:09:17 Need to decode the name value. BTW, what's the ex

vabr (Chromium) 2012/08/29 19:57:07 Thanks very much for bringing this up! On 2012/08

+  // Optional part: find the filename and set it as |next_value_|.
+  field_offset = header.find(kFilenameEquals);
+  if (field_offset == StringPiece::npos)
+    return true;  // This was only optional
+  field_offset += strlen(kFilenameEquals);

tkent 2012/08/27 07:09:17 ditto.

+  field_end = header.find('"', field_offset);
+  if (field_end == StringPiece::npos)
+    return false;  // This is a malformed header.
+  next_value_.set(header.data() + field_offset, field_end - field_offset);
+  return true;
+}

+} // namespace extensions