Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(254)

Side by Side Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Kent's first comments Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"
6
7 #include <vector>
8
9 #include "base/string_util.h"
10 #include "base/values.h"
11 #include "net/base/escape.h"
12 #include "net/url_request/url_request.h"
13
14 using base::DictionaryValue;
15 using base::ListValue;
16 using base::StringPiece;
17
18 namespace extensions {
19
20 // Parses URLencoded forms, see
21 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .
22 class FormDataParserUrlEncoded : public FormDataParser {
23 public:
24 FormDataParserUrlEncoded();
25 virtual ~FormDataParserUrlEncoded();
26
27 // Implementation of FormDataParser.
28 virtual bool AllDataReadOK() OVERRIDE;
29 virtual bool GetNextNameValue(Result* result) OVERRIDE;
30 virtual bool SetSource(const base::StringPiece& source) OVERRIDE;
31
32 private:
33 // Gets next char from |source_|, seeks, and does book-keeping of = and &.
34 // Returns false if end of |source_| was reached, otherwise true.
35 bool GetNextChar(char* c);
36 // Once called the parser gives up and claims any results so far invalid.
37 void Abort();
38
39 base::StringPiece source_;
40 const char* source_end_;
41 bool aborted_;
42
43 // Variables from this block are only to be written to by GetNextChar.
44 const char* offset_; // Next char to be read.
45 size_t equality_signs_; // How many '=' were read so far.
46 size_t amp_signs_; // How many '&' were read so far.
47 bool expect_equality_; // Is the next trailing sign '=' (as opposed to '&')?
48
49 DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);
50 };
51
52 // The following class, FormDataParserMultipart, parses forms encoded as
53 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart
54 // encoding) and 5322 (header field syntax; it obsoletes RFC 822).
tkent 2012/08/27 07:09:17 Please do not refer to RFC 822, which was obsolete
vabr (Chromium) 2012/08/29 19:57:07 Done. Thanks for making me aware of this.
55 //
56 // Implementation details
57 //
58 // The original grammar from RFC 2046 is this, "multipart-body" being the root
59 // non-terminal:
60 //
61 // boundary := 0*69<bchars> bcharsnospace
62 // bchars := bcharsnospace / " "
63 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / ","
64 // / "-" / "." / "/" / ":" / "=" / "?"
65 // dash-boundary := "--" boundary
66 // multipart-body := [preamble CRLF]
67 // dash-boundary transport-padding CRLF
68 // body-part *encapsulation
69 // close-delimiter transport-padding
70 // [CRLF epilogue]
71 // transport-padding := *LWSP-char
72 // encapsulation := delimiter transport-padding CRLF body-part
73 // delimiter := CRLF dash-boundary
74 // close-delimiter := delimiter "--"
75 // preamble := discard-text
76 // epilogue := discard-text
77 // discard-text := *(*text CRLF) *text
78 // body-part := MIME-part-headers [CRLF *OCTET]
79 // OCTET := <any 0-255 octet value>
80 //
81 // Here, CRLF, DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters
82 // of the English alphabet, respectively.
83 // The non-terminal "text" is presumably just any text, excluding line breaks.
84 // The non-terminal "LWSP-char" is not directly defined in the original grammar
85 // but it means "linear whitespace", which is a space or a horizontal tab.
86 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046; its
87 // header field syntax is described in English in RFC 5322 (which obsoletes
87 // RFC 822), and can be presented as follows:
88 //
89 // MIME-part-headers := *MIME-part-header
90 // MIME-part-header := name ':' *(text / whitespace) linebreak
91 // linebreak := '\r' / '\n' / CRLF
92 // whitespace := LWSP-char / CRLF LWSP-char
93 // name := namechar *namechar
94 // namechar := <ASCII char between 33 and 126, excluding ':'>
95 //
96 // These sets of rules together compose a grammar, with the root non-terminal
97 // "multipart-body". This grammar defines a regular language. Indeed, if the
98 // non-terminals are ordered in this way:
99 // namechar < name < CRLF < DIGIT < ALPHA < text < LWSP-char < whitespace <
100 // linebreak < MIME-part-header < MIME-part-headers < bcharsnospace <
101 // bchars < boundary < dash-boundary < delimiter < close-delimiter <
102 // discard-text < transport-padding < OCTET < body-part < encapsulation <
103 // multipart-body
104 // then it is easy to verify that whenever A<B then no grammar rule with head
105 // A contains B in the body. By induction on the above order, each non-terminal
106 // defines a regular language: a non-terminal C is defined by a rule C := exp,
107 // where "exp" is an expression composed from character constants, non-terminals
108 // less than C, and the following closure operations of regular languages:
109 // concatenation, union and Kleene-star. By induction, all the lesser
110 // non-terminals represent regular languages, thus "exp" also represents a
111 // regular language. In particular, the root non-terminal (and thus the grammar)
112 // defines a regular language.
113 //
114 // The FormDataParserMultipart class uses a finite automaton to represent this
115 // language. It is easiest to view it in an extended form, with longer words
116 // allowed to label a single transition to keep the number of states low.
117 // Important states have full-word names, unimportant states (always with only
118 // one incoming label) have names abbreviating the incoming label, possibly
119 // with an index.
120 //
121 // Automaton for "multipart-body":
122 // Initial state = Start
123 // Final states = {End, IgnoreEpilogue}
124 // Implicit state (when a transition is missing) = Error
125 // Transition table ('*' is a label matching everything not matched by other
126 // labels leaving the same state):
127 // FROM LABEL TO
128 // Start dash-boundary DB1
129 // CR CR1
130 // * IgnorePreamble
131 // CR1 LF Start
132 // * IgnorePreamble
133 // IgnorePreamble CR CR1
134 // * IgnorePreamble
135 // DB1 LWSP-char DB1
136 // CR CR2
137 // CR2 LF Part
138 // Part <ASCII 33-126, excluding ':'> Name
139 // CR CR3
140 // Name <ASCII 33-126, excluding ':'> Name
141 // ':' Colon
142 // Colon LF End1
143 // CR End2
144 // * Colon
145 // End1 CR CR3
146 // <ASCII 33-126, excluding ':'> Name
147 // End2 LF End3
148 // CR CR3
149 // <ASCII 33-126, excluding ':'> Name
150 // End3 LWSP-char Colon
151 // CR CR3
152 // <ASCII 33-126, excluding ':'> Name
153 // CR3 LF PreData
154 // PreData dash-boundary DB2
155 // CR CR4
156 // * Data
157 // CR4 LF Data2
158 // * Data
159 // Data CR CR4
160 // * Data
161 // Data2 dash-boundary DB2
162 // * CR4
163 // DB2 LWSP-char DB1
164 // CR CR2
165 // '-' D
166 // D '-' End
167 // End LWSP-char End
168 // CR CR5
169 // CR5 LF IgnoreEpilogue
170 // IgnoreEpilogue * IgnoreEpilogue
171 //
172 // The automaton itself only allows to check that the input is a well-formed
173 // multipart encoding of a form. To also extract the data, additional logic is
174 // added:
175 // * The header "Content-Disposition" (read between Part and PreData) contains
176 // the elements name=... and optionally filename=... The former is the name
177 // of the corresponding field of a form. The latter is only present if that
178 // field was a file-upload, and contains the path to the uploaded file.
179 // * The data of a message part is read between PreData and DB2, excluding the
180 // last CR LF dash-boundary.
181 //
182 // IMPORTANT NOTE
183 // This parser supports multiple sources, i.e., SetSource can be called multiple
184 // times if the input is spread over several byte blocks. However, the split
185 // must not occur in the middle of a transition of the automaton described
186 // above, e.g., if there is a transition StateA --dash-boundary--> StateB, then
187 // the whole dash-boundary string must be contained in the first source,
tkent 2012/08/27 07:09:17 bust -> must?
vabr (Chromium) 2012/08/29 19:57:07 Rewritten in the meantime.
188 // or in the other. Also, the split must not occur in the middle of a header,
189 // or a part body data. A message part from one source must be read via
190 // GetNextNameValue before setting up a new source.
191 class FormDataParserMultipart : public FormDataParser {
192 public:
193 explicit FormDataParserMultipart(const std::string& boundary_separator);
194 virtual ~FormDataParserMultipart();
195
196 // Implementation of FormDataParser.
197 virtual bool AllDataReadOK() OVERRIDE;
198 virtual bool GetNextNameValue(Result* result) OVERRIDE;
199 virtual bool SetSource(const base::StringPiece& source) OVERRIDE;
200
201 private:
202 // State and Transition are numbered to make sure they form a continuous block
203 // of numbers for array indexing in lookup tables. If changing State or
204 // Transition, don't forget to update k*Size and the lookup tables.
205 enum State {
206 kStart = 0,
tkent 2012/08/27 07:09:17 See http://www.chromium.org/developers/coding-styl
vabr (Chromium) 2012/08/29 19:57:07 Added STATE_ prefix to states, transitions disappe
207 kCR1 = 1,
208 kIgnorePreamble = 2,
209 kDB1 = 3,
210 kCR2 = 4,
211 kPart = 5,
212 kName = 6,
213 kColonS = 7, // "S" to distinguish it from the transition kColonT.
214 kEnd1 = 8,
215 kEnd2 = 9,
216 kEnd3 = 10,
217 kCR3 = 11,
218 kPreData = 12,
219 kCR4 = 13,
220 kData = 14,
221 kData2 = 15,
222 kDB2 = 16,
223 kD = 17,
224 kEnd = 18,
225 kCR5 = 19,
226 kIgnoreEpilogue = 20,
227 kError = 21
228 };
229 enum Transition {
230 kLF = 0,
231 kCR = 1,
232 kAscii = 2, // A "shorthand" for ASCII 33-126 without ':'.
233 kLwsp = 3,
234 kDashBoundary = 4,
235 kColonT = 5, // "T" to distinguish it from the state kColonS.
236 kDash = 6, // Meaning '-', not "--".
237 kAny = 7 // To represent '*'.
238 };
239 static const size_t kStateSize = 22;
240 static const size_t kTransitionSize = 8;
241
242 // Lookup tables:
243 // Maps transitions with one-character label to that character (else to 0).
244 static char kTransitionToChar[];
245 // Indices of transitions available in state |s| in |kAvailableTransitions|
246 // start at kStateToTransition[s] and the last transition for |s| is always
247 // kAny. The target state corresponding to transition kAvailableTransitions[i]
248 // is kNextState[i].
249 static Transition kAvailableTransitions[];
250 static State kNextState[];
251 static size_t kStateToTransition[];
252
253 // Reads the source until the next name-value pair is read. Returns true if
254 // |next_name_| and |next_value_| were successfully updated.
255 bool ReadNextNameValue();
256 // One step of the automaton, based on |state_| and the input from |source_|
257 // to be read. Updates the |offset_| iterator. Returns true on success.
258 bool DoStep();
259 // Tests whether the input pointed to by |offset_| allows to read transition
260 // |t|. It returns the number of bytes to be read, or 0 if |t| cannot be read.
261 size_t LookUp(Transition t);
262
263 // Extracts "name" and possibly "value" from a Content-Disposition header.
264 // Writes directly into |next_name_| and |next_value_|. Returns true on
265 // success and false otherwise.
266 bool ParseHeader(const base::StringPiece& header);
267
268 bool InFinalState() {
269 return state_ == kEnd || state_ == kIgnoreEpilogue;
270 }
271
272 // The parsed message can be split into multiple sources which we read
273 // sequentially.
274 base::StringPiece source_;
275 const char* source_end_;
276 const char* offset_;
tkent 2012/08/27 07:09:17 The name "offset_" is confusing. It's not an offs
vabr (Chromium) 2012/08/29 19:57:07 You're right. This disappeared after rewriting.
277 // The dash-boundary string is used for all sources.
278 const std::string dash_boundary_;
279 State state_;
280 // The next result to be returned by GetNextNameValue. It is stored as a pair
281 // of StringPieces instead of a Result, to avoid one copy of the data (note
282 // that Result stores a copy of the data in std::string, whereas StringPiece
283 // is just a pointer to the data in |source_|).
284 base::StringPiece next_name_;
285 base::StringPiece next_value_;
286 bool value_name_present_;
287
288 DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);
289 };
290
291 // Implementation of FormDataParser and FormDataParser::Result .
292
293 FormDataParser::Result::Result() {}
294 FormDataParser::Result::~Result() {}
295
296 void FormDataParser::Result::Reset() {
297 name_.erase();
298 value_.erase();
299 }
300
301 FormDataParser::~FormDataParser() {}
302
303 // static
304 scoped_ptr<FormDataParser> FormDataParser::Create(
305 const net::URLRequest* request) {
306 std::string value;
307 const bool found = request->extra_request_headers().GetHeader(
308 net::HttpRequestHeaders::kContentType, &value);
309 return Create(found ? &value : NULL);
310 }
311
312 // static
313 scoped_ptr<FormDataParser> FormDataParser::Create(
314 const std::string* content_type_header) {
315 enum ParserChoice {kUrlEncoded, kMultipart, kError};
316 ParserChoice choice = kError;
317 std::string boundary;
318
319 if (content_type_header == NULL) {
320 choice = kUrlEncoded;
321 } else {
322 const std::string content_type(
323 content_type_header->substr(0, content_type_header->find(';')));
324
325 if (base::strcasecmp(
326 content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
327 choice = kUrlEncoded;
328 } else if (base::strcasecmp(
329 content_type.c_str(), "multipart/form-data") == 0) {
330 static const char kBoundaryString[] = "boundary=";
331 size_t offset = content_type_header->find(kBoundaryString);
332 if (offset == std::string::npos) {
333 // Malformed header.
334 return scoped_ptr<FormDataParser>();
335 }
336 offset += strlen(kBoundaryString);
337 boundary = content_type_header->substr(
338 offset, content_type_header->find(';', offset));
339 if (!boundary.empty())
340 choice = kMultipart;
341 }
342 }
343 // Other cases are unparseable, including when |content_type| is "text/plain".
344
345 switch (choice) {
346 case kUrlEncoded:
347 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
348 case kMultipart:
349 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
350 default: // In other words, case kError:
351 return scoped_ptr<FormDataParser>();
352 }
353 }
354
355 FormDataParser::FormDataParser() {}
356
357 // Implementation of FormDataParserUrlEncoded.
358
359 FormDataParserUrlEncoded::FormDataParserUrlEncoded()
360 : source_end_(NULL),
361 aborted_(false),
362 offset_(NULL),
363 equality_signs_(0),
364 amp_signs_(0),
365 expect_equality_(true) {
366 }
367
368 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
369
370 bool FormDataParserUrlEncoded::AllDataReadOK() {
371 return source_.data() != NULL &&
372 !aborted_ &&
373 offset_ == source_end_ &&
374 equality_signs_ == amp_signs_ + 1;
tkent 2012/08/27 07:09:17 Why do we need to check the number of = and & ? eq
vabr (Chromium) 2012/08/29 19:57:07 Now the parser uses a regexp which eliminates such
375 }
376
377 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
378 result->Reset();
379 if (source_.data() == NULL || aborted_)
380 return false;
381 if (offset_ == source_end_)
382 return false;
383 const char* const name_start = &(*offset_);
384 char c;
385 bool last_read_success = GetNextChar(&c);
386 while (last_read_success && c != '=')
387 last_read_success = GetNextChar(&c);
388 if (!last_read_success) { // This means the data is malformed.
389 Abort();
390 return false;
391 }
392 const char* const name_end = offset_ - 1;
393 const std::string encoded_name(name_start, name_end - name_start);
394 const net::UnescapeRule::Type unescape_rules =
395 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
396 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
397 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));
398
399 const char* const value_start = offset_;
400 last_read_success = GetNextChar(&c);
401 while (last_read_success && c != '&')
402 last_read_success = GetNextChar(&c);
403 const char* const value_end =
404 last_read_success ? offset_ - 1 : offset_;
405 const std::string encoded_value(value_start, value_end - value_start);
406 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));
407 return true;
408 }
409
410 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {
411 if (source_ != NULL || source.data() == NULL || aborted_)
412 return false;
413 source_ = source;
414 source_end_ = source_.data() + source_.size();
415 offset_ = source_.data();
416 return true;
417 }
418
419 bool FormDataParserUrlEncoded::GetNextChar(char* c) {
420 if (offset_ == source_end_ || aborted_)
421 return false;
422 *c = *offset_;
423 ++offset_;
424
425 if (*c == '=') {
426 if (expect_equality_) {
427 ++equality_signs_;
428 expect_equality_ = false;
429 } else {
430 Abort();
431 return false;
432 }
433 }
434 if (*c == '&' && offset_ != source_end_) {
435 if (!expect_equality_) {
436 ++amp_signs_;
437 expect_equality_ = true;
438 } else {
439 Abort();
440 return false;
441 }
442 }
443
444 return true;
445 }
446
447 void FormDataParserUrlEncoded::Abort() {
448 aborted_ = true;
449 }
450
451 // Implementation of FormDataParserMultipart.
452
453 FormDataParserMultipart::FormDataParserMultipart(
454 const std::string& boundary_separator)
455 : source_end_(NULL),
456 offset_(NULL),
457 dash_boundary_("--" + boundary_separator),
458 state_(kStart),
459 value_name_present_(false) {
460 }
461
462 FormDataParserMultipart::~FormDataParserMultipart() {}
463
464 bool FormDataParserMultipart::AllDataReadOK() {
465 return source_.data() != NULL && InFinalState();
466 }
467
468 bool FormDataParserMultipart::GetNextNameValue(Result* result) {
469 if (!value_name_present_ || state_ == kError)
470 return false;
471 result->set_name(next_name_);
472 result->set_value(next_value_);
473 next_name_.clear();
474 next_value_.clear();
475 value_name_present_ = ReadNextNameValue();
476 return true;
477 }
478
479 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {
480 if (state_ == kError ||
481 source.data() == NULL ||
482 // Message part across a source split is also an error.
483 next_name_.data() != NULL || next_value_.data() != NULL)
484 return false;
485 if (source_.data() != NULL && offset_ != source_end_){
486 // Try to seek until the end. If no name-value pair is found, this is OK.
487 value_name_present_ = ReadNextNameValue();
488 if (!value_name_present_ || offset_ != source_end_)
489 return false;
490 }
491 source_ = source;
492 source_end_ = source_.data() + source_.size();
493 offset_ = source_.data();
494 value_name_present_ = ReadNextNameValue();
495 return true;
496 }
497
498 // static
499 char FormDataParserMultipart::kTransitionToChar[] = {
500 '\n', // For kLF.
501 '\r', // For kCR.
502 0, // For kAscii.
503 0, // For kLwsp.
504 0, // For kDashBoundary.
505 ':', // For kColonT.
506 '-', // For kDash.
507 0, // For kAny.
508 };
509
510 // static
511 FormDataParserMultipart::Transition
512 FormDataParserMultipart::kAvailableTransitions[] = {
513 kDashBoundary, kCR, kAny, // For kStart.
514 kLF, kAny, // For kCR1.
515 kCR, kAny, // For kIgnorePreamble.
516 kLwsp, kCR, kAny, // For kDB1.
517 kLF, kAny, // For kCR2.
518 kAscii, kCR, kAny, // For kPart.
519 kAscii, kColonT, kAny, // For kName.
520 kLF, kCR, kAny, // For kColonS.
521 kCR, kAscii, kAny, // For kEnd1.
522 kLF, kCR, kAscii, kAny, // For kEnd2.
523 kLwsp, kCR, kAscii, kAny, // For kEnd3.
524 kLF, kAny, // For kCR3.
525 kDashBoundary, kCR, kAny, // For kPreData.
526 kLF, kAny, // For kCR4.
527 kCR, kAny, // For kData.
528 kDashBoundary, kAny, // For kData2.
529 kLwsp, kCR, kDash, kAny, // For kDB2.
530 kDash, kAny, // For kD.
531 kLwsp, kCR, kAny, // For kEnd.
532 kLF, kAny, // For kCR5.
533 kAny, // For kIgnoreEpilogue.
534 kAny // For kError.
535 };
536
537 // static
538 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {
tkent 2012/08/27 07:09:17 kAvailableTransitions and kNextState should be mer
vabr (Chromium) 2012/08/29 19:57:07 Disappeared after rewriting.
539 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.
540 kStart, kIgnorePreamble, // For kCR1; 05.
541 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.
542 kDB1, kCR2, kError, // For kDB1; 10.
543 kPart, kError, // For kCR2; 12.
544 kName, kCR3, kError, // For kPart; 15.
545 kName, kColonS, kError, // For kName; 18.
546 kEnd1, kEnd2, kColonS, // For kColonS; 21.
547 kCR3, kName, kError, // For kEnd1; 24.
548 kEnd3, kCR3, kName, kError, // For kEnd2; 28.
549 kColonS, kCR3, kName, kError, // For kEnd3; 32.
550 kPreData, kError, // For kCR3; 34.
551 kDB2, kCR3, kData, // For kPreData; 37.
552 kData2, kData, // For kCR4; 39.
553 kCR4, kData, // For kData; 41.
554 kDB2, kCR4, // For kData2; 43.
555 kDB1, kCR2, kD, kError, // For kDB2; 47.
556 kEnd, kError, // For kD; 49.
557 kEnd, kCR5, kError, // For kEnd; 52.
558 kIgnoreEpilogue, kError, // For kCR5; 54.
559 kIgnoreEpilogue, // For kIgnoreEpilogue; 55.
560 kError // For kError; 56.
561 };
562
563 // static
564 size_t FormDataParserMultipart::kStateToTransition[] = {
565 0u, // For kStart
566 3u, // For kCR1
567 5u, // For kIgnorePreamble
568 7u, // For kDB1
569 10u, // For kCR2
570 12u, // For kPart
571 15u, // For kName
572 18u, // For kColonS
573 21u, // For kEnd1
574 24u, // For kEnd2
575 28u, // For kEnd3
576 32u, // For kCR3
577 34u, // For kPreData
578 37u, // For kCR4
579 39u, // For kData
580 41u, // For kData2
581 43u, // For kDB2
582 47u, // For kD
583 49u, // For kEnd
584 52u, // For kCR5
585 54u, // For kIgnoreEpilogue
586 55u, // For kError
587 };
588
589 bool FormDataParserMultipart::ReadNextNameValue() {
590 if (state_ == kError || source_.data() == NULL ||
591 next_name_.data() != NULL || next_value_.data() != NULL)
592 return false;
593
594 // Seek to the next part's headers.
595 while (state_ != kPart) {
596 if (!DoStep())
597 return false;
598 }
tkent 2012/08/27 07:09:17 We had better have a function DoStepsUntil(State).
vabr (Chromium) 2012/08/29 19:57:07 Disappeared after the rewrite.
599 while (state_ != kPreData) {
600 const char* header = offset_;
601 while (state_ != kColonS) {
602 if (!DoStep())
603 return false;
604 }
605 size_t header_length = 0u;
606 while (state_ != kPreData && state_ != kName) {
607 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) {
608 // The cast is safe, we know that offset only moves forward.
609 header_length = static_cast<size_t>(offset_ - header);
610 }
611 if (!DoStep())
612 return false;
613 }
614 if (ParseHeader(base::StringPiece(header, header_length))) {
615 // Found what we were looking for, just skip to the part's body.
616 while (state_ != kPreData) {
617 if (!DoStep())
618 return false;
619 }
620 }
621 }
622
623 const char* body = offset_;
624 size_t body_length = 0;
625 while (state_ != kDB2 && offset_ != source_end_) {
626 if (!DoStep())
627 return false;
628 if (state_ == kCR4) {
629 // We are in the middle of which might be the CRLF starting the part
630 // separator (see the "delimiter" non-terminal from the grammar given
631 // in the header file). The cast is safe, we know that offset only moves
632 // forward and body was assigned at least 1 transition ago.
633 body_length = static_cast<size_t>(offset_ - body - 1);
634 }
635 }
636 if (body_length > 0)
637 next_value_.set(body, body_length);
638 return true;
639 }
640
641 bool FormDataParserMultipart::DoStep() {
642 if (state_ == kError || offset_ == source_end_)
643 return false;
644 size_t transition_index = kStateToTransition[state_];
645 Transition t = kAvailableTransitions[transition_index];
646 while (t != kAny) {
647 const State s = kNextState[transition_index];
648 const size_t length = LookUp(t);
649 if (length > 0) {
650 offset_ += length;
651 state_ = s;
652 return true;
653 }
654 t = kAvailableTransitions[++transition_index];
655 }
656 // We have kAny, the default choice. Seek by one and switch the state.
657 ++offset_;
658 state_ = kNextState[transition_index];
659 return true;
660 }
661
662 // Contract -- the following must be true: offset_ != source_end_ .
663 // The idea is to check this only once in the caller (DoStep()), and do not
664 // repeat it here every time, as this can be called many times from one call
665 // to DoStep().
666 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {
667 const char ahead = *offset_;
668 const char first_char = kTransitionToChar[t];
669
670 // Easy case: labels corresponding to a single char.
671 if (first_char != 0)
672 return ahead == first_char ? 1u : 0u;
673
674 // Harder cases.
675 switch (t) {
676 // Multiple alternatives, 1-char long: return immediately.
677 case kAscii:
678 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u;
679 case kLwsp:
680 return (ahead == ' ' || ahead == '\t') ? 1u : 0u;
681
682 // Longer than 1 char: prepare work for later.
683 case kDashBoundary: {
684 const size_t length = dash_boundary_.size();
685 // The cast below is safe, we know that the difference is not negative.
686 if (static_cast<size_t>(source_end_ - offset_) < length ||
687 memcmp(dash_boundary_.c_str(), offset_, length) != 0)
688 return 0u;
689 return length;
690 }
691 case kAny:
692 // We are not supposed to be asked for kAny, but this is the right answer:
693 return 1u;
694 default: // We never get here -- the rest has already been handled above.
695 NOTREACHED();
696 return 0u;
697 }
698 }
699
700 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {
701 static const char kContentDisposition[] = "Content-Disposition:";
702 if (memcmp(header.data(), kContentDisposition,
703 strlen(kContentDisposition) != 0))
tkent 2012/08/27 07:09:17 strlen is not needed. The length of kContentDispo
vabr (Chromium) 2012/08/29 19:57:07 Correct. Although this particular instance and tho
704 return false;
705 static const char kNameEquals[] = " name=\"";
706 static const char kFilenameEquals[] = " filename=\"";
707
708 // Mandatory part: find the name and set it as |next_name_|.
709 StringPiece::size_type field_offset = header.find(kNameEquals);
710 if (field_offset == StringPiece::npos)
711 return false;
712 field_offset += strlen(kNameEquals);
tkent 2012/08/27 07:09:17 ditto.
713 StringPiece::size_type field_end = header.find('"', field_offset);
714 if (field_end == StringPiece::npos)
715 return false;
716 next_name_.set(header.data() + field_offset, field_end - field_offset);
tkent 2012/08/27 07:09:17 Need to decode the name value. BTW, what's the ex
vabr (Chromium) 2012/08/29 19:57:07 Thanks very much for bringing this up! On 2012/08
717
718 // Optional part: find the filename and set it as |next_value_|.
719 field_offset = header.find(kFilenameEquals);
720 if (field_offset == StringPiece::npos)
721 return true; // This was only optional
722 field_offset += strlen(kFilenameEquals);
tkent 2012/08/27 07:09:17 ditto.
723 field_end = header.find('"', field_offset);
724 if (field_end == StringPiece::npos)
725 return false; // This is a malformed header.
726 next_value_.set(header.data() + field_offset, field_end - field_offset);
727 return true;
728 }
729
730 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698