Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Corrected the multipart parser + parsedForm->formData Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"
6
7 #include "base/string_util.h"
8 #include "base/values.h"
9 #include "net/base/escape.h"
10 #include "net/url_request/url_request.h"
11
12 using base::DictionaryValue;
13 using base::ListValue;
14 using base::StringPiece;
15
16 namespace extensions {
17
18 // Implementation of FormDataParser and FormDataParser::Result.
19
20 FormDataParser::Result::Result() {}
21 FormDataParser::Result::~Result() {}
22
23 void FormDataParser::Result::Reset() {
24 name_.erase();
25 value_.erase();
26 }
27
28 FormDataParser::~FormDataParser() {}
29
30 // static
31 scoped_ptr<FormDataParser> FormDataParser::Create(
32 const net::URLRequest* request) {
33 std::string value;
34 const bool found = request->extra_request_headers().GetHeader(
35 net::HttpRequestHeaders::kContentType, &value);
36 return Create(found ? &value : NULL);
37 }
38
39 // static
40 scoped_ptr<FormDataParser> FormDataParser::Create(
41 const std::string* content_type_header) {
42 enum ParserChoice {kUrlEncoded, kMultipart, kError};
43 ParserChoice choice = kError;
44 std::string boundary;
45
46 if (content_type_header == NULL) {
47 choice = kUrlEncoded;
48 } else {
49 const std::string content_type(
50 content_type_header->substr(0, content_type_header->find(';')));
51
52 if (base::strcasecmp(
53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
54 choice = kUrlEncoded;
55 } else if (base::strcasecmp(
56 content_type.c_str(), "multipart/form-data") == 0) {
57 static const char kBoundaryString[] = "boundary=";
58 size_t offset = content_type_header->find(kBoundaryString);
59 if (offset == std::string::npos) {
60 // Malformed header.
61 return scoped_ptr<FormDataParser>();
62 }
63 offset += strlen(kBoundaryString);
64 boundary = content_type_header->substr(
65 offset, content_type_header->find(';', offset));
66 if (!boundary.empty())
67 choice = kMultipart;
68 }
69 }
70 // Other cases are unparseable, including when |content_type| is "text/plain".
71
72 switch (choice) {
73 case kUrlEncoded:
74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
75 case kMultipart:
76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
77 default: // In other words, case kError:
78 return scoped_ptr<FormDataParser>();
79 }
80 }
81
82 FormDataParser::FormDataParser() {}
83
84 // Implementation of FormDataParserUrlEncoded.
85
86 FormDataParserUrlEncoded::FormDataParserUrlEncoded()
87 : source_(NULL),
88 aborted_(false),
89 equality_signs_(0),
90 amp_signs_(0),
91 expect_equality_(true) {
92 }
93
94 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
95
96 bool FormDataParserUrlEncoded::AllDataReadOK() {
97 return source_ != NULL &&
98 !aborted_ &&
99 offset_ == source_->end() &&
100 equality_signs_ == amp_signs_ + 1;
101 }
102
103 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
104 result->Reset();
105 if (source_ == NULL || aborted_)
106 return false;
107 if (offset_ == source_->end())
108 return false;
109 const char* const name_start = &(*offset_);
110 char c;
111 bool last_read_success = GetNextChar(&c);
112 while (last_read_success && c != '=')
113 last_read_success = GetNextChar(&c);
114 if (!last_read_success) { // This means the data is malformed.
115 Abort();
116 return false;
117 }
118 const char* const name_end = &(*(offset_ - 1));
119 const std::string encoded_name(name_start, name_end - name_start);
120 const net::UnescapeRule::Type unescape_rules =
121 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
122 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
123 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));
124
125 const char* const value_start = &(*offset_);
126 last_read_success = GetNextChar(&c);
127 while (last_read_success && c != '&')
128 last_read_success = GetNextChar(&c);
129 const char* const value_end =
130 last_read_success ? &(*(offset_ - 1)) : &(*offset_);
131 const std::string encoded_value(value_start, value_end - value_start);
132 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));
133 return true;
134 }
135
136 bool FormDataParserUrlEncoded::SetSource(const std::vector<char>* source) {
137 if (source_ != NULL || source == NULL || aborted_)
138 return false;
139 source_ = source;
140 offset_ = source_->begin();
141 return true;
142 }
143
144 bool FormDataParserUrlEncoded::GetNextChar(char* c) {
145 if (offset_ == source_->end() || aborted_)
146 return false;
147 *c = *offset_;
148 ++offset_;
149
150 if (*c == '=') {
151 if (expect_equality_) {
152 ++equality_signs_;
153 expect_equality_ = false;
154 } else {
155 Abort();
156 return false;
157 }
158 }
159 if (*c == '&' && offset_ != source_->end()) {
160 if (!expect_equality_) {
161 ++amp_signs_;
162 expect_equality_ = true;
163 } else {
164 Abort();
165 return false;
166 }
167 }
168
169 return true;
170 }
171
172 void FormDataParserUrlEncoded::Abort() {
173 aborted_ = true;
174 }
175
176 // Implementation of FormDataParserMultipart.
177
178 FormDataParserMultipart::FormDataParserMultipart(
179 const std::string& boundary_separator)
180 : source_(NULL),
181 dash_boundary_("--" + boundary_separator),
182 state_(kStart),
183 value_name_present_(false) {
184 }
185
186 FormDataParserMultipart::~FormDataParserMultipart() {}
187
188 bool FormDataParserMultipart::AllDataReadOK() {
189 return source_ != NULL && InFinalState();
190 }
191
192 bool FormDataParserMultipart::GetNextNameValue(Result* result) {
193 if (!value_name_present_ || state_ == kError)
194 return false;
195 result->set_name(next_name_);
196 result->set_value(next_value_);
197 next_name_.clear();
198 next_value_.clear();
199 value_name_present_ = ReadNextNameValue();
200 return true;
201 }
202
203 bool FormDataParserMultipart::SetSource(const std::vector<char>* source) {
204 if (state_ == kError ||
205 source == NULL ||
206 // Message part across a source split is also an error.
207 next_name_.data() != NULL || next_value_.data() != NULL)
208 return false;
209 if (source_ != NULL && offset_ != source_->end()){
210 // Try to seek until the end. If no name-value pair is found, this is OK.
211 value_name_present_ = ReadNextNameValue();
212 if (!value_name_present_ || offset_ != source_->end())
213 return false;
214 }
215 source_ = source;
216 offset_ = source_->begin();
217 value_name_present_ = ReadNextNameValue();
218 return true;
219 }
220
221 // static
222 char FormDataParserMultipart::kTransitionToChar[] = {
223 '\n', // For kLF.
224 '\r', // For kCR.
225 0, // For kAscii.
226 0, // For kLwsp.
227 0, // For kDashBoundary.
228 ':', // For kColonT.
229 '-', // For kDash.
230 0, // For kAny.
231 };
232
233 // static
234 FormDataParserMultipart::Transition
235 FormDataParserMultipart::kAvailableTransitions[] = {
236 kDashBoundary, kCR, kAny, // For kStart.
237 kLF, kAny, // For kCR1.
238 kCR, kAny, // For kIgnorePreamble.
239 kLwsp, kCR, kAny, // For kDB1.
240 kLF, kAny, // For kCR2.
241 kAscii, kCR, kAny, // For kPart.
242 kAscii, kColonT, kAny, // For kName.
243 kLF, kCR, kAny, // For kColonS.
244 kCR, kAscii, kAny, // For kEnd1.
245 kLF, kCR, kAscii, kAny, // For kEnd2.
246 kLwsp, kCR, kAscii, kAny, // For kEnd3.
247 kLF, kAny, // For kCR3.
248 kDashBoundary, kCR, kAny, // For kPreData.
249 kLF, kAny, // For kCR4.
250 kCR, kAny, // For kData.
251 kDashBoundary, kAny, // For kData2.
252 kLwsp, kCR, kDash, kAny, // For kDB2.
253 kDash, kAny, // For kD.
254 kLwsp, kCR, kAny, // For kEnd.
255 kLF, kAny, // For kCR5.
256 kAny, // For kIgnoreEpilogue.
257 kAny // For kError.
258 };
259
260 // static
261 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {
262 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.
263 kStart, kIgnorePreamble, // For kCR1; 05.
264 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.
265 kDB1, kCR2, kError, // For kDB1; 10.
266 kPart, kError, // For kCR2; 12.
267 kName, kCR3, kError, // For kPart; 15.
268 kName, kColonS, kError, // For kName; 18.
269 kEnd1, kEnd2, kColonS, // For kColonS; 21.
270 kCR3, kName, kError, // For kEnd1; 24.
271 kEnd3, kCR3, kName, kError, // For kEnd2; 28.
272 kColonS, kCR3, kName, kError, // For kEnd3; 32.
273 kPreData, kError, // For kCR3; 34.
274 kDB2, kCR3, kData, // For kPreData; 37.
275 kData2, kData, // For kCR4; 39.
276 kCR4, kData, // For kData; 41.
277 kDB2, kCR4, // For kData2; 43.
278 kDB1, kCR2, kD, kError, // For kDB2; 47.
279 kEnd, kError, // For kD; 49.
280 kEnd, kCR5, kError, // For kEnd; 52.
281 kIgnoreEpilogue, kError, // For kCR5; 54.
282 kIgnoreEpilogue, // For kIgnoreEpilogue; 55.
283 kError // For kError; 56.
284 };
285
286 // static
287 size_t FormDataParserMultipart::kStateToTransition[] = {
288 0u, // For kStart
289 3u, // For kCR1
290 5u, // For kIgnorePreamble
291 7u, // For kDB1
292 10u, // For kCR2
293 12u, // For kPart
294 15u, // For kName
295 18u, // For kColonS
296 21u, // For kEnd1
297 24u, // For kEnd2
298 28u, // For kEnd3
299 32u, // For kCR3
300 34u, // For kPreData
301 37u, // For kCR4
302 39u, // For kData
303 41u, // For kData2
304 43u, // For kDB2
305 47u, // For kD
306 49u, // For kEnd
307 52u, // For kCR5
308 54u, // For kIgnoreEpilogue
309 55u, // For kError
310 };
311
312 bool FormDataParserMultipart::ReadNextNameValue() {
313 if (state_ == kError || source_ == NULL ||
314 next_name_.data() != NULL || next_value_.data() != NULL)
315 return false;
316
317 // Seek to the next part's headers.
318 while (state_ != kPart)
battre 2012/08/16 19:18:03 nit: {}
vabr (Chromium) 2012/08/17 18:29:57 Done.
319 if (!DoStep())
320 return false;
321 while (state_ != kPreData) {
322 const char* header = &(*offset_);
323 while (state_ != kColonS)
battre 2012/08/16 19:18:03 nit: {}
vabr (Chromium) 2012/08/17 18:29:57 Done.
324 if (!DoStep())
325 return false;
326 size_t header_length = 0u;
327 while (state_ != kPreData && state_ != kName) {
328 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3)
battre 2012/08/16 19:18:03 nit: {}
vabr (Chromium) 2012/08/17 18:29:57 Done.
329 // The cast is safe, we know that offset only moves forward.
330 header_length = static_cast<size_t>(&(*offset_) - header);
331 if (!DoStep())
332 return false;
333 }
334 if (ParseHeader(base::StringPiece(header, header_length))) {
335 // Found what we were looking for, just skip to the part's body.
336 while (state_ != kPreData)
battre 2012/08/16 19:18:03 nit: {}
vabr (Chromium) 2012/08/17 18:29:57 Done.
337 if (!DoStep())
338 return false;
339 }
340 }
341
342 const char* body = &(*offset_);
343 size_t body_length = 0;
344 while (state_ != kDB2 && offset_ != source_->end()) {
345 if (!DoStep())
346 return false;
347 if (state_ == kCR4)
battre 2012/08/16 19:18:03 nit: {}
vabr (Chromium) 2012/08/17 18:29:57 Done.
348 // We are in the middle of which might be the CRLF starting the part
349 // separator (see the "delimiter" non-terminal from the grammar given
350 // in the header file). The cast is safe, we know that offset only moves
351 // forward and body was assigned at least 1 transition ago.
352 body_length = static_cast<size_t>(&(*offset_) - body - 1);
353 }
354 if (body_length > 0)
355 next_value_.set(body, body_length);
356 return true;
357 }
358
359 bool FormDataParserMultipart::DoStep() {
360 if (state_ == kError || source_ == NULL || offset_ == source_->end())
361 return false;
362 size_t transition_index = kStateToTransition[state_];
363 Transition t = kAvailableTransitions[transition_index];
364 while (t != kAny) {
365 const State s = kNextState[transition_index];
366 const size_t length = LookUp(t);
367 if (length > 0) {
368 offset_ += length;
369 state_ = s;
370 return true;
371 }
372 t = kAvailableTransitions[++transition_index];
373 }
374 // We have kAny, the default choice. Seek by one and switch the state.
375 ++offset_;
376 state_ = kNextState[transition_index];
377 return true;
378 }
379
380 // Contract -- the following must be true:
381 // source_ != NULL && offset_ != source_->end()
382 // The idea is to check this only once in the caller (DoStep()), and not to
383 // repeat it here every time, as this can be called many times from one call
384 // to DoStep().
385 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {
386 const char ahead = *offset_;
387 const char first_char = kTransitionToChar[t];
388
389 // Easy case: labels corresponding to a single char.
390 if (first_char != 0)
391 return ahead == first_char ? 1u : 0u;
392
393 // Harder cases.
394 switch (t) {
395 // Multiple alternatives, 1-char long: return immediately.
396 case kAscii:
397 return ahead >= 33 && ahead <= 126 && ahead != ':' ? 1u : 0u;
398 case kLwsp:
399 return ahead == ' ' || ahead == '\t' ? 1u : 0u;
battre 2012/08/16 19:18:03 nit: () around condition, also above
vabr (Chromium) 2012/08/17 18:29:57 Done.
400
401 // Longer than 1 char: prepare work for later.
402 case kDashBoundary: {
403 const size_t length = dash_boundary_.size();
404 // The cast below is safe, we know that the difference is not negative.
405 if (static_cast<size_t>(source_->end() - offset_) < length ||
406 memcmp(dash_boundary_.c_str(), &(*offset_), length) != 0)
vabr (Chromium) 2012/08/16 08:00:59 Doing this each time is unnecessary expensive. It
407 return 0u;
408 return length;
409 }
410 case kAny:
411 // We are not supposed to be asked for kAny, but this is the right answer:
412 return 1u;
413 default: // We never get here -- the rest has already been handled above.
414 NOTREACHED();
415 return 0u;
416 }
417 }
418
419 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {
420 static const char kContentDisposition[] = "Content-Disposition:";
421 if (memcmp(
422 header.data(), kContentDisposition, strlen(kContentDisposition) != 0))
battre 2012/08/16 19:18:03 I would move header.data(), kContentDisposition, t
vabr (Chromium) 2012/08/17 18:29:57 Done.
423 return false;
424 static const char kNameEquals[] = " name=\"";
425 static const char kFilenameEquals[] = " filename=\"";
426
427 // Mandatory part: find the name and set it as |next_name_|.
428 StringPiece::size_type field_offset = header.find(kNameEquals);
429 if (field_offset == StringPiece::npos)
430 return false;
431 field_offset += strlen(kNameEquals);
432 StringPiece::size_type field_end = header.find('"', field_offset);
433 if (field_end == StringPiece::npos)
434 return false;
435 next_name_.set(header.data() + field_offset, field_end - field_offset);
436
437 // Optional part: find the filename and set it as |next_value_|.
438 field_offset = header.find(kFilenameEquals);
439 if (field_offset == StringPiece::npos)
440 return true; // This was only optional
441 field_offset += strlen(kFilenameEquals);
442 field_end = header.find('"', field_offset);
443 if (field_end == StringPiece::npos)
444 return false; // This is a malformed header.
445 next_value_.set(header.data() + field_offset, field_end - field_offset);
446 return true;
447 }
448
449 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698