Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(415)

Side by Side Diff: chrome/browser/extensions/api/web_request/form_data_parser.cc

Issue 10694055: Add read-only access to POST data for webRequest's onBeforeRequest (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Dominic's comments + adjusting to the recent move of UploadElement out of UploadData. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"
6
7 #include "base/string_util.h"
8 #include "base/values.h"
9 #include "net/base/escape.h"
10 #include "net/url_request/url_request.h"
11
12 using base::DictionaryValue;
13 using base::ListValue;
14 using base::StringPiece;
15
16 namespace extensions {
17
18 // Implementation of FormDataParser and FormDataParser::Result.
19
20 FormDataParser::Result::Result() {}
21 FormDataParser::Result::~Result() {}
22
23 void FormDataParser::Result::Reset() {
24 name_.erase();
25 value_.erase();
26 }
27
28 FormDataParser::~FormDataParser() {}
29
30 // static
31 scoped_ptr<FormDataParser> FormDataParser::Create(
32 const net::URLRequest* request) {
33 std::string value;
34 const bool found = request->extra_request_headers().GetHeader(
35 net::HttpRequestHeaders::kContentType, &value);
36 return Create(found ? &value : NULL);
37 }
38
39 // static
40 scoped_ptr<FormDataParser> FormDataParser::Create(
41 const std::string* content_type_header) {
42 enum ParserChoice {kUrlEncoded, kMultipart, kError};
43 ParserChoice choice = kError;
44 std::string boundary;
45
46 if (content_type_header == NULL) {
47 choice = kUrlEncoded;
48 } else {
49 const std::string content_type(
50 content_type_header->substr(0, content_type_header->find(';')));
51
52 if (base::strcasecmp(
53 content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
54 choice = kUrlEncoded;
55 } else if (base::strcasecmp(
56 content_type.c_str(), "multipart/form-data") == 0) {
57 static const char kBoundaryString[] = "boundary=";
58 size_t offset = content_type_header->find(kBoundaryString);
59 if (offset == std::string::npos) {
60 // Malformed header.
61 return scoped_ptr<FormDataParser>();
62 }
63 offset += strlen(kBoundaryString);
64 boundary = content_type_header->substr(
65 offset, content_type_header->find(';', offset));
66 if (!boundary.empty())
67 choice = kMultipart;
68 }
69 }
70 // Other cases are unparseable, including when |content_type| is "text/plain".
tkent 2012/08/24 14:26:50 Why text/plain is not supported?
vabr (Chromium) 2012/08/24 16:16:59 This encoding is ambiguous. Nice description from
71
72 switch (choice) {
73 case kUrlEncoded:
74 return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
75 case kMultipart:
76 return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
77 default: // In other words, case kError:
78 return scoped_ptr<FormDataParser>();
79 }
80 }
81
82 FormDataParser::FormDataParser() {}
83
84 // Implementation of FormDataParserUrlEncoded.
85
86 FormDataParserUrlEncoded::FormDataParserUrlEncoded()
87 : source_end_(NULL),
88 aborted_(false),
89 offset_(NULL),
90 equality_signs_(0),
91 amp_signs_(0),
92 expect_equality_(true) {
93 }
94
95 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
96
97 bool FormDataParserUrlEncoded::AllDataReadOK() {
98 return source_.data() != NULL &&
99 !aborted_ &&
100 offset_ == source_end_ &&
101 equality_signs_ == amp_signs_ + 1;
102 }
103
104 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
105 result->Reset();
106 if (source_.data() == NULL || aborted_)
107 return false;
108 if (offset_ == source_end_)
109 return false;
110 const char* const name_start = &(*offset_);
111 char c;
112 bool last_read_success = GetNextChar(&c);
113 while (last_read_success && c != '=')
114 last_read_success = GetNextChar(&c);
115 if (!last_read_success) { // This means the data is malformed.
116 Abort();
117 return false;
118 }
119 const char* const name_end = offset_ - 1;
120 const std::string encoded_name(name_start, name_end - name_start);
121 const net::UnescapeRule::Type unescape_rules =
122 net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
123 net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
124 result->set_name(net::UnescapeURLComponent(encoded_name, unescape_rules));
125
126 const char* const value_start = offset_;
127 last_read_success = GetNextChar(&c);
128 while (last_read_success && c != '&')
129 last_read_success = GetNextChar(&c);
130 const char* const value_end =
131 last_read_success ? offset_ - 1 : offset_;
132 const std::string encoded_value(value_start, value_end - value_start);
133 result->set_value(net::UnescapeURLComponent(encoded_value, unescape_rules));
134 return true;
135 }
136
137 bool FormDataParserUrlEncoded::SetSource(const base::StringPiece& source) {
138 if (source_ != NULL || source.data() == NULL || aborted_)
139 return false;
140 source_ = source;
141 source_end_ = source_.data() + source_.size();
142 offset_ = source_.data();
143 return true;
144 }
145
146 bool FormDataParserUrlEncoded::GetNextChar(char* c) {
147 if (offset_ == source_end_ || aborted_)
148 return false;
149 *c = *offset_;
150 ++offset_;
151
152 if (*c == '=') {
153 if (expect_equality_) {
154 ++equality_signs_;
155 expect_equality_ = false;
156 } else {
157 Abort();
158 return false;
159 }
160 }
161 if (*c == '&' && offset_ != source_end_) {
162 if (!expect_equality_) {
163 ++amp_signs_;
164 expect_equality_ = true;
165 } else {
166 Abort();
167 return false;
168 }
169 }
170
171 return true;
172 }
173
174 void FormDataParserUrlEncoded::Abort() {
175 aborted_ = true;
176 }
177
178 // Implementation of FormDataParserMultipart.
179
180 FormDataParserMultipart::FormDataParserMultipart(
181 const std::string& boundary_separator)
182 : source_end_(NULL),
183 offset_(NULL),
184 dash_boundary_("--" + boundary_separator),
185 state_(kStart),
186 value_name_present_(false) {
187 }
188
189 FormDataParserMultipart::~FormDataParserMultipart() {}
190
191 bool FormDataParserMultipart::AllDataReadOK() {
192 return source_.data() != NULL && InFinalState();
193 }
194
195 bool FormDataParserMultipart::GetNextNameValue(Result* result) {
196 if (!value_name_present_ || state_ == kError)
197 return false;
198 result->set_name(next_name_);
199 result->set_value(next_value_);
200 next_name_.clear();
201 next_value_.clear();
202 value_name_present_ = ReadNextNameValue();
203 return true;
204 }
205
206 bool FormDataParserMultipart::SetSource(const base::StringPiece& source) {
207 if (state_ == kError ||
208 source.data() == NULL ||
209 // Message part across a source split is also an error.
210 next_name_.data() != NULL || next_value_.data() != NULL)
211 return false;
212 if (source_.data() != NULL && offset_ != source_end_){
213 // Try to seek until the end. If no name-value pair is found, this is OK.
214 value_name_present_ = ReadNextNameValue();
215 if (!value_name_present_ || offset_ != source_end_)
216 return false;
217 }
218 source_ = source;
219 source_end_ = source_.data() + source_.size();
220 offset_ = source_.data();
221 value_name_present_ = ReadNextNameValue();
222 return true;
223 }
224
225 // static
226 char FormDataParserMultipart::kTransitionToChar[] = {
227 '\n', // For kLF.
228 '\r', // For kCR.
229 0, // For kAscii.
230 0, // For kLwsp.
231 0, // For kDashBoundary.
232 ':', // For kColonT.
233 '-', // For kDash.
234 0, // For kAny.
235 };
236
237 // static
238 FormDataParserMultipart::Transition
239 FormDataParserMultipart::kAvailableTransitions[] = {
240 kDashBoundary, kCR, kAny, // For kStart.
241 kLF, kAny, // For kCR1.
242 kCR, kAny, // For kIgnorePreamble.
243 kLwsp, kCR, kAny, // For kDB1.
244 kLF, kAny, // For kCR2.
245 kAscii, kCR, kAny, // For kPart.
246 kAscii, kColonT, kAny, // For kName.
247 kLF, kCR, kAny, // For kColonS.
248 kCR, kAscii, kAny, // For kEnd1.
249 kLF, kCR, kAscii, kAny, // For kEnd2.
250 kLwsp, kCR, kAscii, kAny, // For kEnd3.
251 kLF, kAny, // For kCR3.
252 kDashBoundary, kCR, kAny, // For kPreData.
253 kLF, kAny, // For kCR4.
254 kCR, kAny, // For kData.
255 kDashBoundary, kAny, // For kData2.
256 kLwsp, kCR, kDash, kAny, // For kDB2.
257 kDash, kAny, // For kD.
258 kLwsp, kCR, kAny, // For kEnd.
259 kLF, kAny, // For kCR5.
260 kAny, // For kIgnoreEpilogue.
261 kAny // For kError.
262 };
263
264 // static
265 FormDataParserMultipart::State FormDataParserMultipart::kNextState[] = {
266 kDB1, kCR1, kIgnorePreamble, // For kStart; size so far: 03.
267 kStart, kIgnorePreamble, // For kCR1; 05.
268 kCR1, kIgnorePreamble, // For kIgnorePreamble; 07.
269 kDB1, kCR2, kError, // For kDB1; 10.
270 kPart, kError, // For kCR2; 12.
271 kName, kCR3, kError, // For kPart; 15.
272 kName, kColonS, kError, // For kName; 18.
273 kEnd1, kEnd2, kColonS, // For kColonS; 21.
274 kCR3, kName, kError, // For kEnd1; 24.
275 kEnd3, kCR3, kName, kError, // For kEnd2; 28.
276 kColonS, kCR3, kName, kError, // For kEnd3; 32.
277 kPreData, kError, // For kCR3; 34.
278 kDB2, kCR3, kData, // For kPreData; 37.
279 kData2, kData, // For kCR4; 39.
280 kCR4, kData, // For kData; 41.
281 kDB2, kCR4, // For kData2; 43.
282 kDB1, kCR2, kD, kError, // For kDB2; 47.
283 kEnd, kError, // For kD; 49.
284 kEnd, kCR5, kError, // For kEnd; 52.
285 kIgnoreEpilogue, kError, // For kCR5; 54.
286 kIgnoreEpilogue, // For kIgnoreEpilogue; 55.
287 kError // For kError; 56.
288 };
289
290 // static
291 size_t FormDataParserMultipart::kStateToTransition[] = {
292 0u, // For kStart
293 3u, // For kCR1
294 5u, // For kIgnorePreamble
295 7u, // For kDB1
296 10u, // For kCR2
297 12u, // For kPart
298 15u, // For kName
299 18u, // For kColonS
300 21u, // For kEnd1
301 24u, // For kEnd2
302 28u, // For kEnd3
303 32u, // For kCR3
304 34u, // For kPreData
305 37u, // For kCR4
306 39u, // For kData
307 41u, // For kData2
308 43u, // For kDB2
309 47u, // For kD
310 49u, // For kEnd
311 52u, // For kCR5
312 54u, // For kIgnoreEpilogue
313 55u, // For kError
314 };
315
316 bool FormDataParserMultipart::ReadNextNameValue() {
317 if (state_ == kError || source_.data() == NULL ||
318 next_name_.data() != NULL || next_value_.data() != NULL)
319 return false;
320
321 // Seek to the next part's headers.
322 while (state_ != kPart) {
323 if (!DoStep())
324 return false;
325 }
326 while (state_ != kPreData) {
327 const char* header = offset_;
328 while (state_ != kColonS) {
329 if (!DoStep())
330 return false;
331 }
332 size_t header_length = 0u;
333 while (state_ != kPreData && state_ != kName) {
334 if (state_ == kEnd1 || state_ == kEnd2 || state_ == kEnd3) {
335 // The cast is safe, we know that offset only moves forward.
336 header_length = static_cast<size_t>(offset_ - header);
337 }
338 if (!DoStep())
339 return false;
340 }
341 if (ParseHeader(base::StringPiece(header, header_length))) {
342 // Found what we were looking for, just skip to the part's body.
343 while (state_ != kPreData) {
344 if (!DoStep())
345 return false;
346 }
347 }
348 }
349
350 const char* body = offset_;
351 size_t body_length = 0;
352 while (state_ != kDB2 && offset_ != source_end_) {
353 if (!DoStep())
354 return false;
355 if (state_ == kCR4) {
356 // We are in the middle of which might be the CRLF starting the part
357 // separator (see the "delimiter" non-terminal from the grammar given
358 // in the header file). The cast is safe, we know that offset only moves
359 // forward and body was assigned at least 1 transition ago.
360 body_length = static_cast<size_t>(offset_ - body - 1);
361 }
362 }
363 if (body_length > 0)
364 next_value_.set(body, body_length);
365 return true;
366 }
367
368 bool FormDataParserMultipart::DoStep() {
369 if (state_ == kError || offset_ == source_end_)
370 return false;
371 size_t transition_index = kStateToTransition[state_];
372 Transition t = kAvailableTransitions[transition_index];
373 while (t != kAny) {
374 const State s = kNextState[transition_index];
375 const size_t length = LookUp(t);
376 if (length > 0) {
377 offset_ += length;
378 state_ = s;
379 return true;
380 }
381 t = kAvailableTransitions[++transition_index];
382 }
383 // We have kAny, the default choice. Seek by one and switch the state.
384 ++offset_;
385 state_ = kNextState[transition_index];
386 return true;
387 }
388
389 // Contract -- the following must be true: offset_ != source_end_ .
390 // The idea is to check this only once in the caller (DoStep()), and do not
391 // repeat it here every time, as this can be called many times from one call
392 // to DoStep().
393 size_t FormDataParserMultipart::LookUp(FormDataParserMultipart::Transition t) {
394 const char ahead = *offset_;
395 const char first_char = kTransitionToChar[t];
396
397 // Easy case: labels corresponding to a single char.
398 if (first_char != 0)
399 return ahead == first_char ? 1u : 0u;
400
401 // Harder cases.
402 switch (t) {
403 // Multiple alternatives, 1-char long: return immediately.
404 case kAscii:
405 return (ahead >= 33 && ahead <= 126 && ahead != ':') ? 1u : 0u;
406 case kLwsp:
407 return (ahead == ' ' || ahead == '\t') ? 1u : 0u;
408
409 // Longer than 1 char: prepare work for later.
410 case kDashBoundary: {
411 const size_t length = dash_boundary_.size();
412 // The cast below is safe, we know that the difference is not negative.
413 if (static_cast<size_t>(source_end_ - offset_) < length ||
414 memcmp(dash_boundary_.c_str(), offset_, length) != 0)
415 return 0u;
416 return length;
417 }
418 case kAny:
419 // We are not supposed to be asked for kAny, but this is the right answer:
420 return 1u;
421 default: // We never get here -- the rest has already been handled above.
422 NOTREACHED();
423 return 0u;
424 }
425 }
426
427 bool FormDataParserMultipart::ParseHeader(const base::StringPiece& header) {
428 static const char kContentDisposition[] = "Content-Disposition:";
429 if (memcmp(header.data(), kContentDisposition,
430 strlen(kContentDisposition) != 0))
431 return false;
432 static const char kNameEquals[] = " name=\"";
433 static const char kFilenameEquals[] = " filename=\"";
434
435 // Mandatory part: find the name and set it as |next_name_|.
436 StringPiece::size_type field_offset = header.find(kNameEquals);
437 if (field_offset == StringPiece::npos)
438 return false;
439 field_offset += strlen(kNameEquals);
440 StringPiece::size_type field_end = header.find('"', field_offset);
441 if (field_end == StringPiece::npos)
442 return false;
443 next_name_.set(header.data() + field_offset, field_end - field_offset);
444
445 // Optional part: find the filename and set it as |next_value_|.
446 field_offset = header.find(kFilenameEquals);
447 if (field_offset == StringPiece::npos)
448 return true; // This was only optional
449 field_offset += strlen(kFilenameEquals);
450 field_end = header.find('"', field_offset);
451 if (field_end == StringPiece::npos)
452 return false; // This is a malformed header.
453 next_value_.set(header.data() + field_offset, field_end - field_offset);
454 return true;
455 }
456
457 } // namespace extensions
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698