Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Side by Side Diff: net/tools/balsa/balsa_frame.cc

Issue 2477703002: Remove now unused Balsa code. (Closed)
Patch Set: Rebase Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/tools/balsa/balsa_frame.h ('k') | net/tools/balsa/balsa_frame_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/tools/balsa/balsa_frame.h"
6
7 // Visual C++ defines _M_IX86_FP as 2 if the /arch:SSE2 compiler option is
8 // specified.
9 #if !defined(__SSE2__) && _M_IX86_FP == 2
10 #define __SSE2__ 1
11 #endif
12
13 #include <assert.h>
14 #if __SSE2__
15 #include <emmintrin.h>
16 #endif // __SSE2__
17
18 #include <limits>
19 #include <string>
20 #include <utility>
21 #include <vector>
22
23 #include "base/logging.h"
24 #include "base/strings/string_piece.h"
25 #include "base/strings/string_util.h"
26 #include "net/tools/balsa/balsa_enums.h"
27 #include "net/tools/balsa/balsa_headers.h"
28 #include "net/tools/balsa/balsa_visitor_interface.h"
29 #include "net/tools/balsa/buffer_interface.h"
30 #include "net/tools/balsa/simple_buffer.h"
31 #include "net/tools/balsa/string_piece_utils.h"
32
33 #if defined(COMPILER_MSVC)
34 #include <intrin.h>
35 #include <string.h>
36
37 #pragma intrinsic(_BitScanForward)
38
39 static int ffs(int i) {
40 unsigned long index;
41 return _BitScanForward(&index, i) ? index + 1 : 0;
42 }
43
44 #define strncasecmp _strnicmp
45 #else
46 #include <strings.h>
47 #endif
48
49 namespace net {
50
51 // Constants holding some header names for headers which can affect the way the
52 // HTTP message is framed, and so must be processed specially:
53 static const char kContentLength[] = "content-length";
54 static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
55 static const char kTransferEncoding[] = "transfer-encoding";
56 static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;
57
58 BalsaFrame::BalsaFrame()
59 : last_char_was_slash_r_(false),
60 saw_non_newline_char_(false),
61 start_was_space_(true),
62 chunk_length_character_extracted_(false),
63 is_request_(true),
64 request_was_head_(false),
65 max_header_length_(16 * 1024),
66 max_request_uri_length_(2048),
67 visitor_(&do_nothing_visitor_),
68 chunk_length_remaining_(0),
69 content_length_remaining_(0),
70 last_slash_n_loc_(NULL),
71 last_recorded_slash_n_loc_(NULL),
72 last_slash_n_idx_(0),
73 term_chars_(0),
74 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
75 last_error_(BalsaFrameEnums::NO_ERROR),
76 headers_(NULL) {
77 }
78
79 BalsaFrame::~BalsaFrame() {}
80
81 void BalsaFrame::Reset() {
82 last_char_was_slash_r_ = false;
83 saw_non_newline_char_ = false;
84 start_was_space_ = true;
85 chunk_length_character_extracted_ = false;
86 // is_request_ = true; // not reset between messages.
87 // request_was_head_ = false; // not reset between messages.
88 // max_header_length_ = 4096; // not reset between messages.
89 // max_request_uri_length_ = 2048; // not reset between messages.
90 // visitor_ = &do_nothing_visitor_; // not reset between messages.
91 chunk_length_remaining_ = 0;
92 content_length_remaining_ = 0;
93 last_slash_n_loc_ = NULL;
94 last_recorded_slash_n_loc_ = NULL;
95 last_slash_n_idx_ = 0;
96 term_chars_ = 0;
97 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
98 last_error_ = BalsaFrameEnums::NO_ERROR;
99 lines_.clear();
100 if (headers_ != NULL) {
101 headers_->Clear();
102 }
103 }
104
105 const char* BalsaFrameEnums::ParseStateToString(
106 BalsaFrameEnums::ParseState error_code) {
107 switch (error_code) {
108 case PARSE_ERROR:
109 return "PARSE_ERROR";
110 case READING_HEADER_AND_FIRSTLINE:
111 return "READING_HEADER_AND_FIRSTLINE";
112 case READING_CHUNK_LENGTH:
113 return "READING_CHUNK_LENGTH";
114 case READING_CHUNK_EXTENSION:
115 return "READING_CHUNK_EXTENSION";
116 case READING_CHUNK_DATA:
117 return "READING_CHUNK_DATA";
118 case READING_CHUNK_TERM:
119 return "READING_CHUNK_TERM";
120 case READING_LAST_CHUNK_TERM:
121 return "READING_LAST_CHUNK_TERM";
122 case READING_TRAILER:
123 return "READING_TRAILER";
124 case READING_UNTIL_CLOSE:
125 return "READING_UNTIL_CLOSE";
126 case READING_CONTENT:
127 return "READING_CONTENT";
128 case MESSAGE_FULLY_READ:
129 return "MESSAGE_FULLY_READ";
130 case NUM_STATES:
131 return "UNKNOWN_STATE";
132 }
133 return "UNKNOWN_STATE";
134 }
135
136 const char* BalsaFrameEnums::ErrorCodeToString(
137 BalsaFrameEnums::ErrorCode error_code) {
138 switch (error_code) {
139 case NO_ERROR:
140 return "NO_ERROR";
141 case NO_STATUS_LINE_IN_RESPONSE:
142 return "NO_STATUS_LINE_IN_RESPONSE";
143 case NO_REQUEST_LINE_IN_REQUEST:
144 return "NO_REQUEST_LINE_IN_REQUEST";
145 case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:
146 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";
147 case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:
148 return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";
149 case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:
150 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";
151 case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:
152 return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";
153 case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:
154 return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";
155 case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:
156 return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";
157 case FAILED_CONVERTING_STATUS_CODE_TO_INT:
158 return "FAILED_CONVERTING_STATUS_CODE_TO_INT";
159 case REQUEST_URI_TOO_LONG:
160 return "REQUEST_URI_TOO_LONG";
161 case HEADERS_TOO_LONG:
162 return "HEADERS_TOO_LONG";
163 case UNPARSABLE_CONTENT_LENGTH:
164 return "UNPARSABLE_CONTENT_LENGTH";
165 case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:
166 return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";
167 case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:
168 return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";
169 case HEADER_MISSING_COLON:
170 return "HEADER_MISSING_COLON";
171 case INVALID_CHUNK_LENGTH:
172 return "INVALID_CHUNK_LENGTH";
173 case CHUNK_LENGTH_OVERFLOW:
174 return "CHUNK_LENGTH_OVERFLOW";
175 case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:
176 return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";
177 case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:
178 return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";
179 case MULTIPLE_CONTENT_LENGTH_KEYS:
180 return "MULTIPLE_CONTENT_LENGTH_KEYS";
181 case MULTIPLE_TRANSFER_ENCODING_KEYS:
182 return "MULTIPLE_TRANSFER_ENCODING_KEYS";
183 case UNKNOWN_TRANSFER_ENCODING:
184 return "UNKNOWN_TRANSFER_ENCODING";
185 case INVALID_HEADER_FORMAT:
186 return "INVALID_HEADER_FORMAT";
187 case INTERNAL_LOGIC_ERROR:
188 return "INTERNAL_LOGIC_ERROR";
189 case NUM_ERROR_CODES:
190 return "UNKNOWN_ERROR";
191 }
192 return "UNKNOWN_ERROR";
193 }
194
195 // Summary:
196 // Parses the first line of either a request or response.
197 // Note that in the case of a detected warning, error_code will be set
198 // but the function will not return false.
199 // Exactly zero or one warning or error (but not both) may be detected
200 // by this function.
201 // Note that this function will not write the data of the first-line
202 // into the header's buffer (that should already have been done elsewhere).
203 //
204 // Pre-conditions:
205 // begin != end
206 // *begin should be a character which is > ' '. This implies that there
207 // is at least one non-whitespace characters between [begin, end).
208 // headers is a valid pointer to a BalsaHeaders class.
209 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
210 // Entire first line must exist between [begin, end)
211 // Exactly zero or one newlines -may- exist between [begin, end)
212 // [begin, end) should exist in the header's buffer.
213 //
214 // Side-effects:
215 // headers will be modified
216 // error_code may be modified if either a warning or error is detected
217 //
218 // Returns:
219 // True if no error (as opposed to warning) is detected.
220 // False if an error (as opposed to warning) is detected.
221
222 //
223 // If there is indeed non-whitespace in the line, then the following
224 // will take care of this for you:
225 // while (*begin <= ' ') ++begin;
226 // ProcessFirstLine(begin, end, is_request, &headers, &error_code);
227 //
228 bool ParseHTTPFirstLine(const char* begin,
229 const char* end,
230 bool is_request,
231 size_t max_request_uri_length,
232 BalsaHeaders* headers,
233 BalsaFrameEnums::ErrorCode* error_code) {
234 const char* current = begin;
235 // HTTP firstlines all have the following structure:
236 // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF
237 // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
238 // ws1 nws1 ws2 nws2 ws3 nws3 ws4
239 // | [-------) [-------) [----------------)
240 // REQ: method request_uri version
241 // RESP: version statuscode reason
242 //
243 // The first NONWS->LWS component we'll call firstline_a.
244 // The second firstline_b, and the third firstline_c.
245 //
246 // firstline_a goes from nws1 to (but not including) ws2
247 // firstline_b goes from nws2 to (but not including) ws3
248 // firstline_c goes from nws3 to (but not including) ws4
249 //
250 // In the code:
251 // ws1 == whitespace_1_idx_
252 // nws1 == non_whitespace_1_idx_
253 // ws2 == whitespace_2_idx_
254 // nws2 == non_whitespace_2_idx_
255 // ws3 == whitespace_3_idx_
256 // nws3 == non_whitespace_3_idx_
257 // ws4 == whitespace_4_idx_
258
259 // Kill all whitespace (including '\r\n') at the end of the line.
260 --end;
261 if (*end != '\n') {
262 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
263 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
264 << headers->OriginalHeadersForDebugging();
265 return false;
266 }
267 while (begin < end && *end <= ' ') {
268 --end;
269 }
270 DCHECK(*end != '\n');
271 if (*end == '\n') {
272 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
273 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
274 << headers->OriginalHeadersForDebugging();
275 return false;
276 }
277 ++end;
278
279 // The two following statements should not be possible.
280 if (end == begin) {
281 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
282 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
283 << headers->OriginalHeadersForDebugging();
284 return false;
285 }
286
287 // whitespace_1_idx_
288 headers->whitespace_1_idx_ = current - begin;
289 // This loop is commented out as it is never used in current code. This is
290 // true only because we don't begin parsing the headers at all until we've
291 // encountered a non whitespace character at the beginning of the stream, at
292 // which point we begin our demarcation of header-start. If we did -not- do
293 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop
294 // would be necessary for the proper functioning of this parsing.
295 // This is left here as this function may (in the future) be refactored out
296 // of the BalsaFrame class so that it may be shared between code in
297 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the
298 // set_first_line() function (at which point it would be necessary).
299 #if 0
300 while (*current <= ' ') {
301 ++current;
302 }
303 #endif
304 // non_whitespace_1_idx_
305 headers->non_whitespace_1_idx_ = current - begin;
306 do {
307 // The first time through, we're guaranteed that the current character
308 // won't be a whitespace (else the loop above wouldn't have terminated).
309 // That implies that we're guaranteed to get at least one non-whitespace
310 // character if we get into this loop at all.
311 ++current;
312 if (current == end) {
313 headers->whitespace_2_idx_ = current - begin;
314 headers->non_whitespace_2_idx_ = current - begin;
315 headers->whitespace_3_idx_ = current - begin;
316 headers->non_whitespace_3_idx_ = current - begin;
317 headers->whitespace_4_idx_ = current - begin;
318 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
319 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
320 *error_code =
321 static_cast<BalsaFrameEnums::ErrorCode>(
322 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
323 is_request);
324 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
325 return false;
326 }
327 goto output_exhausted;
328 }
329 } while (*current > ' ');
330 // whitespace_2_idx_
331 headers->whitespace_2_idx_ = current - begin;
332 do {
333 ++current;
334 // Note that due to the loop which consumes all of the whitespace
335 // at the end of the line, current can never == end while in this function.
336 } while (*current <= ' ');
337 // non_whitespace_2_idx_
338 headers->non_whitespace_2_idx_ = current - begin;
339 do {
340 ++current;
341 if (current == end) {
342 headers->whitespace_3_idx_ = current - begin;
343 headers->non_whitespace_3_idx_ = current - begin;
344 headers->whitespace_4_idx_ = current - begin;
345 // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request
346 // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response
347 *error_code =
348 static_cast<BalsaFrameEnums::ErrorCode>(
349 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE
350 + is_request);
351 goto output_exhausted;
352 }
353 } while (*current > ' ');
354 // whitespace_3_idx_
355 headers->whitespace_3_idx_ = current - begin;
356 do {
357 ++current;
358 // Note that due to the loop which consumes all of the whitespace
359 // at the end of the line, current can never == end while in this function.
360 } while (*current <= ' ');
361 // non_whitespace_3_idx_
362 headers->non_whitespace_3_idx_ = current - begin;
363 headers->whitespace_4_idx_ = end - begin;
364
365 output_exhausted:
366 // Note that we don't fail the parse immediately when parsing of the
367 // firstline fails. Depending on the protocol type, we may want to accept
368 // a firstline with only one or two elements, e.g., for HTTP/0.9:
369 // GET\r\n
370 // or
371 // GET /\r\n
372 // should be parsed without issue (though the visitor should know that
373 // parsing the entire line was not exactly as it should be).
374 //
375 // Eventually, these errors may be removed alltogether, as the visitor can
376 // detect them on its own by examining the size of the various fields.
377 // headers->set_first_line(non_whitespace_1_idx_, current);
378
379 if (is_request) {
380 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
381 max_request_uri_length) {
382 // For requests, we need at least the method. We could assume that a
383 // blank URI means "/". If version isn't stated, it should be assumed
384 // to be HTTP/0.9 by the visitor.
385 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
386 return false;
387 }
388 } else {
389 headers->parsed_response_code_ = 0;
390 {
391 const char* parsed_response_code_current =
392 begin + headers->non_whitespace_2_idx_;
393 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
394 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
395
396 // Convert a string of [0-9]* into an int.
397 // Note that this allows for the conversion of response codes which
398 // are outside the bounds of normal HTTP response codes (no checking
399 // is done to ensure that these are valid-- they're merely parsed)!
400 while (parsed_response_code_current < parsed_response_code_end) {
401 if (*parsed_response_code_current < '0' ||
402 *parsed_response_code_current > '9') {
403 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
404 return false;
405 }
406 size_t status_code_x_10 = headers->parsed_response_code_ * 10;
407 uint8_t c = *parsed_response_code_current - '0';
408 if ((headers->parsed_response_code_ > kMaxDiv10) ||
409 (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {
410 // overflow.
411 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
412 return false;
413 }
414 headers->parsed_response_code_ = status_code_x_10 + c;
415 ++parsed_response_code_current;
416 }
417 }
418 }
419 return true;
420 }
421
422 // begin - beginning of the firstline
423 // end - end of the firstline
424 //
425 // A precondition for this function is that there is non-whitespace between
426 // [begin, end). If this precondition is not met, the function will not perform
427 // as expected (and bad things may happen, and it will eat your first, second,
428 // and third unborn children!).
429 //
430 // Another precondition for this function is that [begin, end) includes
431 // at most one newline, which must be at the end of the line.
432 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
433 BalsaFrameEnums::ErrorCode previous_error = last_error_;
434 if (!ParseHTTPFirstLine(begin,
435 end,
436 is_request_,
437 max_request_uri_length_,
438 headers_,
439 &last_error_)) {
440 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
441 visitor_->HandleHeaderError(this);
442 return;
443 }
444 if (previous_error != last_error_) {
445 visitor_->HandleHeaderWarning(this);
446 }
447
448 if (is_request_) {
449 size_t version_length =
450 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;
451 visitor_->ProcessRequestFirstLine(
452 begin + headers_->non_whitespace_1_idx_,
453 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
454 begin + headers_->non_whitespace_1_idx_,
455 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
456 begin + headers_->non_whitespace_2_idx_,
457 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
458 begin + headers_->non_whitespace_3_idx_,
459 version_length);
460 if (version_length == 0)
461 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
462 } else {
463 visitor_->ProcessResponseFirstLine(
464 begin + headers_->non_whitespace_1_idx_,
465 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
466 begin + headers_->non_whitespace_1_idx_,
467 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
468 begin + headers_->non_whitespace_2_idx_,
469 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
470 begin + headers_->non_whitespace_3_idx_,
471 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
472 }
473 }
474
475 // 'stream_begin' points to the first character of the headers buffer.
476 // 'line_begin' points to the first character of the line.
477 // 'current' points to a char which is ':'.
478 // 'line_end' points to the position of '\n' + 1.
479 // 'line_begin' points to the position of first character of line.
480 void BalsaFrame::CleanUpKeyValueWhitespace(
481 const char* stream_begin,
482 const char* line_begin,
483 const char* current,
484 const char* line_end,
485 HeaderLineDescription* current_header_line) {
486 const char* colon_loc = current;
487 DCHECK_LT(colon_loc, line_end);
488 DCHECK_EQ(':', *colon_loc);
489 DCHECK_EQ(':', *current);
490 DCHECK_GE(' ', *line_end)
491 << "\"" << std::string(line_begin, line_end) << "\"";
492
493 // TODO(fenix): Investigate whether or not the bounds tests in the
494 // while loops here are redundant, and if so, remove them.
495 --current;
496 while (current > line_begin && *current <= ' ') --current;
497 current += (current != colon_loc);
498 current_header_line->key_end_idx = current - stream_begin;
499
500 current = colon_loc;
501 DCHECK_EQ(':', *current);
502 ++current;
503 while (current < line_end && *current <= ' ') ++current;
504 current_header_line->value_begin_idx = current - stream_begin;
505
506 DCHECK_GE(current_header_line->key_end_idx,
507 current_header_line->first_char_idx);
508 DCHECK_GE(current_header_line->value_begin_idx,
509 current_header_line->key_end_idx);
510 DCHECK_GE(current_header_line->last_char_idx,
511 current_header_line->value_begin_idx);
512 }
513
514 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {
515 DCHECK(!lines_.empty());
516 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
517 // The last line is always just a newline (and is uninteresting).
518 const Lines::size_type lines_size_m1 = lines_.size() - 1;
519 #if __SSE2__
520 const __m128i colons = _mm_set1_epi8(':');
521 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;
522 #endif // __SSE2__
523 const char* current = stream_begin + lines_[1].first;
524 // This code is a bit more subtle than it may appear at first glance.
525 // This code looks for a colon in the current line... but it also looks
526 // beyond the current line. If there is no colon in the current line, then
527 // for each subsequent line (until the colon which -has- been found is
528 // associated with a line), no searching for a colon will be performed. In
529 // this way, we minimize the amount of bytes we have scanned for a colon.
530 for (Lines::size_type i = 1; i < lines_size_m1;) {
531 const char* line_begin = stream_begin + lines_[i].first;
532
533 // Here we handle possible continuations. Note that we do not replace
534 // the '\n' in the line before a continuation (at least, as of now),
535 // which implies that any code which looks for a value must deal with
536 // "\r\n", etc -within- the line (and not just at the end of it).
537 for (++i; i < lines_size_m1; ++i) {
538 const char c = *(stream_begin + lines_[i].first);
539 if (c > ' ') {
540 // Not a continuation, so stop. Note that if the 'original' i = 1,
541 // and the next line is not a continuation, we'll end up with i = 2
542 // when we break. This handles the incrementing of i for the outer
543 // loop.
544 break;
545 }
546 }
547 const char* line_end = stream_begin + lines_[i - 1].second;
548 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
549
550 // We cleanup the whitespace at the end of the line before doing anything
551 // else of interest as it allows us to do nothing when irregularly formatted
552 // headers are parsed (e.g. those with only keys, only values, or no colon).
553 //
554 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
555 --line_end;
556 DCHECK_EQ('\n', *line_end)
557 << "\"" << std::string(line_begin, line_end) << "\"";
558 while (*line_end <= ' ' && line_end > line_begin) {
559 --line_end;
560 }
561 ++line_end;
562 DCHECK_GE(' ', *line_end);
563 DCHECK_LT(line_begin, line_end);
564
565 // We use '0' for the block idx, because we're always writing to the first
566 // block from the framer (we do this because the framer requires that the
567 // entire header sequence be in a contiguous buffer).
568 headers_->header_lines_.push_back(
569 HeaderLineDescription(line_begin - stream_begin,
570 line_end - stream_begin,
571 line_end - stream_begin,
572 line_end - stream_begin,
573 0));
574 if (current >= line_end) {
575 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
576 visitor_->HandleHeaderWarning(this);
577 // Then the next colon will not be found within this header line-- time
578 // to try again with another header-line.
579 continue;
580 } else if (current < line_begin) {
581 // When this condition is true, the last detected colon was part of a
582 // previous line. We reset to the beginning of the line as we don't care
583 // about the presence of any colon before the beginning of the current
584 // line.
585 current = line_begin;
586 }
587 #if __SSE2__
588 while (current < header_lines_end_m16) {
589 __m128i header_bytes =
590 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));
591 __m128i colon_cmp = _mm_cmpeq_epi8(header_bytes, colons);
592 int colon_msk = _mm_movemask_epi8(colon_cmp);
593 if (colon_msk == 0) {
594 current += 16;
595 continue;
596 }
597 current += (ffs(colon_msk) - 1);
598 if (current > line_end) {
599 break;
600 }
601 goto found_colon;
602 }
603 #endif // __SSE2__
604 for (; current < line_end; ++current) {
605 if (*current != ':') {
606 continue;
607 }
608 goto found_colon;
609 }
610 // If we've gotten to here, then there was no colon
611 // in the line. The arguments we passed into the construction
612 // for the HeaderLineDescription object should be OK-- it assumes
613 // that the entire content is 'key' by default (which is true, as
614 // there was no colon, there can be no value). Note that this is a
615 // construct which is technically not allowed by the spec.
616 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
617 visitor_->HandleHeaderWarning(this);
618 continue;
619 found_colon:
620 DCHECK_EQ(*current, ':');
621 DCHECK_LE(current - stream_begin, line_end - stream_begin);
622 DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
623
624 HeaderLineDescription& current_header_line = headers_->header_lines_.back();
625 current_header_line.key_end_idx = current - stream_begin;
626 current_header_line.value_begin_idx = current_header_line.key_end_idx;
627 if (current < line_end) {
628 ++current_header_line.key_end_idx;
629
630 CleanUpKeyValueWhitespace(stream_begin,
631 line_begin,
632 current,
633 line_end,
634 &current_header_line);
635 }
636 }
637 }
638
639 void BalsaFrame::ProcessContentLengthLine(
640 HeaderLines::size_type line_idx,
641 BalsaHeadersEnums::ContentLengthStatus* status,
642 size_t* length) {
643 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
644 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
645 const char* line_end = stream_begin + header_line.last_char_idx;
646 const char* value_begin = (stream_begin + header_line.value_begin_idx);
647
648 if (value_begin >= line_end) {
649 // There is no non-whitespace value data.
650 #if DEBUGFRAMER
651 LOG(INFO) << "invalid content-length -- no non-whitespace value data";
652 #endif
653 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
654 return;
655 }
656
657 *length = 0;
658 while (value_begin < line_end) {
659 if (*value_begin < '0' || *value_begin > '9') {
660 // bad! content-length found, and couldn't parse all of it!
661 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
662 #if DEBUGFRAMER
663 LOG(INFO) << "invalid content-length - non numeric character detected";
664 #endif // DEBUGFRAMER
665 return;
666 }
667 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
668 size_t length_x_10 = *length * 10;
669 const unsigned char c = *value_begin - '0';
670 if (*length > kMaxDiv10 ||
671 (std::numeric_limits<size_t>::max() - length_x_10) < c) {
672 *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
673 #if DEBUGFRAMER
674 LOG(INFO) << "content-length overflow";
675 #endif // DEBUGFRAMER
676 return;
677 }
678 *length = length_x_10 + c;
679 ++value_begin;
680 }
681 #if DEBUGFRAMER
682 LOG(INFO) << "content_length parsed: " << *length;
683 #endif // DEBUGFRAMER
684 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
685 }
686
687 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
688 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
689 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
690 const char* line_end = stream_begin + header_line.last_char_idx;
691 const char* value_begin = stream_begin + header_line.value_begin_idx;
692 size_t value_length = line_end - value_begin;
693
694 if ((value_length == 7) &&
695 !strncasecmp(value_begin, "chunked", 7)) {
696 headers_->transfer_encoding_is_chunked_ = true;
697 } else if ((value_length == 8) &&
698 !strncasecmp(value_begin, "identity", 8)) {
699 headers_->transfer_encoding_is_chunked_ = false;
700 } else {
701 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;
702 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
703 visitor_->HandleHeaderError(this);
704 return;
705 }
706 }
707
708 namespace {
709 bool SplitStringPiece(base::StringPiece original, char delim,
710 base::StringPiece* before, base::StringPiece* after) {
711 const char* p = original.data();
712 const char* end = p + original.size();
713
714 while (p != end) {
715 if (*p == delim) {
716 ++p;
717 } else {
718 const char* start = p;
719 while (++p != end && *p != delim) {
720 // Skip to the next occurence of the delimiter.
721 }
722 *before = base::StringPiece(start, p - start);
723 if (p != end)
724 *after = base::StringPiece(p + 1, end - (p + 1));
725 else
726 *after = base::StringPiece("");
727 *before = base::TrimWhitespaceASCII(*before, base::TRIM_ALL);
728 *after = base::TrimWhitespaceASCII(*after, base::TRIM_ALL);
729 return true;
730 }
731 }
732
733 *before = original;
734 *after = "";
735 return false;
736 }
737
738 // TODO(phython): Fix this function to properly deal with quoted values.
739 // E.g. ";;foo", "\";;\"", or \"aa;
740 // The last example, the semi-colon is a separator between extensions.
741 void ProcessChunkExtensionsManual(base::StringPiece all_extensions,
742 BalsaHeaders* extensions) {
743 base::StringPiece extension;
744 base::StringPiece remaining;
745 all_extensions = base::TrimWhitespaceASCII(all_extensions, base::TRIM_ALL);
746 SplitStringPiece(all_extensions, ';', &extension, &remaining);
747 while (!extension.empty()) {
748 base::StringPiece key;
749 base::StringPiece value;
750 SplitStringPiece(extension, '=', &key, &value);
751 if (!value.empty()) {
752 // Strip quotation marks if they exist.
753 if (!value.empty() && value.front() == '"')
754 value.remove_prefix(1);
755 if (!value.empty() && value.back() == '"')
756 value.remove_suffix(1);
757 }
758
759 extensions->AppendHeader(key, value);
760
761 remaining = base::TrimWhitespaceASCII(remaining, base::TRIM_ALL);
762 SplitStringPiece(remaining, ';', &extension, &remaining);
763 }
764 }
765
766 } // anonymous namespace
767
768 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,
769 BalsaHeaders* extensions) {
770 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);
771 }
772
773 void BalsaFrame::ProcessHeaderLines() {
774 HeaderLines::size_type content_length_idx = 0;
775 HeaderLines::size_type transfer_encoding_idx = 0;
776
777 DCHECK(!lines_.empty());
778 #if DEBUGFRAMER
779 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
780 #endif // DEBUGFRAMER
781
782 // There is no need to attempt to process headers if no header lines exist.
783 // There are at least two lines in the message which are not header lines.
784 // These two non-header lines are the first line of the message, and the
785 // last line of the message (which is an empty line).
786 // Thus, we test to see if we have more than two lines total before attempting
787 // to parse any header lines.
788 if (lines_.size() > 2) {
789 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
790
791 // Then, for the rest of the header data, we parse these into key-value
792 // pairs.
793 FindColonsAndParseIntoKeyValue();
794 // At this point, we've parsed all of the headers. Time to look for those
795 // headers which we require for framing.
796 const HeaderLines::size_type
797 header_lines_size = headers_->header_lines_.size();
798 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
799 const HeaderLineDescription& current_header_line =
800 headers_->header_lines_[i];
801 const char* key_begin =
802 (stream_begin + current_header_line.first_char_idx);
803 const char* key_end = (stream_begin + current_header_line.key_end_idx);
804 const size_t key_len = key_end - key_begin;
805 const char c = *key_begin;
806 #if DEBUGFRAMER
807 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)
808 << " c: '" << c << "' key_len: " << key_len;
809 #endif // DEBUGFRAMER
810 // If a header begins with either lowercase or uppercase 'c' or 't', then
811 // the header may be one of content-length, connection, content-encoding
812 // or transfer-encoding. These headers are special, as they change the way
813 // that the message is framed, and so the framer is required to search
814 // for them.
815
816
817 if (c == 'c' || c == 'C') {
818 if ((key_len == kContentLengthSize) &&
819 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {
820 BalsaHeadersEnums::ContentLengthStatus content_length_status =
821 BalsaHeadersEnums::NO_CONTENT_LENGTH;
822 size_t length = 0;
823 ProcessContentLengthLine(i, &content_length_status, &length);
824 if (content_length_idx != 0) { // then we've already seen one!
825 if ((headers_->content_length_status_ != content_length_status) ||
826 ((headers_->content_length_status_ ==
827 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
828 length != headers_->content_length_)) {
829 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;
830 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
831 visitor_->HandleHeaderError(this);
832 return;
833 }
834 continue;
835 } else {
836 content_length_idx = i + 1;
837 headers_->content_length_status_ = content_length_status;
838 headers_->content_length_ = length;
839 content_length_remaining_ = length;
840 }
841
842 }
843 } else if (c == 't' || c == 'T') {
844 if ((key_len == kTransferEncodingSize) &&
845 0 == strncasecmp(key_begin, kTransferEncoding,
846 kTransferEncodingSize)) {
847 if (transfer_encoding_idx != 0) {
848 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;
849 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
850 visitor_->HandleHeaderError(this);
851 return;
852 }
853 transfer_encoding_idx = i + 1;
854 }
855 } else if (i == 0 && (key_len == 0 || c == ' ')) {
856 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;
857 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
858 visitor_->HandleHeaderError(this);
859 return;
860 }
861 }
862 if (headers_->transfer_encoding_is_chunked_) {
863 headers_->content_length_ = 0;
864 headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
865 content_length_remaining_ = 0;
866 }
867 if (transfer_encoding_idx != 0) {
868 ProcessTransferEncodingLine(transfer_encoding_idx - 1);
869 }
870 }
871 }
872
873 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
874 // For responses, can't have a body if the request was a HEAD, or if it is
875 // one of these response-codes. rfc2616 section 4.3
876 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
877 if (is_request_ ||
878 !(request_was_head_ ||
879 (headers_->parsed_response_code_ >= 100 &&
880 headers_->parsed_response_code_ < 200) ||
881 (headers_->parsed_response_code_ == 204) ||
882 (headers_->parsed_response_code_ == 304))) {
883 // Then we can have a body.
884 if (headers_->transfer_encoding_is_chunked_) {
885 // Note that
886 // if ( Transfer-Encoding: chunked && Content-length: )
887 // then Transfer-Encoding: chunked trumps.
888 // This is as specified in the spec.
889 // rfc2616 section 4.4.3
890 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
891 } else {
892 // Errors parsing content-length definitely can cause
893 // protocol errors/warnings
894 switch (headers_->content_length_status_) {
895 // If we have a content-length, and it is parsed
896 // properly, there are two options.
897 // 1) zero content, in which case the message is done, and
898 // 2) nonzero content, in which case we have to
899 // consume the body.
900 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
901 if (headers_->content_length_ == 0) {
902 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
903 } else {
904 parse_state_ = BalsaFrameEnums::READING_CONTENT;
905 }
906 break;
907 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
908 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
909 // If there were characters left-over after parsing the
910 // content length, we should flag an error and stop.
911 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
912 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;
913 visitor_->HandleHeaderError(this);
914 break;
915 // We can have: no transfer-encoding, no content length, and no
916 // connection: close...
917 // Unfortunately, this case doesn't seem to be covered in the spec.
918 // We'll assume that the safest thing to do here is what the google
919 // binaries before 2008 already do, which is to assume that
920 // everything until the connection is closed is body.
921 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
922 if (is_request_) {
923 base::StringPiece method = headers_->request_method();
924 // POSTs and PUTs should have a detectable body length. If they
925 // do not we consider it an error.
926 if ((method.size() == 4 &&
927 strncmp(method.data(), "POST", 4) == 0) ||
928 (method.size() == 3 &&
929 strncmp(method.data(), "PUT", 3) == 0)) {
930 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
931 last_error_ =
932 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;
933 visitor_->HandleHeaderError(this);
934 break;
935 }
936 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
937 } else {
938 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
939 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;
940 visitor_->HandleHeaderWarning(this);
941 }
942 break;
943 // The COV_NF_... statements here provide hints to the apparatus
944 // which computes coverage reports/ratios that this code is never
945 // intended to be executed, and should technically be impossible.
946 // COV_NF_START
947 default:
948 LOG(FATAL) << "Saw a content_length_status: "
949 << headers_->content_length_status_ << " which is unknown.";
950 // COV_NF_END
951 }
952 }
953 }
954 }
955
956 size_t BalsaFrame::ProcessHeaders(const char* message_start,
957 size_t message_length) {
958 const char* const original_message_start = message_start;
959 const char* const message_end = message_start + message_length;
960 const char* message_current = message_start;
961 const char* checkpoint = message_start;
962
963 if (message_length == 0) {
964 goto bottom;
965 }
966
967 while (message_current < message_end) {
968 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
969
970 // Yes, we could use strchr (assuming null termination), or
971 // memchr, but as it turns out that is slower than this tight loop
972 // for the input that we see.
973 if (!saw_non_newline_char_) {
974 do {
975 const char c = *message_current;
976 if (c != '\r' && c != '\n') {
977 if (c <= ' ') {
978 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
979 last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;
980 visitor_->HandleHeaderError(this);
981 goto bottom;
982 } else {
983 saw_non_newline_char_ = true;
984 checkpoint = message_start = message_current;
985 goto read_real_message;
986 }
987 }
988 ++message_current;
989 } while (message_current < message_end);
990 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks
991 } else {
992 read_real_message:
993 // Note that SSE2 can be enabled on certain piii platforms.
994 #if __SSE2__
995 {
996 const char* const message_end_m16 = message_end - 16;
997 __m128i newlines = _mm_set1_epi8('\n');
998 while (message_current < message_end_m16) {
999 // What this does (using compiler intrinsics):
1000 //
1001 // Load 16 '\n's into an xmm register
1002 // Load 16 bytes of currennt message into an xmm register
1003 // Do byte-wise equals on those two xmm registers
1004 // Take the first bit of each byte, and put that into the first
1005 // 16 bits of a mask
1006 // If the mask is zero, no '\n' found. increment by 16 and try again
1007 // Else scan forward to find the first set bit.
1008 // Increment current by the index of the first set bit
1009 // (ffs returns index of first set bit + 1)
1010 __m128i msg_bytes =
1011 _mm_loadu_si128(const_cast<__m128i *>(
1012 reinterpret_cast<const __m128i *>(message_current)));
1013 __m128i newline_cmp = _mm_cmpeq_epi8(msg_bytes, newlines);
1014 int newline_msk = _mm_movemask_epi8(newline_cmp);
1015 if (newline_msk == 0) {
1016 message_current += 16;
1017 continue;
1018 }
1019 message_current += (ffs(newline_msk) - 1);
1020 const size_t relative_idx = message_current - message_start;
1021 const size_t message_current_idx = 1 + base_idx + relative_idx;
1022 lines_.push_back(std::make_pair(last_slash_n_idx_,
1023 message_current_idx));
1024 if (lines_.size() == 1) {
1025 headers_->WriteFromFramer(checkpoint,
1026 1 + message_current - checkpoint);
1027 checkpoint = message_current + 1;
1028 const char* begin = headers_->OriginalHeaderStreamBegin();
1029 #if DEBUGFRAMER
1030 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1031 LOG(INFO) << "is_request_: " << is_request_;
1032 #endif
1033 ProcessFirstLine(begin, begin + lines_[0].second);
1034 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1035 goto process_lines;
1036 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1037 goto bottom;
1038 }
1039 const size_t chars_since_last_slash_n = (message_current_idx -
1040 last_slash_n_idx_);
1041 last_slash_n_idx_ = message_current_idx;
1042 if (chars_since_last_slash_n > 2) {
1043 // We have a slash-n, but the last slash n was
1044 // more than 2 characters away from this. Thus, we know
1045 // that this cannot be an end-of-header.
1046 ++message_current;
1047 continue;
1048 }
1049 if ((chars_since_last_slash_n == 1) ||
1050 (((message_current > message_start) &&
1051 (*(message_current - 1) == '\r')) ||
1052 (last_char_was_slash_r_))) {
1053 goto process_lines;
1054 }
1055 ++message_current;
1056 }
1057 }
1058 #endif // __SSE2__
1059 while (message_current < message_end) {
1060 if (*message_current != '\n') {
1061 ++message_current;
1062 continue;
1063 }
1064 const size_t relative_idx = message_current - message_start;
1065 const size_t message_current_idx = 1 + base_idx + relative_idx;
1066 lines_.push_back(std::make_pair(last_slash_n_idx_,
1067 message_current_idx));
1068 if (lines_.size() == 1) {
1069 headers_->WriteFromFramer(checkpoint,
1070 1 + message_current - checkpoint);
1071 checkpoint = message_current + 1;
1072 const char* begin = headers_->OriginalHeaderStreamBegin();
1073 #if DEBUGFRAMER
1074 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1075 LOG(INFO) << "is_request_: " << is_request_;
1076 #endif
1077 ProcessFirstLine(begin, begin + lines_[0].second);
1078 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1079 goto process_lines;
1080 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1081 goto bottom;
1082 }
1083 const size_t chars_since_last_slash_n = (message_current_idx -
1084 last_slash_n_idx_);
1085 last_slash_n_idx_ = message_current_idx;
1086 if (chars_since_last_slash_n > 2) {
1087 // false positive.
1088 ++message_current;
1089 continue;
1090 }
1091 if ((chars_since_last_slash_n == 1) ||
1092 (((message_current > message_start) &&
1093 (*(message_current - 1) == '\r')) ||
1094 (last_char_was_slash_r_))) {
1095 goto process_lines;
1096 }
1097 ++message_current;
1098 }
1099 }
1100 continue;
1101 process_lines:
1102 ++message_current;
1103 DCHECK(message_current >= message_start);
1104 if (message_current > message_start) {
1105 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
1106 }
1107
1108 // Check if we have exceeded maximum headers length
1109 // Although we check for this limit before and after we call this function
1110 // we check it here as well to make sure that in case the visitor changed
1111 // the max_header_length_ (for example after processing the first line)
1112 // we handle it gracefully.
1113 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
1114 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1115 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1116 visitor_->HandleHeaderError(this);
1117 goto bottom;
1118 }
1119
1120 // Since we know that we won't be writing any more bytes of the header,
1121 // we tell that to the headers object. The headers object may make
1122 // more efficient allocation decisions when this is signaled.
1123 headers_->DoneWritingFromFramer();
1124 {
1125 const char* readable_ptr = NULL;
1126 size_t readable_size = 0;
1127 headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);
1128 visitor_->ProcessHeaderInput(readable_ptr, readable_size);
1129 }
1130
1131 // Ok, now that we've written everything into our header buffer, it is
1132 // time to process the header lines (extract proper values for headers
1133 // which are important for framing).
1134 ProcessHeaderLines();
1135 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1136 goto bottom;
1137 }
1138 AssignParseStateAfterHeadersHaveBeenParsed();
1139 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1140 goto bottom;
1141 }
1142 visitor_->ProcessHeaders(*headers_);
1143 visitor_->HeaderDone();
1144 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
1145 visitor_->MessageDone();
1146 }
1147 goto bottom;
1148 }
1149 // If we've gotten to here, it means that we've consumed all of the
1150 // available input. We need to record whether or not the last character we
1151 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
1152 // a header framing that is split across the two calls.
1153 last_char_was_slash_r_ = (*(message_end - 1) == '\r');
1154 DCHECK(message_current >= message_start);
1155 if (message_current > message_start) {
1156 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
1157 }
1158 bottom:
1159 return message_current - original_message_start;
1160 }
1161
1162
1163 size_t BalsaFrame::BytesSafeToSplice() const {
1164 switch (parse_state_) {
1165 case BalsaFrameEnums::READING_CHUNK_DATA:
1166 return chunk_length_remaining_;
1167 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1168 return std::numeric_limits<size_t>::max();
1169 case BalsaFrameEnums::READING_CONTENT:
1170 return content_length_remaining_;
1171 default:
1172 return 0;
1173 }
1174 }
1175
1176 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
1177 switch (parse_state_) {
1178 case BalsaFrameEnums::READING_CHUNK_DATA:
1179 if (chunk_length_remaining_ >= bytes_spliced) {
1180 chunk_length_remaining_ -= bytes_spliced;
1181 if (chunk_length_remaining_ == 0) {
1182 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1183 }
1184 return;
1185 } else {
1186 last_error_ =
1187 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1188 goto error_exit;
1189 }
1190
1191 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1192 return;
1193
1194 case BalsaFrameEnums::READING_CONTENT:
1195 if (content_length_remaining_ >= bytes_spliced) {
1196 content_length_remaining_ -= bytes_spliced;
1197 if (content_length_remaining_ == 0) {
1198 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1199 visitor_->MessageDone();
1200 }
1201 return;
1202 } else {
1203 last_error_ =
1204 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1205 goto error_exit;
1206 }
1207
1208 default:
1209 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;
1210 goto error_exit;
1211 }
1212
1213 error_exit:
1214 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1215 visitor_->HandleBodyError(this);
1216 };
1217
1218 // You may note that the state-machine contained within this function has both
1219 // switch and goto labels for nearly the same thing. For instance, the
1220 // following two labels refer to the same code block:
1221 // label_reading_chunk_data:
1222 // case BalsaFrameEnums::READING_CHUNK_DATA:
1223 // The 'case' statement is required for the switch statement which occurs when
1224 // ProcessInput is invoked. The goto label is required as the state-machine
1225 // does not use a computed goto in any subsequent operations.
1226 //
1227 // Since several states exit the state machine for various reasons, there is
1228 // also one label at the bottom of the function. When it is appropriate to
1229 // return from the function, that part of the state machine instead issues a
1230 // goto bottom; This results in less code duplication, and makes debugging
1231 // easier (as you can add a statement to a section of code which is guaranteed
1232 // to be invoked when the function is exiting.
1233 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
1234 const char* current = input;
1235 const char* on_entry = current;
1236 const char* end = current + size;
1237 #if DEBUGFRAMER
1238 LOG(INFO) << "\n=============="
1239 << BalsaFrameEnums::ParseStateToString(parse_state_)
1240 << "===============\n";
1241 #endif // DEBUGFRAMER
1242
1243 DCHECK(headers_ != NULL);
1244 if (headers_ == NULL) return 0;
1245
1246 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1247 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
1248 // Yes, we still have to check this here as the user can change the
1249 // max_header_length amount!
1250 // Also it is possible that we have reached the maximum allowed header size,
1251 // and we have more to consume (remember we are still inside
1252 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
1253 if (header_length > max_header_length_ ||
1254 (header_length == max_header_length_ && size > 0)) {
1255 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1256 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1257 visitor_->HandleHeaderError(this);
1258 goto bottom;
1259 }
1260 size_t bytes_to_process = max_header_length_ - header_length;
1261 if (bytes_to_process > size) {
1262 bytes_to_process = size;
1263 }
1264 current += ProcessHeaders(input, bytes_to_process);
1265 // If we are still reading headers check if we have crossed the headers
1266 // limit. Note that we check for >= as opposed to >. This is because if
1267 // header_length_after equals max_header_length_ and we are still in the
1268 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
1269 // sure that the headers limit will be crossed later on
1270 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1271 // Note that headers_ is valid only if we are still reading headers.
1272 const size_t header_length_after =
1273 headers_->GetReadableBytesFromHeaderStream();
1274 if (header_length_after >= max_header_length_) {
1275 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1276 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1277 visitor_->HandleHeaderError(this);
1278 }
1279 }
1280 goto bottom;
1281 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1282 parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1283 // Can do nothing more 'till we're reset.
1284 goto bottom;
1285 }
1286
1287 while (current < end) {
1288 switch (parse_state_) {
1289 label_reading_chunk_length:
1290 case BalsaFrameEnums::READING_CHUNK_LENGTH:
1291 // In this state we read the chunk length.
1292 // Note that once we hit a character which is not in:
1293 // [0-9;A-Fa-f\n], we transition to a different state.
1294 //
1295 {
1296 // If we used strtol, etc, we'd have to buffer this line.
1297 // This is more annoying than simply doing the conversion
1298 // here. This code accounts for overflow.
1299 static const signed char buf[] = {
1300 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f
1301 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
1302 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f
1303 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1304 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f
1305 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1306 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f
1307 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1,
1308 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f
1309 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1310 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f
1311 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1312 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f
1313 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1314 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f
1315 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1316 };
1317 // valid cases:
1318 // "09123\n" // -> 09123
1319 // "09123\r\n" // -> 09123
1320 // "09123 \n" // -> 09123
1321 // "09123 \r\n" // -> 09123
1322 // "09123 12312\n" // -> 09123
1323 // "09123 12312\r\n" // -> 09123
1324 // "09123; foo=bar\n" // -> 09123
1325 // "09123; foo=bar\r\n" // -> 09123
1326 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF
1327 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF
1328 // invalid cases:
1329 // "[ \t]+[^\n]*\n"
1330 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)
1331 // "\r\n"
1332 // "\n"
1333 while (current < end) {
1334 const char c = *current;
1335 ++current;
1336 const signed char addition = buf[static_cast<int>(c)];
1337 if (addition >= 0) {
1338 chunk_length_character_extracted_ = true;
1339 size_t length_x_16 = chunk_length_remaining_ * 16;
1340 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
1341 if ((chunk_length_remaining_ > kMaxDiv16) ||
1342 ((std::numeric_limits<size_t>::max() - length_x_16) <
1343 static_cast<size_t>(addition))) {
1344 // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1345 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1346 last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;
1347 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1348 visitor_->HandleChunkingError(this);
1349 goto bottom;
1350 }
1351 chunk_length_remaining_ = length_x_16 + addition;
1352 continue;
1353 }
1354
1355 if (!chunk_length_character_extracted_ || addition == -1) {
1356 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
1357 // characters were converted, or an unexpected character was
1358 // seen.
1359 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1360 last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;
1361 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1362 visitor_->HandleChunkingError(this);
1363 goto bottom;
1364 }
1365
1366 --current;
1367 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1368 visitor_->ProcessChunkLength(chunk_length_remaining_);
1369 goto label_reading_chunk_extension;
1370 }
1371 }
1372 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1373 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH
1374
1375 label_reading_chunk_extension:
1376 case BalsaFrameEnums::READING_CHUNK_EXTENSION:
1377 {
1378 // TODO(phython): Convert this scanning to be 16 bytes at a time if
1379 // there is data to be read.
1380 const char* extensions_start = current;
1381 size_t extensions_length = 0;
1382 while (current < end) {
1383 const char c = *current;
1384 if (c == '\r' || c == '\n') {
1385 extensions_length =
1386 (extensions_start == current) ?
1387 0 :
1388 current - extensions_start - 1;
1389 }
1390
1391 ++current;
1392 if (c == '\n') {
1393 chunk_length_character_extracted_ = false;
1394 visitor_->ProcessChunkExtensions(
1395 extensions_start, extensions_length);
1396 if (chunk_length_remaining_ != 0) {
1397 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1398 goto label_reading_chunk_data;
1399 }
1400 HeaderFramingFound('\n');
1401 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1402 goto label_reading_last_chunk_term;
1403 }
1404 }
1405 visitor_->ProcessChunkExtensions(
1406 extensions_start, extensions_length);
1407 }
1408
1409 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1410 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION
1411
1412 label_reading_chunk_data:
1413 case BalsaFrameEnums::READING_CHUNK_DATA:
1414 while (current < end) {
1415 if (chunk_length_remaining_ == 0) {
1416 break;
1417 }
1418 // read in the chunk
1419 size_t bytes_remaining = end - current;
1420 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?
1421 chunk_length_remaining_ : bytes_remaining;
1422 const char* tmp_current = current + consumed_bytes;
1423 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);
1424 visitor_->ProcessBodyData(current, consumed_bytes);
1425 on_entry = current = tmp_current;
1426 chunk_length_remaining_ -= consumed_bytes;
1427 }
1428 if (chunk_length_remaining_ == 0) {
1429 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1430 goto label_reading_chunk_term;
1431 }
1432 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1433 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA
1434
1435 label_reading_chunk_term:
1436 case BalsaFrameEnums::READING_CHUNK_TERM:
1437 while (current < end) {
1438 const char c = *current;
1439 ++current;
1440
1441 if (c == '\n') {
1442 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1443 goto label_reading_chunk_length;
1444 }
1445 }
1446 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1447 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM
1448
1449 label_reading_last_chunk_term:
1450 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1451 while (current < end) {
1452 const char c = *current;
1453
1454 if (!HeaderFramingFound(c)) {
1455 // If not, however, since the spec only suggests that the
1456 // client SHOULD indicate the presence of trailers, we get to
1457 // *test* that they did or didn't.
1458 // If all of the bytes we've seen since:
1459 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1460 // are either '\r', or '\n', then we can assume that we don't yet
1461 // know if we need to parse headers, or if the next byte will make
1462 // the HeaderFramingFound condition (above) true.
1463 if (HeaderFramingMayBeFound()) {
1464 // If true, then we have seen only characters '\r' or '\n'.
1465 ++current;
1466
1467 // Lets try again! There is no state change here.
1468 continue;
1469 } else {
1470 // If (!HeaderFramingMayBeFound()), then we know that we must be
1471 // reading the first non CRLF character of a trailer.
1472 parse_state_ = BalsaFrameEnums::READING_TRAILER;
1473 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1474 on_entry = current;
1475 goto label_reading_trailer;
1476 }
1477 } else {
1478 // If we've found a "\r\n\r\n", then the message
1479 // is done.
1480 ++current;
1481 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1482 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1483 visitor_->MessageDone();
1484 goto bottom;
1485 }
1486 break; // from while loop
1487 }
1488 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1489 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM
1490
1491 label_reading_trailer:
1492 case BalsaFrameEnums::READING_TRAILER:
1493 while (current < end) {
1494 const char c = *current;
1495 ++current;
1496 // TODO(fenix): If we ever care about trailers as part of framing,
1497 // deal with them here (see below for part of the 'solution')
1498 // if (LineFramingFound(c)) {
1499 // trailer_lines_.push_back(make_pair(start_of_line_,
1500 // trailer_length_ - 1));
1501 // start_of_line_ = trailer_length_;
1502 // }
1503 if (HeaderFramingFound(c)) {
1504 // ProcessTrailers(visitor_, &trailers_);
1505 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1506 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1507 visitor_->MessageDone();
1508 goto bottom;
1509 }
1510 }
1511 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1512 break; // case BalsaFrameEnums::READING_TRAILER
1513
1514 // Note that there is no label:
1515 // 'label_reading_until_close'
1516 // here. This is because the state-machine exists immediately after
1517 // reading the headers instead of transitioning here (as it would
1518 // do if it was consuming all the data it could, all the time).
1519 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1520 {
1521 const size_t bytes_remaining = end - current;
1522 if (bytes_remaining > 0) {
1523 visitor_->ProcessBodyInput(current, bytes_remaining);
1524 visitor_->ProcessBodyData(current, bytes_remaining);
1525 current += bytes_remaining;
1526 }
1527 }
1528 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE
1529
1530 // label_reading_content:
1531 case BalsaFrameEnums::READING_CONTENT:
1532 #if DEBUGFRAMER
1533 LOG(INFO) << "ReadingContent: " << content_length_remaining_;
1534 #endif // DEBUGFRAMER
1535 while (content_length_remaining_ && current < end) {
1536 // read in the content
1537 const size_t bytes_remaining = end - current;
1538 const size_t consumed_bytes =
1539 (content_length_remaining_ < bytes_remaining) ?
1540 content_length_remaining_ : bytes_remaining;
1541 visitor_->ProcessBodyInput(current, consumed_bytes);
1542 visitor_->ProcessBodyData(current, consumed_bytes);
1543 current += consumed_bytes;
1544 content_length_remaining_ -= consumed_bytes;
1545 }
1546 if (content_length_remaining_ == 0) {
1547 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1548 visitor_->MessageDone();
1549 }
1550 goto bottom; // case BalsaFrameEnums::READING_CONTENT
1551
1552 default:
1553 // The state-machine should never be in a state that isn't handled
1554 // above. This is a glaring logic error, and we should do something
1555 // drastic to ensure that this gets looked-at and fixed.
1556 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
1557 << " memory corruption?!"; // COV_NF_LINE
1558 }
1559 }
1560 bottom:
1561 #if DEBUGFRAMER
1562 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"
1563 << std::string(input, current)
1564 << "\n$$$$$$$$$$$$$$"
1565 << BalsaFrameEnums::ParseStateToString(parse_state_)
1566 << "$$$$$$$$$$$$$$$"
1567 << " consumed: " << (current - input);
1568 if (Error()) {
1569 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());
1570 }
1571 #endif // DEBUGFRAMER
1572 return current - input;
1573 }
1574
1575 } // namespace net
OLDNEW
« no previous file with comments | « net/tools/balsa/balsa_frame.h ('k') | net/tools/balsa/balsa_frame_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698