Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(443)

Side by Side Diff: net/tools/balsa/balsa_frame.cc

Issue 266243004: Clang format slam. Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/tools/balsa/balsa_frame.h" 5 #include "net/tools/balsa/balsa_frame.h"
6 6
7 #include <assert.h> 7 #include <assert.h>
8 #if __SSE2__ 8 #if __SSE2__
9 #include <emmintrin.h> 9 #include <emmintrin.h>
10 #endif // __SSE2__ 10 #endif // __SSE2__
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 content_length_remaining_(0), 49 content_length_remaining_(0),
50 last_slash_n_loc_(NULL), 50 last_slash_n_loc_(NULL),
51 last_recorded_slash_n_loc_(NULL), 51 last_recorded_slash_n_loc_(NULL),
52 last_slash_n_idx_(0), 52 last_slash_n_idx_(0),
53 term_chars_(0), 53 term_chars_(0),
54 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), 54 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
55 last_error_(BalsaFrameEnums::NO_ERROR), 55 last_error_(BalsaFrameEnums::NO_ERROR),
56 headers_(NULL) { 56 headers_(NULL) {
57 } 57 }
58 58
59 BalsaFrame::~BalsaFrame() {} 59 BalsaFrame::~BalsaFrame() {
60 }
60 61
61 void BalsaFrame::Reset() { 62 void BalsaFrame::Reset() {
62 last_char_was_slash_r_ = false; 63 last_char_was_slash_r_ = false;
63 saw_non_newline_char_ = false; 64 saw_non_newline_char_ = false;
64 start_was_space_ = true; 65 start_was_space_ = true;
65 chunk_length_character_extracted_ = false; 66 chunk_length_character_extracted_ = false;
66 // is_request_ = true; // not reset between messages. 67 // is_request_ = true; // not reset between messages.
67 // request_was_head_ = false; // not reset between messages. 68 // request_was_head_ = false; // not reset between messages.
68 // max_header_length_ = 4096; // not reset between messages. 69 // max_header_length_ = 4096; // not reset between messages.
69 // max_request_uri_length_ = 2048; // not reset between messages. 70 // max_request_uri_length_ = 2048; // not reset between messages.
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after
259 // The two following statements should not be possible. 260 // The two following statements should not be possible.
260 if (end == begin) { 261 if (end == begin) {
261 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; 262 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
262 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" 263 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
263 << headers->OriginalHeadersForDebugging(); 264 << headers->OriginalHeadersForDebugging();
264 return false; 265 return false;
265 } 266 }
266 267
267 // whitespace_1_idx_ 268 // whitespace_1_idx_
268 headers->whitespace_1_idx_ = current - begin; 269 headers->whitespace_1_idx_ = current - begin;
269 // This loop is commented out as it is never used in current code. This is 270 // This loop is commented out as it is never used in current code. This is
270 // true only because we don't begin parsing the headers at all until we've 271 // true only because we don't begin parsing the headers at all until we've
271 // encountered a non whitespace character at the beginning of the stream, at 272 // encountered a non whitespace character at the beginning of the stream, at
272 // which point we begin our demarcation of header-start. If we did -not- do 273 // which point we begin our demarcation of header-start. If we did -not- do
273 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop 274 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop
274 // would be necessary for the proper functioning of this parsing. 275 // would be necessary for the proper functioning of this parsing.
275 // This is left here as this function may (in the future) be refactored out 276 // This is left here as this function may (in the future) be refactored out
276 // of the BalsaFrame class so that it may be shared between code in 277 // of the BalsaFrame class so that it may be shared between code in
277 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the 278 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the
278 // set_first_line() function, at which point it would be necessary). 279 // set_first_line() function, at which point it would be necessary).
279 #if 0 280 #if 0
280 while (*current <= ' ') { 281 while (*current <= ' ') {
281 ++current; 282 ++current;
282 } 283 }
283 #endif 284 #endif
284 // non_whitespace_1_idx_ 285 // non_whitespace_1_idx_
285 headers->non_whitespace_1_idx_ = current - begin; 286 headers->non_whitespace_1_idx_ = current - begin;
286 do { 287 do {
287 // The first time through, we're guaranteed that the current character 288 // The first time through, we're guaranteed that the current character
288 // won't be a whitespace (else the loop above wouldn't have terminated). 289 // won't be a whitespace (else the loop above wouldn't have terminated).
289 // That implies that we're guaranteed to get at least one non-whitespace 290 // That implies that we're guaranteed to get at least one non-whitespace
290 // character if we get into this loop at all. 291 // character if we get into this loop at all.
291 ++current; 292 ++current;
292 if (current == end) { 293 if (current == end) {
293 headers->whitespace_2_idx_ = current - begin; 294 headers->whitespace_2_idx_ = current - begin;
294 headers->non_whitespace_2_idx_ = current - begin; 295 headers->non_whitespace_2_idx_ = current - begin;
295 headers->whitespace_3_idx_ = current - begin; 296 headers->whitespace_3_idx_ = current - begin;
296 headers->non_whitespace_3_idx_ = current - begin; 297 headers->non_whitespace_3_idx_ = current - begin;
297 headers->whitespace_4_idx_ = current - begin; 298 headers->whitespace_4_idx_ = current - begin;
298 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request 299 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
299 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response 300 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
300 *error_code = 301 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
301 static_cast<BalsaFrameEnums::ErrorCode>( 302 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
302 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + 303 is_request);
303 is_request);
304 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION 304 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
305 return false; 305 return false;
306 } 306 }
307 goto output_exhausted; 307 goto output_exhausted;
308 } 308 }
309 } while (*current > ' '); 309 } while (*current > ' ');
310 // whitespace_2_idx_ 310 // whitespace_2_idx_
311 headers->whitespace_2_idx_ = current - begin; 311 headers->whitespace_2_idx_ = current - begin;
312 do { 312 do {
313 ++current; 313 ++current;
314 // Note that due to the loop which consumes all of the whitespace 314 // Note that due to the loop which consumes all of the whitespace
315 // at the end of the line, current can never == end while in this function. 315 // at the end of the line, current can never == end while in this function.
316 } while (*current <= ' '); 316 } while (*current <= ' ');
317 // non_whitespace_2_idx_ 317 // non_whitespace_2_idx_
318 headers->non_whitespace_2_idx_ = current - begin; 318 headers->non_whitespace_2_idx_ = current - begin;
319 do { 319 do {
320 ++current; 320 ++current;
321 if (current == end) { 321 if (current == end) {
322 headers->whitespace_3_idx_ = current - begin; 322 headers->whitespace_3_idx_ = current - begin;
323 headers->non_whitespace_3_idx_ = current - begin; 323 headers->non_whitespace_3_idx_ = current - begin;
324 headers->whitespace_4_idx_ = current - begin; 324 headers->whitespace_4_idx_ = current - begin;
325 // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request 325 // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request
326 // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response 326 // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response
327 *error_code = 327 *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
328 static_cast<BalsaFrameEnums::ErrorCode>( 328 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
329 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE 329 is_request);
330 + is_request);
331 goto output_exhausted; 330 goto output_exhausted;
332 } 331 }
333 } while (*current > ' '); 332 } while (*current > ' ');
334 // whitespace_3_idx_ 333 // whitespace_3_idx_
335 headers->whitespace_3_idx_ = current - begin; 334 headers->whitespace_3_idx_ = current - begin;
336 do { 335 do {
337 ++current; 336 ++current;
338 // Note that due to the loop which consumes all of the whitespace 337 // Note that due to the loop which consumes all of the whitespace
339 // at the end of the line, current can never == end while in this function. 338 // at the end of the line, current can never == end while in this function.
340 } while (*current <= ' '); 339 } while (*current <= ' ');
341 // non_whitespace_3_idx_ 340 // non_whitespace_3_idx_
342 headers->non_whitespace_3_idx_ = current - begin; 341 headers->non_whitespace_3_idx_ = current - begin;
343 headers->whitespace_4_idx_ = end - begin; 342 headers->whitespace_4_idx_ = end - begin;
344 343
345 output_exhausted: 344 output_exhausted:
346 // Note that we don't fail the parse immediately when parsing of the 345 // Note that we don't fail the parse immediately when parsing of the
347 // firstline fails. Depending on the protocol type, we may want to accept 346 // firstline fails. Depending on the protocol type, we may want to accept
348 // a firstline with only one or two elements, e.g., for HTTP/0.9: 347 // a firstline with only one or two elements, e.g., for HTTP/0.9:
349 // GET\r\n 348 // GET\r\n
350 // or 349 // or
351 // GET /\r\n 350 // GET /\r\n
352 // should be parsed without issue (though the visitor should know that 351 // should be parsed without issue (though the visitor should know that
353 // parsing the entire line was not exactly as it should be). 352 // parsing the entire line was not exactly as it should be).
354 // 353 //
355 // Eventually, these errors may be removed altogether, as the visitor can 354 // Eventually, these errors may be removed altogether, as the visitor can
356 // detect them on its own by examining the size of the various fields. 355 // detect them on its own by examining the size of the various fields.
357 // headers->set_first_line(non_whitespace_1_idx_, current); 356 // headers->set_first_line(non_whitespace_1_idx_, current);
358 357
359 if (is_request) { 358 if (is_request) {
360 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) > 359 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
361 max_request_uri_length) { 360 max_request_uri_length) {
362 // For requests, we need at least the method. We could assume that a 361 // For requests, we need at least the method. We could assume that a
363 // blank URI means "/". If version isn't stated, it should be assumed 362 // blank URI means "/". If version isn't stated, it should be assumed
364 // to be HTTP/0.9 by the visitor. 363 // to be HTTP/0.9 by the visitor.
365 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG; 364 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
366 return false; 365 return false;
367 } 366 }
368 } else { 367 } else {
369 headers->parsed_response_code_ = 0; 368 headers->parsed_response_code_ = 0;
370 { 369 {
371 const char* parsed_response_code_current = 370 const char* parsed_response_code_current =
372 begin + headers->non_whitespace_2_idx_; 371 begin + headers->non_whitespace_2_idx_;
373 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_; 372 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
374 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; 373 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
375 374
376 // Convert a string of [0-9]* into an int. 375 // Convert a string of [0-9]* into an int.
377 // Note that this allows for the conversion of response codes which 376 // Note that this allows for the conversion of response codes which
378 // are outside the bounds of normal HTTP response codes (no checking 377 // are outside the bounds of normal HTTP response codes (no checking
379 // is done to ensure that these are valid-- they're merely parsed)! 378 // is done to ensure that these are valid-- they're merely parsed)!
380 while (parsed_response_code_current < parsed_response_code_end) { 379 while (parsed_response_code_current < parsed_response_code_end) {
381 if (*parsed_response_code_current < '0' || 380 if (*parsed_response_code_current < '0' ||
382 *parsed_response_code_current > '9') { 381 *parsed_response_code_current > '9') {
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
460 void BalsaFrame::CleanUpKeyValueWhitespace( 459 void BalsaFrame::CleanUpKeyValueWhitespace(
461 const char* stream_begin, 460 const char* stream_begin,
462 const char* line_begin, 461 const char* line_begin,
463 const char* current, 462 const char* current,
464 const char* line_end, 463 const char* line_end,
465 HeaderLineDescription* current_header_line) { 464 HeaderLineDescription* current_header_line) {
466 const char* colon_loc = current; 465 const char* colon_loc = current;
467 DCHECK_LT(colon_loc, line_end); 466 DCHECK_LT(colon_loc, line_end);
468 DCHECK_EQ(':', *colon_loc); 467 DCHECK_EQ(':', *colon_loc);
469 DCHECK_EQ(':', *current); 468 DCHECK_EQ(':', *current);
470 DCHECK_GE(' ', *line_end) 469 DCHECK_GE(' ', *line_end) << "\"" << std::string(line_begin, line_end)
471 << "\"" << std::string(line_begin, line_end) << "\""; 470 << "\"";
472 471
473 // TODO(fenix): Investigate whether or not the bounds tests in the 472 // TODO(fenix): Investigate whether or not the bounds tests in the
474 // while loops here are redundant, and if so, remove them. 473 // while loops here are redundant, and if so, remove them.
475 --current; 474 --current;
476 while (current > line_begin && *current <= ' ') --current; 475 while (current > line_begin && *current <= ' ')
476 --current;
477 current += (current != colon_loc); 477 current += (current != colon_loc);
478 current_header_line->key_end_idx = current - stream_begin; 478 current_header_line->key_end_idx = current - stream_begin;
479 479
480 current = colon_loc; 480 current = colon_loc;
481 DCHECK_EQ(':', *current); 481 DCHECK_EQ(':', *current);
482 ++current; 482 ++current;
483 while (current < line_end && *current <= ' ') ++current; 483 while (current < line_end && *current <= ' ')
484 ++current;
484 current_header_line->value_begin_idx = current - stream_begin; 485 current_header_line->value_begin_idx = current - stream_begin;
485 486
486 DCHECK_GE(current_header_line->key_end_idx, 487 DCHECK_GE(current_header_line->key_end_idx,
487 current_header_line->first_char_idx); 488 current_header_line->first_char_idx);
488 DCHECK_GE(current_header_line->value_begin_idx, 489 DCHECK_GE(current_header_line->value_begin_idx,
489 current_header_line->key_end_idx); 490 current_header_line->key_end_idx);
490 DCHECK_GE(current_header_line->last_char_idx, 491 DCHECK_GE(current_header_line->last_char_idx,
491 current_header_line->value_begin_idx); 492 current_header_line->value_begin_idx);
492 } 493 }
493 494
494 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() { 495 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {
495 DCHECK(!lines_.empty()); 496 DCHECK(!lines_.empty());
496 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 497 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
497 // The last line is always just a newline (and is uninteresting). 498 // The last line is always just a newline (and is uninteresting).
498 const Lines::size_type lines_size_m1 = lines_.size() - 1; 499 const Lines::size_type lines_size_m1 = lines_.size() - 1;
499 #if __SSE2__ 500 #if __SSE2__
500 const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':', 501 const __v16qi colons = {':', ':', ':', ':', ':', ':', ':', ':',
501 ':', ':', ':', ':', ':', ':', ':', ':'}; 502 ':', ':', ':', ':', ':', ':', ':', ':'};
502 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; 503 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;
503 #endif // __SSE2__ 504 #endif // __SSE2__
504 const char* current = stream_begin + lines_[1].first; 505 const char* current = stream_begin + lines_[1].first;
505 // This code is a bit more subtle than it may appear at first glance. 506 // This code is a bit more subtle than it may appear at first glance.
506 // This code looks for a colon in the current line... but it also looks 507 // This code looks for a colon in the current line... but it also looks
507 // beyond the current line. If there is no colon in the current line, then 508 // beyond the current line. If there is no colon in the current line, then
508 // for each subsequent line (until the colon which -has- been found is 509 // for each subsequent line (until the colon which -has- been found is
509 // associated with a line), no searching for a colon will be performed. In 510 // associated with a line), no searching for a colon will be performed. In
510 // this way, we minimize the amount of bytes we have scanned for a colon. 511 // this way, we minimize the amount of bytes we have scanned for a colon.
511 for (Lines::size_type i = 1; i < lines_size_m1;) { 512 for (Lines::size_type i = 1; i < lines_size_m1;) {
(...skipping 15 matching lines...) Expand all
527 } 528 }
528 const char* line_end = stream_begin + lines_[i - 1].second; 529 const char* line_end = stream_begin + lines_[i - 1].second;
529 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); 530 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
530 531
531 // We cleanup the whitespace at the end of the line before doing anything 532 // We cleanup the whitespace at the end of the line before doing anything
532 // else of interest as it allows us to do nothing when irregularly formatted 533 // else of interest as it allows us to do nothing when irregularly formatted
533 // headers are parsed (e.g. those with only keys, only values, or no colon). 534 // headers are parsed (e.g. those with only keys, only values, or no colon).
534 // 535 //
535 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. 536 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
536 --line_end; 537 --line_end;
537 DCHECK_EQ('\n', *line_end) 538 DCHECK_EQ('\n', *line_end) << "\"" << std::string(line_begin, line_end)
538 << "\"" << std::string(line_begin, line_end) << "\""; 539 << "\"";
539 while (*line_end <= ' ' && line_end > line_begin) { 540 while (*line_end <= ' ' && line_end > line_begin) {
540 --line_end; 541 --line_end;
541 } 542 }
542 ++line_end; 543 ++line_end;
543 DCHECK_GE(' ', *line_end); 544 DCHECK_GE(' ', *line_end);
544 DCHECK_LT(line_begin, line_end); 545 DCHECK_LT(line_begin, line_end);
545 546
546 // We use '0' for the block idx, because we're always writing to the first 547 // We use '0' for the block idx, because we're always writing to the first
547 // block from the framer (we do this because the framer requires that the 548 // block from the framer (we do this because the framer requires that the
548 // entire header sequence be in a contiguous buffer). 549 // entire header sequence be in a contiguous buffer).
(...skipping 12 matching lines...) Expand all
561 } else if (current < line_begin) { 562 } else if (current < line_begin) {
562 // When this condition is true, the last detected colon was part of a 563 // When this condition is true, the last detected colon was part of a
563 // previous line. We reset to the beginning of the line as we don't care 564 // previous line. We reset to the beginning of the line as we don't care
564 // about the presence of any colon before the beginning of the current 565 // about the presence of any colon before the beginning of the current
565 // line. 566 // line.
566 current = line_begin; 567 current = line_begin;
567 } 568 }
568 #if __SSE2__ 569 #if __SSE2__
569 while (current < header_lines_end_m16) { 570 while (current < header_lines_end_m16) {
570 __m128i header_bytes = 571 __m128i header_bytes =
571 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current)); 572 _mm_loadu_si128(reinterpret_cast<const __m128i*>(current));
572 __m128i colon_cmp = 573 __m128i colon_cmp =
573 _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons)); 574 _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons));
574 int colon_msk = _mm_movemask_epi8(colon_cmp); 575 int colon_msk = _mm_movemask_epi8(colon_cmp);
575 if (colon_msk == 0) { 576 if (colon_msk == 0) {
576 current += 16; 577 current += 16;
577 continue; 578 continue;
578 } 579 }
579 current += (ffs(colon_msk) - 1); 580 current += (ffs(colon_msk) - 1);
580 if (current > line_end) { 581 if (current > line_end) {
581 break; 582 break;
582 } 583 }
583 goto found_colon; 584 goto found_colon;
584 } 585 }
585 #endif // __SSE2__ 586 #endif // __SSE2__
586 for (; current < line_end; ++current) { 587 for (; current < line_end; ++current) {
587 if (*current != ':') { 588 if (*current != ':') {
588 continue; 589 continue;
589 } 590 }
590 goto found_colon; 591 goto found_colon;
591 } 592 }
592 // If we've gotten to here, then there was no colon 593 // If we've gotten to here, then there was no colon
593 // in the line. The arguments we passed into the construction 594 // in the line. The arguments we passed into the construction
594 // for the HeaderLineDescription object should be OK-- it assumes 595 // for the HeaderLineDescription object should be OK-- it assumes
595 // that the entire content is 'key' by default (which is true, as 596 // that the entire content is 'key' by default (which is true, as
596 // there was no colon, there can be no value). Note that this is a 597 // there was no colon, there can be no value). Note that this is a
597 // construct which is technically not allowed by the spec. 598 // construct which is technically not allowed by the spec.
598 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; 599 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
599 visitor_->HandleHeaderWarning(this); 600 visitor_->HandleHeaderWarning(this);
600 continue; 601 continue;
601 found_colon: 602 found_colon:
602 DCHECK_EQ(*current, ':'); 603 DCHECK_EQ(*current, ':');
603 DCHECK_LE(current - stream_begin, line_end - stream_begin); 604 DCHECK_LE(current - stream_begin, line_end - stream_begin);
604 DCHECK_LE(stream_begin - stream_begin, current - stream_begin); 605 DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
605 606
606 HeaderLineDescription& current_header_line = headers_->header_lines_.back(); 607 HeaderLineDescription& current_header_line = headers_->header_lines_.back();
607 current_header_line.key_end_idx = current - stream_begin; 608 current_header_line.key_end_idx = current - stream_begin;
608 current_header_line.value_begin_idx = current_header_line.key_end_idx; 609 current_header_line.value_begin_idx = current_header_line.key_end_idx;
609 if (current < line_end) { 610 if (current < line_end) {
610 ++current_header_line.key_end_idx; 611 ++current_header_line.key_end_idx;
611 612
612 CleanUpKeyValueWhitespace(stream_begin, 613 CleanUpKeyValueWhitespace(
613 line_begin, 614 stream_begin, line_begin, current, line_end, &current_header_line);
614 current,
615 line_end,
616 &current_header_line);
617 } 615 }
618 } 616 }
619 } 617 }
620 618
621 void BalsaFrame::ProcessContentLengthLine( 619 void BalsaFrame::ProcessContentLengthLine(
622 HeaderLines::size_type line_idx, 620 HeaderLines::size_type line_idx,
623 BalsaHeadersEnums::ContentLengthStatus* status, 621 BalsaHeadersEnums::ContentLengthStatus* status,
624 size_t* length) { 622 size_t* length) {
625 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; 623 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
626 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 624 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
627 const char* line_end = stream_begin + header_line.last_char_idx; 625 const char* line_end = stream_begin + header_line.last_char_idx;
628 const char* value_begin = (stream_begin + header_line.value_begin_idx); 626 const char* value_begin = (stream_begin + header_line.value_begin_idx);
629 627
630 if (value_begin >= line_end) { 628 if (value_begin >= line_end) {
631 // There is no non-whitespace value data. 629 // There is no non-whitespace value data.
632 #if DEBUGFRAMER 630 #if DEBUGFRAMER
633 LOG(INFO) << "invalid content-length -- no non-whitespace value data"; 631 LOG(INFO) << "invalid content-length -- no non-whitespace value data";
634 #endif 632 #endif
635 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; 633 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
636 return; 634 return;
637 } 635 }
638 636
639 *length = 0; 637 *length = 0;
640 while (value_begin < line_end) { 638 while (value_begin < line_end) {
641 if (*value_begin < '0' || *value_begin > '9') { 639 if (*value_begin < '0' || *value_begin > '9') {
642 // bad! content-length found, and couldn't parse all of it! 640 // bad! content-length found, and couldn't parse all of it!
643 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; 641 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
(...skipping 22 matching lines...) Expand all
666 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; 664 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
667 } 665 }
668 666
669 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { 667 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
670 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; 668 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
671 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 669 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
672 const char* line_end = stream_begin + header_line.last_char_idx; 670 const char* line_end = stream_begin + header_line.last_char_idx;
673 const char* value_begin = stream_begin + header_line.value_begin_idx; 671 const char* value_begin = stream_begin + header_line.value_begin_idx;
674 size_t value_length = line_end - value_begin; 672 size_t value_length = line_end - value_begin;
675 673
676 if ((value_length == 7) && 674 if ((value_length == 7) && !strncasecmp(value_begin, "chunked", 7)) {
677 !strncasecmp(value_begin, "chunked", 7)) {
678 headers_->transfer_encoding_is_chunked_ = true; 675 headers_->transfer_encoding_is_chunked_ = true;
679 } else if ((value_length == 8) && 676 } else if ((value_length == 8) && !strncasecmp(value_begin, "identity", 8)) {
680 !strncasecmp(value_begin, "identity", 8)) {
681 headers_->transfer_encoding_is_chunked_ = false; 677 headers_->transfer_encoding_is_chunked_ = false;
682 } else { 678 } else {
683 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; 679 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;
684 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 680 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
685 visitor_->HandleHeaderError(this); 681 visitor_->HandleHeaderError(this);
686 return; 682 return;
687 } 683 }
688 } 684 }
689 685
690 namespace { 686 namespace {
691 bool SplitStringPiece(base::StringPiece original, char delim, 687 bool SplitStringPiece(base::StringPiece original,
692 base::StringPiece* before, base::StringPiece* after) { 688 char delim,
689 base::StringPiece* before,
690 base::StringPiece* after) {
693 const char* p = original.data(); 691 const char* p = original.data();
694 const char* end = p + original.size(); 692 const char* end = p + original.size();
695 693
696 while (p != end) { 694 while (p != end) {
697 if (*p == delim) { 695 if (*p == delim) {
698 ++p; 696 ++p;
699 } else { 697 } else {
700 const char* start = p; 698 const char* start = p;
701 while (++p != end && *p != delim) { 699 while (++p != end && *p != delim) {
702 // Skip to the next occurrence of the delimiter. 700 // Skip to the next occurrence of the delimiter.
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
740 738
741 extensions->AppendHeader(key, value); 739 extensions->AppendHeader(key, value);
742 740
743 StringPieceUtils::RemoveWhitespaceContext(&remaining); 741 StringPieceUtils::RemoveWhitespaceContext(&remaining);
744 SplitStringPiece(remaining, ';', &extension, &remaining); 742 SplitStringPiece(remaining, ';', &extension, &remaining);
745 } 743 }
746 } 744 }
747 745
748 } // anonymous namespace 746 } // anonymous namespace
749 747
750 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size, 748 void BalsaFrame::ProcessChunkExtensions(const char* input,
749 size_t size,
751 BalsaHeaders* extensions) { 750 BalsaHeaders* extensions) {
752 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions); 751 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);
753 } 752 }
754 753
755 void BalsaFrame::ProcessHeaderLines() { 754 void BalsaFrame::ProcessHeaderLines() {
756 HeaderLines::size_type content_length_idx = 0; 755 HeaderLines::size_type content_length_idx = 0;
757 HeaderLines::size_type transfer_encoding_idx = 0; 756 HeaderLines::size_type transfer_encoding_idx = 0;
758 757
759 DCHECK(!lines_.empty()); 758 DCHECK(!lines_.empty());
760 #if DEBUGFRAMER 759 #if DEBUGFRAMER
761 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; 760 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
762 #endif // DEBUGFRAMER 761 #endif // DEBUGFRAMER
763 762
764 // There is no need to attempt to process headers if no header lines exist. 763 // There is no need to attempt to process headers if no header lines exist.
765 // There are at least two lines in the message which are not header lines. 764 // There are at least two lines in the message which are not header lines.
766 // These two non-header lines are the first line of the message, and the 765 // These two non-header lines are the first line of the message, and the
767 // last line of the message (which is an empty line). 766 // last line of the message (which is an empty line).
768 // Thus, we test to see if we have more than two lines total before attempting 767 // Thus, we test to see if we have more than two lines total before attempting
769 // to parse any header lines. 768 // to parse any header lines.
770 if (lines_.size() > 2) { 769 if (lines_.size() > 2) {
771 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 770 const char* stream_begin = headers_->OriginalHeaderStreamBegin();
772 771
773 // Then, for the rest of the header data, we parse these into key-value 772 // Then, for the rest of the header data, we parse these into key-value
774 // pairs. 773 // pairs.
775 FindColonsAndParseIntoKeyValue(); 774 FindColonsAndParseIntoKeyValue();
776 // At this point, we've parsed all of the headers. Time to look for those 775 // At this point, we've parsed all of the headers. Time to look for those
777 // headers which we require for framing. 776 // headers which we require for framing.
778 const HeaderLines::size_type 777 const HeaderLines::size_type header_lines_size =
779 header_lines_size = headers_->header_lines_.size(); 778 headers_->header_lines_.size();
780 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { 779 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
781 const HeaderLineDescription& current_header_line = 780 const HeaderLineDescription& current_header_line =
782 headers_->header_lines_[i]; 781 headers_->header_lines_[i];
783 const char* key_begin = 782 const char* key_begin =
784 (stream_begin + current_header_line.first_char_idx); 783 (stream_begin + current_header_line.first_char_idx);
785 const char* key_end = (stream_begin + current_header_line.key_end_idx); 784 const char* key_end = (stream_begin + current_header_line.key_end_idx);
786 const size_t key_len = key_end - key_begin; 785 const size_t key_len = key_end - key_begin;
787 const char c = *key_begin; 786 const char c = *key_begin;
788 #if DEBUGFRAMER 787 #if DEBUGFRAMER
789 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len) 788 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)
790 << " c: '" << c << "' key_len: " << key_len; 789 << " c: '" << c << "' key_len: " << key_len;
791 #endif // DEBUGFRAMER 790 #endif // DEBUGFRAMER
792 // If a header begins with either lowercase or uppercase 'c' or 't', then 791 // If a header begins with either lowercase or uppercase 'c' or 't', then
793 // the header may be one of content-length, connection, content-encoding 792 // the header may be one of content-length, connection, content-encoding
794 // or transfer-encoding. These headers are special, as they change the way 793 // or transfer-encoding. These headers are special, as they change the way
795 // that the message is framed, and so the framer is required to search 794 // that the message is framed, and so the framer is required to search
796 // for them. 795 // for them.
797 796
798
799 if (c == 'c' || c == 'C') { 797 if (c == 'c' || c == 'C') {
800 if ((key_len == kContentLengthSize) && 798 if ((key_len == kContentLengthSize) &&
801 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { 799 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {
802 BalsaHeadersEnums::ContentLengthStatus content_length_status = 800 BalsaHeadersEnums::ContentLengthStatus content_length_status =
803 BalsaHeadersEnums::NO_CONTENT_LENGTH; 801 BalsaHeadersEnums::NO_CONTENT_LENGTH;
804 size_t length = 0; 802 size_t length = 0;
805 ProcessContentLengthLine(i, &content_length_status, &length); 803 ProcessContentLengthLine(i, &content_length_status, &length);
806 if (content_length_idx != 0) { // then we've already seen one! 804 if (content_length_idx != 0) { // then we've already seen one!
807 if ((headers_->content_length_status_ != content_length_status) || 805 if ((headers_->content_length_status_ != content_length_status) ||
808 ((headers_->content_length_status_ == 806 ((headers_->content_length_status_ ==
809 BalsaHeadersEnums::VALID_CONTENT_LENGTH) && 807 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
810 length != headers_->content_length_)) { 808 length != headers_->content_length_)) {
811 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; 809 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;
812 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 810 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
813 visitor_->HandleHeaderError(this); 811 visitor_->HandleHeaderError(this);
814 return; 812 return;
815 } 813 }
816 continue; 814 continue;
817 } else { 815 } else {
818 content_length_idx = i + 1; 816 content_length_idx = i + 1;
819 headers_->content_length_status_ = content_length_status; 817 headers_->content_length_status_ = content_length_status;
820 headers_->content_length_ = length; 818 headers_->content_length_ = length;
821 content_length_remaining_ = length; 819 content_length_remaining_ = length;
822 } 820 }
823
824 } 821 }
825 } else if (c == 't' || c == 'T') { 822 } else if (c == 't' || c == 'T') {
826 if ((key_len == kTransferEncodingSize) && 823 if ((key_len == kTransferEncodingSize) &&
827 0 == strncasecmp(key_begin, kTransferEncoding, 824 0 == strncasecmp(
828 kTransferEncodingSize)) { 825 key_begin, kTransferEncoding, kTransferEncodingSize)) {
829 if (transfer_encoding_idx != 0) { 826 if (transfer_encoding_idx != 0) {
830 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; 827 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;
831 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 828 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
832 visitor_->HandleHeaderError(this); 829 visitor_->HandleHeaderError(this);
833 return; 830 return;
834 } 831 }
835 transfer_encoding_idx = i + 1; 832 transfer_encoding_idx = i + 1;
836 } 833 }
837 } else if (i == 0 && (key_len == 0 || c == ' ')) { 834 } else if (i == 0 && (key_len == 0 || c == ' ')) {
838 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; 835 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;
(...skipping 11 matching lines...) Expand all
850 ProcessTransferEncodingLine(transfer_encoding_idx - 1); 847 ProcessTransferEncodingLine(transfer_encoding_idx - 1);
851 } 848 }
852 } 849 }
853 } 850 }
854 851
855 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { 852 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
856 // For responses, can't have a body if the request was a HEAD, or if it is 853 // For responses, can't have a body if the request was a HEAD, or if it is
857 // one of these response-codes. rfc2616 section 4.3 854 // one of these response-codes. rfc2616 section 4.3
858 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 855 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
859 if (is_request_ || 856 if (is_request_ ||
860 !(request_was_head_ || 857 !(request_was_head_ || (headers_->parsed_response_code_ >= 100 &&
861 (headers_->parsed_response_code_ >= 100 && 858 headers_->parsed_response_code_ < 200) ||
862 headers_->parsed_response_code_ < 200) ||
863 (headers_->parsed_response_code_ == 204) || 859 (headers_->parsed_response_code_ == 204) ||
864 (headers_->parsed_response_code_ == 304))) { 860 (headers_->parsed_response_code_ == 304))) {
865 // Then we can have a body. 861 // Then we can have a body.
866 if (headers_->transfer_encoding_is_chunked_) { 862 if (headers_->transfer_encoding_is_chunked_) {
867 // Note that 863 // Note that
868 // if ( Transfer-Encoding: chunked && Content-length: ) 864 // if ( Transfer-Encoding: chunked && Content-length: )
869 // then Transfer-Encoding: chunked trumps. 865 // then Transfer-Encoding: chunked trumps.
870 // This is as specified in the spec. 866 // This is as specified in the spec.
871 // rfc2616 section 4.4.3 867 // rfc2616 section 4.4.3
872 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; 868 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
(...skipping 14 matching lines...) Expand all
887 } 883 }
888 break; 884 break;
889 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: 885 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
890 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: 886 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
891 // If there were characters left-over after parsing the 887 // If there were characters left-over after parsing the
892 // content length, we should flag an error and stop. 888 // content length, we should flag an error and stop.
893 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 889 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
894 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; 890 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;
895 visitor_->HandleHeaderError(this); 891 visitor_->HandleHeaderError(this);
896 break; 892 break;
897 // We can have: no transfer-encoding, no content length, and no 893 // We can have: no transfer-encoding, no content length, and no
898 // connection: close... 894 // connection: close...
899 // Unfortunately, this case doesn't seem to be covered in the spec. 895 // Unfortunately, this case doesn't seem to be covered in the spec.
900 // We'll assume that the safest thing to do here is what the google 896 // We'll assume that the safest thing to do here is what the google
901 // binaries before 2008 already do, which is to assume that 897 // binaries before 2008 already do, which is to assume that
902 // everything until the connection is closed is body. 898 // everything until the connection is closed is body.
903 case BalsaHeadersEnums::NO_CONTENT_LENGTH: 899 case BalsaHeadersEnums::NO_CONTENT_LENGTH:
904 if (is_request_) { 900 if (is_request_) {
905 base::StringPiece method = headers_->request_method(); 901 base::StringPiece method = headers_->request_method();
906 // POSTs and PUTs should have a detectable body length. If they 902 // POSTs and PUTs should have a detectable body length. If they
907 // do not we consider it an error. 903 // do not we consider it an error.
908 if ((method.size() == 4 && 904 if ((method.size() == 4 &&
909 strncmp(method.data(), "POST", 4) == 0) || 905 strncmp(method.data(), "POST", 4) == 0) ||
910 (method.size() == 3 && 906 (method.size() == 3 && strncmp(method.data(), "PUT", 3) == 0)) {
911 strncmp(method.data(), "PUT", 3) == 0)) {
912 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 907 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
913 last_error_ = 908 last_error_ =
914 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; 909 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;
915 visitor_->HandleHeaderError(this); 910 visitor_->HandleHeaderError(this);
916 break; 911 break;
917 } 912 }
918 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 913 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
919 } else { 914 } else {
920 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; 915 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
921 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; 916 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;
922 visitor_->HandleHeaderWarning(this); 917 visitor_->HandleHeaderWarning(this);
923 } 918 }
924 break; 919 break;
925 // The COV_NF_... statements here provide hints to the apparatus 920 // The COV_NF_... statements here provide hints to the apparatus
926 // which computes coverage reports/ratios that this code is never 921 // which computes coverage reports/ratios that this code is never
927 // intended to be executed, and should technically be impossible. 922 // intended to be executed, and should technically be impossible.
928 // COV_NF_START 923 // COV_NF_START
929 default: 924 default:
930 LOG(FATAL) << "Saw a content_length_status: " 925 LOG(FATAL) << "Saw a content_length_status: "
931 << headers_->content_length_status_ << " which is unknown."; 926 << headers_->content_length_status_
927 << " which is unknown.";
932 // COV_NF_END 928 // COV_NF_END
933 } 929 }
934 } 930 }
935 } 931 }
936 } 932 }
937 933
938 size_t BalsaFrame::ProcessHeaders(const char* message_start, 934 size_t BalsaFrame::ProcessHeaders(const char* message_start,
939 size_t message_length) { 935 size_t message_length) {
940 const char* const original_message_start = message_start; 936 const char* const original_message_start = message_start;
941 const char* const message_end = message_start + message_length; 937 const char* const message_end = message_start + message_length;
(...skipping 22 matching lines...) Expand all
964 } else { 960 } else {
965 saw_non_newline_char_ = true; 961 saw_non_newline_char_ = true;
966 checkpoint = message_start = message_current; 962 checkpoint = message_start = message_current;
967 goto read_real_message; 963 goto read_real_message;
968 } 964 }
969 } 965 }
970 ++message_current; 966 ++message_current;
971 } while (message_current < message_end); 967 } while (message_current < message_end);
972 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks 968 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks
973 } else { 969 } else {
974 read_real_message: 970 read_real_message :
975 // Note that SSE2 can be enabled on certain piii platforms. 971 // Note that SSE2 can be enabled on certain piii platforms.
976 #if __SSE2__ 972 #if __SSE2__
977 { 973 {
978 const char* const message_end_m16 = message_end - 16; 974 const char* const message_end_m16 = message_end - 16;
979 __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 975 __v16qi newlines = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
980 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' }; 976 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
981 while (message_current < message_end_m16) { 977 while (message_current < message_end_m16) {
982 // What this does (using compiler intrinsics): 978 // What this does (using compiler intrinsics):
983 // 979 //
984 // Load 16 '\n's into an xmm register 980 // Load 16 '\n's into an xmm register
985 // Load 16 bytes of currennt message into an xmm register 981 // Load 16 bytes of currennt message into an xmm register
986 // Do byte-wise equals on those two xmm registers 982 // Do byte-wise equals on those two xmm registers
987 // Take the first bit of each byte, and put that into the first 983 // Take the first bit of each byte, and put that into the first
988 // 16 bits of a mask 984 // 16 bits of a mask
989 // If the mask is zero, no '\n' found. increment by 16 and try again 985 // If the mask is zero, no '\n' found. increment by 16 and try again
990 // Else scan forward to find the first set bit. 986 // Else scan forward to find the first set bit.
991 // Increment current by the index of the first set bit 987 // Increment current by the index of the first set bit
992 // (ffs returns index of first set bit + 1) 988 // (ffs returns index of first set bit + 1)
993 __m128i msg_bytes = 989 __m128i msg_bytes = _mm_loadu_si128(const_cast<__m128i*>(
994 _mm_loadu_si128(const_cast<__m128i *>( 990 reinterpret_cast<const __m128i*>(message_current)));
995 reinterpret_cast<const __m128i *>(message_current))); 991 __m128i newline_cmp =
996 __m128i newline_cmp =
997 _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines)); 992 _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines));
998 int newline_msk = _mm_movemask_epi8(newline_cmp); 993 int newline_msk = _mm_movemask_epi8(newline_cmp);
999 if (newline_msk == 0) { 994 if (newline_msk == 0) {
1000 message_current += 16; 995 message_current += 16;
1001 continue; 996 continue;
1002 } 997 }
1003 message_current += (ffs(newline_msk) - 1); 998 message_current += (ffs(newline_msk) - 1);
1004 const size_t relative_idx = message_current - message_start; 999 const size_t relative_idx = message_current - message_start;
1005 const size_t message_current_idx = 1 + base_idx + relative_idx; 1000 const size_t message_current_idx = 1 + base_idx + relative_idx;
1006 lines_.push_back(std::make_pair(last_slash_n_idx_, 1001 lines_.push_back(
1007 message_current_idx)); 1002 std::make_pair(last_slash_n_idx_, message_current_idx));
1008 if (lines_.size() == 1) { 1003 if (lines_.size() == 1) {
1009 headers_->WriteFromFramer(checkpoint, 1004 headers_->WriteFromFramer(checkpoint,
1010 1 + message_current - checkpoint); 1005 1 + message_current - checkpoint);
1011 checkpoint = message_current + 1; 1006 checkpoint = message_current + 1;
1012 const char* begin = headers_->OriginalHeaderStreamBegin(); 1007 const char* begin = headers_->OriginalHeaderStreamBegin();
1013 #if DEBUGFRAMER 1008 #if DEBUGFRAMER
1014 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); 1009 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1015 LOG(INFO) << "is_request_: " << is_request_; 1010 LOG(INFO) << "is_request_: " << is_request_;
1016 #endif 1011 #endif
1017 ProcessFirstLine(begin, begin + lines_[0].second); 1012 ProcessFirstLine(begin, begin + lines_[0].second);
1018 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) 1013 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1019 goto process_lines;
1020 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1021 goto bottom;
1022 }
1023 const size_t chars_since_last_slash_n = (message_current_idx -
1024 last_slash_n_idx_);
1025 last_slash_n_idx_ = message_current_idx;
1026 if (chars_since_last_slash_n > 2) {
1027 // We have a slash-n, but the last slash n was
1028 // more than 2 characters away from this. Thus, we know
1029 // that this cannot be an end-of-header.
1030 ++message_current;
1031 continue;
1032 }
1033 if ((chars_since_last_slash_n == 1) ||
1034 (((message_current > message_start) &&
1035 (*(message_current - 1) == '\r')) ||
1036 (last_char_was_slash_r_))) {
1037 goto process_lines; 1014 goto process_lines;
1038 } 1015 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1016 goto bottom;
1017 }
1018 const size_t chars_since_last_slash_n =
1019 (message_current_idx - last_slash_n_idx_);
1020 last_slash_n_idx_ = message_current_idx;
1021 if (chars_since_last_slash_n > 2) {
1022 // We have a slash-n, but the last slash n was
1023 // more than 2 characters away from this. Thus, we know
1024 // that this cannot be an end-of-header.
1039 ++message_current; 1025 ++message_current;
1026 continue;
1040 } 1027 }
1028 if ((chars_since_last_slash_n == 1) ||
1029 (((message_current > message_start) &&
1030 (*(message_current - 1) == '\r')) ||
1031 (last_char_was_slash_r_))) {
1032 goto process_lines;
1033 }
1034 ++message_current;
1041 } 1035 }
1036 }
1042 #endif // __SSE2__ 1037 #endif // __SSE2__
1043 while (message_current < message_end) { 1038 while (message_current < message_end) {
1044 if (*message_current != '\n') { 1039 if (*message_current != '\n') {
1045 ++message_current; 1040 ++message_current;
1046 continue; 1041 continue;
1047 } 1042 }
1048 const size_t relative_idx = message_current - message_start; 1043 const size_t relative_idx = message_current - message_start;
1049 const size_t message_current_idx = 1 + base_idx + relative_idx; 1044 const size_t message_current_idx = 1 + base_idx + relative_idx;
1050 lines_.push_back(std::make_pair(last_slash_n_idx_, 1045 lines_.push_back(
1051 message_current_idx)); 1046 std::make_pair(last_slash_n_idx_, message_current_idx));
1052 if (lines_.size() == 1) { 1047 if (lines_.size() == 1) {
1053 headers_->WriteFromFramer(checkpoint, 1048 headers_->WriteFromFramer(checkpoint,
1054 1 + message_current - checkpoint); 1049 1 + message_current - checkpoint);
1055 checkpoint = message_current + 1; 1050 checkpoint = message_current + 1;
1056 const char* begin = headers_->OriginalHeaderStreamBegin(); 1051 const char* begin = headers_->OriginalHeaderStreamBegin();
1057 #if DEBUGFRAMER 1052 #if DEBUGFRAMER
1058 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); 1053 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
1059 LOG(INFO) << "is_request_: " << is_request_; 1054 LOG(INFO) << "is_request_: " << is_request_;
1060 #endif 1055 #endif
1061 ProcessFirstLine(begin, begin + lines_[0].second); 1056 ProcessFirstLine(begin, begin + lines_[0].second);
1062 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) 1057 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
1063 goto process_lines; 1058 goto process_lines;
1064 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) 1059 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
1065 goto bottom; 1060 goto bottom;
1066 } 1061 }
1067 const size_t chars_since_last_slash_n = (message_current_idx - 1062 const size_t chars_since_last_slash_n =
1068 last_slash_n_idx_); 1063 (message_current_idx - last_slash_n_idx_);
1069 last_slash_n_idx_ = message_current_idx; 1064 last_slash_n_idx_ = message_current_idx;
1070 if (chars_since_last_slash_n > 2) { 1065 if (chars_since_last_slash_n > 2) {
1071 // false positive. 1066 // false positive.
1072 ++message_current; 1067 ++message_current;
1073 continue; 1068 continue;
1074 } 1069 }
1075 if ((chars_since_last_slash_n == 1) || 1070 if ((chars_since_last_slash_n == 1) ||
1076 (((message_current > message_start) && 1071 (((message_current > message_start) &&
1077 (*(message_current - 1) == '\r')) || 1072 (*(message_current - 1) == '\r')) ||
1078 (last_char_was_slash_r_))) { 1073 (last_char_was_slash_r_))) {
1079 goto process_lines; 1074 goto process_lines;
1080 } 1075 }
1081 ++message_current; 1076 ++message_current;
1082 } 1077 }
1083 } 1078 }
1084 continue; 1079 continue;
1085 process_lines: 1080 process_lines:
1086 ++message_current; 1081 ++message_current;
1087 DCHECK(message_current >= message_start); 1082 DCHECK(message_current >= message_start);
1088 if (message_current > message_start) { 1083 if (message_current > message_start) {
1089 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); 1084 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
1090 } 1085 }
1091 1086
1092 // Check if we have exceeded maximum headers length 1087 // Check if we have exceeded maximum headers length
1093 // Although we check for this limit before and after we call this function 1088 // Although we check for this limit before and after we call this function
1094 // we check it here as well to make sure that in case the visitor changed 1089 // we check it here as well to make sure that in case the visitor changed
1095 // the max_header_length_ (for example after processing the first line) 1090 // the max_header_length_ (for example after processing the first line)
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1132 } 1127 }
1133 // If we've gotten to here, it means that we've consumed all of the 1128 // If we've gotten to here, it means that we've consumed all of the
1134 // available input. We need to record whether or not the last character we 1129 // available input. We need to record whether or not the last character we
1135 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds 1130 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
1136 // a header framing that is split across the two calls. 1131 // a header framing that is split across the two calls.
1137 last_char_was_slash_r_ = (*(message_end - 1) == '\r'); 1132 last_char_was_slash_r_ = (*(message_end - 1) == '\r');
1138 DCHECK(message_current >= message_start); 1133 DCHECK(message_current >= message_start);
1139 if (message_current > message_start) { 1134 if (message_current > message_start) {
1140 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); 1135 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
1141 } 1136 }
1142 bottom: 1137 bottom:
1143 return message_current - original_message_start; 1138 return message_current - original_message_start;
1144 } 1139 }
1145 1140
1146
1147 size_t BalsaFrame::BytesSafeToSplice() const { 1141 size_t BalsaFrame::BytesSafeToSplice() const {
1148 switch (parse_state_) { 1142 switch (parse_state_) {
1149 case BalsaFrameEnums::READING_CHUNK_DATA: 1143 case BalsaFrameEnums::READING_CHUNK_DATA:
1150 return chunk_length_remaining_; 1144 return chunk_length_remaining_;
1151 case BalsaFrameEnums::READING_UNTIL_CLOSE: 1145 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1152 return std::numeric_limits<size_t>::max(); 1146 return std::numeric_limits<size_t>::max();
1153 case BalsaFrameEnums::READING_CONTENT: 1147 case BalsaFrameEnums::READING_CONTENT:
1154 return content_length_remaining_; 1148 return content_length_remaining_;
1155 default: 1149 default:
1156 return 0; 1150 return 0;
1157 } 1151 }
1158 } 1152 }
1159 1153
1160 void BalsaFrame::BytesSpliced(size_t bytes_spliced) { 1154 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
1161 switch (parse_state_) { 1155 switch (parse_state_) {
1162 case BalsaFrameEnums::READING_CHUNK_DATA: 1156 case BalsaFrameEnums::READING_CHUNK_DATA:
1163 if (chunk_length_remaining_ >= bytes_spliced) { 1157 if (chunk_length_remaining_ >= bytes_spliced) {
1164 chunk_length_remaining_ -= bytes_spliced; 1158 chunk_length_remaining_ -= bytes_spliced;
1165 if (chunk_length_remaining_ == 0) { 1159 if (chunk_length_remaining_ == 0) {
1166 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; 1160 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1167 } 1161 }
1168 return; 1162 return;
1169 } else { 1163 } else {
1170 last_error_ = 1164 last_error_ = BalsaFrameEnums::
1171 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; 1165 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1172 goto error_exit; 1166 goto error_exit;
1173 } 1167 }
1174 1168
1175 case BalsaFrameEnums::READING_UNTIL_CLOSE: 1169 case BalsaFrameEnums::READING_UNTIL_CLOSE:
1176 return; 1170 return;
1177 1171
1178 case BalsaFrameEnums::READING_CONTENT: 1172 case BalsaFrameEnums::READING_CONTENT:
1179 if (content_length_remaining_ >= bytes_spliced) { 1173 if (content_length_remaining_ >= bytes_spliced) {
1180 content_length_remaining_ -= bytes_spliced; 1174 content_length_remaining_ -= bytes_spliced;
1181 if (content_length_remaining_ == 0) { 1175 if (content_length_remaining_ == 0) {
1182 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1176 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1183 visitor_->MessageDone(); 1177 visitor_->MessageDone();
1184 } 1178 }
1185 return; 1179 return;
1186 } else { 1180 } else {
1187 last_error_ = 1181 last_error_ = BalsaFrameEnums::
1188 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; 1182 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1189 goto error_exit; 1183 goto error_exit;
1190 } 1184 }
1191 1185
1192 default: 1186 default:
1193 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; 1187 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;
1194 goto error_exit; 1188 goto error_exit;
1195 } 1189 }
1196 1190
1197 error_exit: 1191 error_exit:
1198 parse_state_ = BalsaFrameEnums::PARSE_ERROR; 1192 parse_state_ = BalsaFrameEnums::PARSE_ERROR;
1199 visitor_->HandleBodyError(this); 1193 visitor_->HandleBodyError(this);
1200 }; 1194 };
1201 1195
1202 // You may note that the state-machine contained within this function has both 1196 // You may note that the state-machine contained within this function has both
1203 // switch and goto labels for nearly the same thing. For instance, the 1197 // switch and goto labels for nearly the same thing. For instance, the
1204 // following two labels refer to the same code block: 1198 // following two labels refer to the same code block:
1205 // label_reading_chunk_data: 1199 // label_reading_chunk_data:
1206 // case BalsaFrameEnums::READING_CHUNK_DATA: 1200 // case BalsaFrameEnums::READING_CHUNK_DATA:
1207 // The 'case' statement is required for the switch statement which occurs when 1201 // The 'case' statement is required for the switch statement which occurs when
1208 // ProcessInput is invoked. The goto label is required as the state-machine 1202 // ProcessInput is invoked. The goto label is required as the state-machine
1209 // does not use a computed goto in any subsequent operations. 1203 // does not use a computed goto in any subsequent operations.
1210 // 1204 //
1211 // Since several states exit the state machine for various reasons, there is 1205 // Since several states exit the state machine for various reasons, there is
1212 // also one label at the bottom of the function. When it is appropriate to 1206 // also one label at the bottom of the function. When it is appropriate to
1213 // return from the function, that part of the state machine instead issues a 1207 // return from the function, that part of the state machine instead issues a
1214 // goto bottom; This results in less code duplication, and makes debugging 1208 // goto bottom; This results in less code duplication, and makes debugging
1215 // easier (as you can add a statement to a section of code which is guaranteed 1209 // easier (as you can add a statement to a section of code which is guaranteed
1216 // to be invoked when the function is exiting. 1210 // to be invoked when the function is exiting.
1217 size_t BalsaFrame::ProcessInput(const char* input, size_t size) { 1211 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
1218 const char* current = input; 1212 const char* current = input;
1219 const char* on_entry = current; 1213 const char* on_entry = current;
1220 const char* end = current + size; 1214 const char* end = current + size;
1221 #if DEBUGFRAMER 1215 #if DEBUGFRAMER
1222 LOG(INFO) << "\n==============" 1216 LOG(INFO) << "\n==============" << BalsaFrameEnums::ParseStateToString(
1223 << BalsaFrameEnums::ParseStateToString(parse_state_) 1217 parse_state_) << "===============\n";
1224 << "===============\n";
1225 #endif // DEBUGFRAMER 1218 #endif // DEBUGFRAMER
1226 1219
1227 DCHECK(headers_ != NULL); 1220 DCHECK(headers_ != NULL);
1228 if (headers_ == NULL) return 0; 1221 if (headers_ == NULL)
1222 return 0;
1229 1223
1230 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { 1224 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1231 const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); 1225 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
1232 // Yes, we still have to check this here as the user can change the 1226 // Yes, we still have to check this here as the user can change the
1233 // max_header_length amount! 1227 // max_header_length amount!
1234 // Also it is possible that we have reached the maximum allowed header size, 1228 // Also it is possible that we have reached the maximum allowed header size,
1235 // and we have more to consume (remember we are still inside 1229 // and we have more to consume (remember we are still inside
1236 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. 1230 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
1237 if (header_length > max_header_length_ || 1231 if (header_length > max_header_length_ ||
1238 (header_length == max_header_length_ && size > 0)) { 1232 (header_length == max_header_length_ && size > 0)) {
(...skipping 24 matching lines...) Expand all
1263 } 1257 }
1264 goto bottom; 1258 goto bottom;
1265 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || 1259 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1266 parse_state_ == BalsaFrameEnums::PARSE_ERROR) { 1260 parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1267 // Can do nothing more 'till we're reset. 1261 // Can do nothing more 'till we're reset.
1268 goto bottom; 1262 goto bottom;
1269 } 1263 }
1270 1264
1271 while (current < end) { 1265 while (current < end) {
1272 switch (parse_state_) { 1266 switch (parse_state_) {
1273 label_reading_chunk_length: 1267 label_reading_chunk_length:
1274 case BalsaFrameEnums::READING_CHUNK_LENGTH: 1268 case BalsaFrameEnums::READING_CHUNK_LENGTH:
1275 // In this state we read the chunk length. 1269 // In this state we read the chunk length.
1276 // Note that once we hit a character which is not in: 1270 // Note that once we hit a character which is not in:
1277 // [0-9;A-Fa-f\n], we transition to a different state. 1271 // [0-9;A-Fa-f\n], we transition to a different state.
1278 // 1272 //
1279 { 1273 {
1280 // If we used strtol, etc, we'd have to buffer this line. 1274 // If we used strtol, etc, we'd have to buffer this line.
1281 // This is more annoying than simply doing the conversion 1275 // This is more annoying than simply doing the conversion
1282 // here. This code accounts for overflow. 1276 // here. This code accounts for overflow.
1283 static const signed char buf[] = { 1277 static const signed char buf[] = {
1284 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f 1278 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f
1285 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, 1279 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
1286 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f 1280 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f
1287 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1281 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1288 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f 1282 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f
1289 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1283 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1290 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f 1284 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f
1291 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, 1285 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1,
1292 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f 1286 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f
1293 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1287 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1294 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f 1288 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f
1295 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1289 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1296 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f 1290 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f
1297 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1291 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1298 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f 1292 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f
1299 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1293 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1300 }; 1294 };
1301 // valid cases: 1295 // valid cases:
1302 // "09123\n" // -> 09123 1296 // "09123\n" // -> 09123
1303 // "09123\r\n" // -> 09123 1297 // "09123\r\n" // -> 09123
1304 // "09123 \n" // -> 09123 1298 // "09123 \n" // -> 09123
1305 // "09123 \r\n" // -> 09123 1299 // "09123 \r\n" // -> 09123
1306 // "09123 12312\n" // -> 09123 1300 // "09123 12312\n" // -> 09123
1307 // "09123 12312\r\n" // -> 09123 1301 // "09123 12312\r\n" // -> 09123
1308 // "09123; foo=bar\n" // -> 09123 1302 // "09123; foo=bar\n" // -> 09123
1309 // "09123; foo=bar\r\n" // -> 09123 1303 // "09123; foo=bar\r\n" // -> 09123
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1349 1343
1350 --current; 1344 --current;
1351 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; 1345 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1352 visitor_->ProcessChunkLength(chunk_length_remaining_); 1346 visitor_->ProcessChunkLength(chunk_length_remaining_);
1353 goto label_reading_chunk_extension; 1347 goto label_reading_chunk_extension;
1354 } 1348 }
1355 } 1349 }
1356 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1350 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1357 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH 1351 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH
1358 1352
1359 label_reading_chunk_extension: 1353 label_reading_chunk_extension:
1360 case BalsaFrameEnums::READING_CHUNK_EXTENSION: 1354 case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
1361 { 1355 // TODO(phython): Convert this scanning to be 16 bytes at a time if
1362 // TODO(phython): Convert this scanning to be 16 bytes at a time if 1356 // there is data to be read.
1363 // there is data to be read. 1357 const char* extensions_start = current;
1364 const char* extensions_start = current; 1358 size_t extensions_length = 0;
1365 size_t extensions_length = 0; 1359 while (current < end) {
1366 while (current < end) { 1360 const char c = *current;
1367 const char c = *current; 1361 if (c == '\r' || c == '\n') {
1368 if (c == '\r' || c == '\n') { 1362 extensions_length = (extensions_start == current)
1369 extensions_length = 1363 ? 0
1370 (extensions_start == current) ? 1364 : current - extensions_start - 1;
1371 0 : 1365 }
1372 current - extensions_start - 1; 1366
1367 ++current;
1368 if (c == '\n') {
1369 chunk_length_character_extracted_ = false;
1370 visitor_->ProcessChunkExtensions(extensions_start,
1371 extensions_length);
1372 if (chunk_length_remaining_ != 0) {
1373 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1374 goto label_reading_chunk_data;
1373 } 1375 }
1374 1376 HeaderFramingFound('\n');
1375 ++current; 1377 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1376 if (c == '\n') { 1378 goto label_reading_last_chunk_term;
1377 chunk_length_character_extracted_ = false;
1378 visitor_->ProcessChunkExtensions(
1379 extensions_start, extensions_length);
1380 if (chunk_length_remaining_ != 0) {
1381 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
1382 goto label_reading_chunk_data;
1383 }
1384 HeaderFramingFound('\n');
1385 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1386 goto label_reading_last_chunk_term;
1387 }
1388 } 1379 }
1389 visitor_->ProcessChunkExtensions(
1390 extensions_start, extensions_length);
1391 } 1380 }
1381 visitor_->ProcessChunkExtensions(extensions_start, extensions_length);
1382 }
1392 1383
1393 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1384 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1394 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION 1385 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION
1395 1386
1396 label_reading_chunk_data: 1387 label_reading_chunk_data:
1397 case BalsaFrameEnums::READING_CHUNK_DATA: 1388 case BalsaFrameEnums::READING_CHUNK_DATA:
1398 while (current < end) { 1389 while (current < end) {
1399 if (chunk_length_remaining_ == 0) { 1390 if (chunk_length_remaining_ == 0) {
1400 break; 1391 break;
1401 } 1392 }
1402 // read in the chunk 1393 // read in the chunk
1403 size_t bytes_remaining = end - current; 1394 size_t bytes_remaining = end - current;
1404 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ? 1395 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
1405 chunk_length_remaining_ : bytes_remaining; 1396 ? chunk_length_remaining_
1397 : bytes_remaining;
1406 const char* tmp_current = current + consumed_bytes; 1398 const char* tmp_current = current + consumed_bytes;
1407 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); 1399 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);
1408 visitor_->ProcessBodyData(current, consumed_bytes); 1400 visitor_->ProcessBodyData(current, consumed_bytes);
1409 on_entry = current = tmp_current; 1401 on_entry = current = tmp_current;
1410 chunk_length_remaining_ -= consumed_bytes; 1402 chunk_length_remaining_ -= consumed_bytes;
1411 } 1403 }
1412 if (chunk_length_remaining_ == 0) { 1404 if (chunk_length_remaining_ == 0) {
1413 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; 1405 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
1414 goto label_reading_chunk_term; 1406 goto label_reading_chunk_term;
1415 } 1407 }
1416 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1408 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1417 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA 1409 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA
1418 1410
1419 label_reading_chunk_term: 1411 label_reading_chunk_term:
1420 case BalsaFrameEnums::READING_CHUNK_TERM: 1412 case BalsaFrameEnums::READING_CHUNK_TERM:
1421 while (current < end) { 1413 while (current < end) {
1422 const char c = *current; 1414 const char c = *current;
1423 ++current; 1415 ++current;
1424 1416
1425 if (c == '\n') { 1417 if (c == '\n') {
1426 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; 1418 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1427 goto label_reading_chunk_length; 1419 goto label_reading_chunk_length;
1428 } 1420 }
1429 } 1421 }
1430 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1422 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1431 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM 1423 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM
1432 1424
1433 label_reading_last_chunk_term: 1425 label_reading_last_chunk_term:
1434 case BalsaFrameEnums::READING_LAST_CHUNK_TERM: 1426 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1435 while (current < end) { 1427 while (current < end) {
1436 const char c = *current; 1428 const char c = *current;
1437 1429
1438 if (!HeaderFramingFound(c)) { 1430 if (!HeaderFramingFound(c)) {
1439 // If not, however, since the spec only suggests that the 1431 // If not, however, since the spec only suggests that the
1440 // client SHOULD indicate the presence of trailers, we get to 1432 // client SHOULD indicate the presence of trailers, we get to
1441 // *test* that they did or didn't. 1433 // *test* that they did or didn't.
1442 // If all of the bytes we've seen since: 1434 // If all of the bytes we've seen since:
1443 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF 1435 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
(...skipping 21 matching lines...) Expand all
1465 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1457 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1466 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1458 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1467 visitor_->MessageDone(); 1459 visitor_->MessageDone();
1468 goto bottom; 1460 goto bottom;
1469 } 1461 }
1470 break; // from while loop 1462 break; // from while loop
1471 } 1463 }
1472 visitor_->ProcessBodyInput(on_entry, current - on_entry); 1464 visitor_->ProcessBodyInput(on_entry, current - on_entry);
1473 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM 1465 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM
1474 1466
1475 label_reading_trailer: 1467 label_reading_trailer:
1476 case BalsaFrameEnums::READING_TRAILER: 1468 case BalsaFrameEnums::READING_TRAILER:
1477 while (current < end) { 1469 while (current < end) {
1478 const char c = *current; 1470 const char c = *current;
1479 ++current; 1471 ++current;
1480 // TODO(fenix): If we ever care about trailers as part of framing, 1472 // TODO(fenix): If we ever care about trailers as part of framing,
1481 // deal with them here (see below for part of the 'solution') 1473 // deal with them here (see below for part of the 'solution')
1482 // if (LineFramingFound(c)) { 1474 // if (LineFramingFound(c)) {
1483 // trailer_lines_.push_back(make_pair(start_of_line_, 1475 // trailer_lines_.push_back(make_pair(start_of_line_,
1484 // trailer_length_ - 1)); 1476 // trailer_length_ - 1));
1485 // start_of_line_ = trailer_length_; 1477 // start_of_line_ = trailer_length_;
1486 // } 1478 // }
1487 if (HeaderFramingFound(c)) { 1479 if (HeaderFramingFound(c)) {
1488 // ProcessTrailers(visitor_, &trailers_); 1480 // ProcessTrailers(visitor_, &trailers_);
1489 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1481 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1490 visitor_->ProcessTrailerInput(on_entry, current - on_entry); 1482 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1491 visitor_->MessageDone(); 1483 visitor_->MessageDone();
1492 goto bottom; 1484 goto bottom;
1493 } 1485 }
1494 } 1486 }
1495 visitor_->ProcessTrailerInput(on_entry, current - on_entry); 1487 visitor_->ProcessTrailerInput(on_entry, current - on_entry);
1496 break; // case BalsaFrameEnums::READING_TRAILER 1488 break; // case BalsaFrameEnums::READING_TRAILER
1497 1489
1498 // Note that there is no label: 1490 // Note that there is no label:
1499 // 'label_reading_until_close' 1491 // 'label_reading_until_close'
1500 // here. This is because the state-machine exits immediately after 1492 // here. This is because the state-machine exits immediately after
1501 // reading the headers instead of transitioning here (as it would 1493 // reading the headers instead of transitioning here (as it would
1502 // do if it was consuming all the data it could, all the time). 1494 // do if it was consuming all the data it could, all the time).
1503 case BalsaFrameEnums::READING_UNTIL_CLOSE: 1495 case BalsaFrameEnums::READING_UNTIL_CLOSE: {
1504 { 1496 const size_t bytes_remaining = end - current;
1505 const size_t bytes_remaining = end - current; 1497 if (bytes_remaining > 0) {
1506 if (bytes_remaining > 0) { 1498 visitor_->ProcessBodyInput(current, bytes_remaining);
1507 visitor_->ProcessBodyInput(current, bytes_remaining); 1499 visitor_->ProcessBodyData(current, bytes_remaining);
1508 visitor_->ProcessBodyData(current, bytes_remaining); 1500 current += bytes_remaining;
1509 current += bytes_remaining;
1510 }
1511 } 1501 }
1502 }
1512 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE 1503 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE
1513 1504
1514 // label_reading_content: 1505 // label_reading_content:
1515 case BalsaFrameEnums::READING_CONTENT: 1506 case BalsaFrameEnums::READING_CONTENT:
1516 #if DEBUGFRAMER 1507 #if DEBUGFRAMER
1517 LOG(INFO) << "ReadingContent: " << content_length_remaining_; 1508 LOG(INFO) << "ReadingContent: " << content_length_remaining_;
1518 #endif // DEBUGFRAMER 1509 #endif // DEBUGFRAMER
1519 while (content_length_remaining_ && current < end) { 1510 while (content_length_remaining_ && current < end) {
1520 // read in the content 1511 // read in the content
1521 const size_t bytes_remaining = end - current; 1512 const size_t bytes_remaining = end - current;
1522 const size_t consumed_bytes = 1513 const size_t consumed_bytes =
1523 (content_length_remaining_ < bytes_remaining) ? 1514 (content_length_remaining_ < bytes_remaining)
1524 content_length_remaining_ : bytes_remaining; 1515 ? content_length_remaining_
1516 : bytes_remaining;
1525 visitor_->ProcessBodyInput(current, consumed_bytes); 1517 visitor_->ProcessBodyInput(current, consumed_bytes);
1526 visitor_->ProcessBodyData(current, consumed_bytes); 1518 visitor_->ProcessBodyData(current, consumed_bytes);
1527 current += consumed_bytes; 1519 current += consumed_bytes;
1528 content_length_remaining_ -= consumed_bytes; 1520 content_length_remaining_ -= consumed_bytes;
1529 } 1521 }
1530 if (content_length_remaining_ == 0) { 1522 if (content_length_remaining_ == 0) {
1531 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1523 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1532 visitor_->MessageDone(); 1524 visitor_->MessageDone();
1533 } 1525 }
1534 goto bottom; // case BalsaFrameEnums::READING_CONTENT 1526 goto bottom; // case BalsaFrameEnums::READING_CONTENT
1535 1527
1536 default: 1528 default:
1537 // The state-machine should never be in a state that isn't handled 1529 // The state-machine should never be in a state that isn't handled
1538 // above. This is a glaring logic error, and we should do something 1530 // above. This is a glaring logic error, and we should do something
1539 // drastic to ensure that this gets looked-at and fixed. 1531 // drastic to ensure that this gets looked-at and fixed.
1540 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE 1532 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
1541 << " memory corruption?!"; // COV_NF_LINE 1533 << " memory corruption?!"; // COV_NF_LINE
1542 } 1534 }
1543 } 1535 }
1544 bottom: 1536 bottom:
1545 #if DEBUGFRAMER 1537 #if DEBUGFRAMER
1546 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" 1538 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"
1547 << std::string(input, current) 1539 << std::string(input, current) << "\n$$$$$$$$$$$$$$"
1548 << "\n$$$$$$$$$$$$$$" 1540 << BalsaFrameEnums::ParseStateToString(parse_state_)
1549 << BalsaFrameEnums::ParseStateToString(parse_state_) 1541 << "$$$$$$$$$$$$$$$"
1550 << "$$$$$$$$$$$$$$$" 1542 << " consumed: " << (current - input);
1551 << " consumed: " << (current - input);
1552 if (Error()) { 1543 if (Error()) {
1553 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); 1544 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());
1554 } 1545 }
1555 #endif // DEBUGFRAMER 1546 #endif // DEBUGFRAMER
1556 return current - input; 1547 return current - input;
1557 } 1548 }
1558 1549
1559 const uint32 BalsaFrame::kValidTerm1; 1550 const uint32 BalsaFrame::kValidTerm1;
1560 const uint32 BalsaFrame::kValidTerm1Mask; 1551 const uint32 BalsaFrame::kValidTerm1Mask;
1561 const uint32 BalsaFrame::kValidTerm2; 1552 const uint32 BalsaFrame::kValidTerm2;
1562 const uint32 BalsaFrame::kValidTerm2Mask; 1553 const uint32 BalsaFrame::kValidTerm2Mask;
1563 1554
1564 } // namespace net 1555 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698