OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/tools/balsa/balsa_frame.h" | 5 #include "net/tools/balsa/balsa_frame.h" |
6 | 6 |
7 #include <assert.h> | 7 #include <assert.h> |
8 #if __SSE2__ | 8 #if __SSE2__ |
9 #include <emmintrin.h> | 9 #include <emmintrin.h> |
10 #endif // __SSE2__ | 10 #endif // __SSE2__ |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 content_length_remaining_(0), | 49 content_length_remaining_(0), |
50 last_slash_n_loc_(NULL), | 50 last_slash_n_loc_(NULL), |
51 last_recorded_slash_n_loc_(NULL), | 51 last_recorded_slash_n_loc_(NULL), |
52 last_slash_n_idx_(0), | 52 last_slash_n_idx_(0), |
53 term_chars_(0), | 53 term_chars_(0), |
54 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), | 54 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), |
55 last_error_(BalsaFrameEnums::NO_ERROR), | 55 last_error_(BalsaFrameEnums::NO_ERROR), |
56 headers_(NULL) { | 56 headers_(NULL) { |
57 } | 57 } |
58 | 58 |
59 BalsaFrame::~BalsaFrame() {} | 59 BalsaFrame::~BalsaFrame() { |
| 60 } |
60 | 61 |
61 void BalsaFrame::Reset() { | 62 void BalsaFrame::Reset() { |
62 last_char_was_slash_r_ = false; | 63 last_char_was_slash_r_ = false; |
63 saw_non_newline_char_ = false; | 64 saw_non_newline_char_ = false; |
64 start_was_space_ = true; | 65 start_was_space_ = true; |
65 chunk_length_character_extracted_ = false; | 66 chunk_length_character_extracted_ = false; |
66 // is_request_ = true; // not reset between messages. | 67 // is_request_ = true; // not reset between messages. |
67 // request_was_head_ = false; // not reset between messages. | 68 // request_was_head_ = false; // not reset between messages. |
68 // max_header_length_ = 4096; // not reset between messages. | 69 // max_header_length_ = 4096; // not reset between messages. |
69 // max_request_uri_length_ = 2048; // not reset between messages. | 70 // max_request_uri_length_ = 2048; // not reset between messages. |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
259 // The two following statements should not be possible. | 260 // The two following statements should not be possible. |
260 if (end == begin) { | 261 if (end == begin) { |
261 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; | 262 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; |
262 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" | 263 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" |
263 << headers->OriginalHeadersForDebugging(); | 264 << headers->OriginalHeadersForDebugging(); |
264 return false; | 265 return false; |
265 } | 266 } |
266 | 267 |
267 // whitespace_1_idx_ | 268 // whitespace_1_idx_ |
268 headers->whitespace_1_idx_ = current - begin; | 269 headers->whitespace_1_idx_ = current - begin; |
269 // This loop is commented out as it is never used in current code. This is | 270 // This loop is commented out as it is never used in current code. This is |
270 // true only because we don't begin parsing the headers at all until we've | 271 // true only because we don't begin parsing the headers at all until we've |
271 // encountered a non whitespace character at the beginning of the stream, at | 272 // encountered a non whitespace character at the beginning of the stream, at |
272 // which point we begin our demarcation of header-start. If we did -not- do | 273 // which point we begin our demarcation of header-start. If we did -not- do |
273 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop | 274 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop |
274 // would be necessary for the proper functioning of this parsing. | 275 // would be necessary for the proper functioning of this parsing. |
275 // This is left here as this function may (in the future) be refactored out | 276 // This is left here as this function may (in the future) be refactored out |
276 // of the BalsaFrame class so that it may be shared between code in | 277 // of the BalsaFrame class so that it may be shared between code in |
277 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the | 278 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the |
278 // set_first_line() function (at which point it would be necessary). | 279 // set_first_line() function (at which point it would be necessary). |
279 #if 0 | 280 #if 0 |
280 while (*current <= ' ') { | 281 while (*current <= ' ') { |
281 ++current; | 282 ++current; |
282 } | 283 } |
283 #endif | 284 #endif |
284 // non_whitespace_1_idx_ | 285 // non_whitespace_1_idx_ |
285 headers->non_whitespace_1_idx_ = current - begin; | 286 headers->non_whitespace_1_idx_ = current - begin; |
286 do { | 287 do { |
287 // The first time through, we're guaranteed that the current character | 288 // The first time through, we're guaranteed that the current character |
288 // won't be a whitespace (else the loop above wouldn't have terminated). | 289 // won't be a whitespace (else the loop above wouldn't have terminated). |
289 // That implies that we're guaranteed to get at least one non-whitespace | 290 // That implies that we're guaranteed to get at least one non-whitespace |
290 // character if we get into this loop at all. | 291 // character if we get into this loop at all. |
291 ++current; | 292 ++current; |
292 if (current == end) { | 293 if (current == end) { |
293 headers->whitespace_2_idx_ = current - begin; | 294 headers->whitespace_2_idx_ = current - begin; |
294 headers->non_whitespace_2_idx_ = current - begin; | 295 headers->non_whitespace_2_idx_ = current - begin; |
295 headers->whitespace_3_idx_ = current - begin; | 296 headers->whitespace_3_idx_ = current - begin; |
296 headers->non_whitespace_3_idx_ = current - begin; | 297 headers->non_whitespace_3_idx_ = current - begin; |
297 headers->whitespace_4_idx_ = current - begin; | 298 headers->whitespace_4_idx_ = current - begin; |
298 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request | 299 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request |
299 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response | 300 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response |
300 *error_code = | 301 *error_code = static_cast<BalsaFrameEnums::ErrorCode>( |
301 static_cast<BalsaFrameEnums::ErrorCode>( | 302 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + |
302 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + | 303 is_request); |
303 is_request); | |
304 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION | 304 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION |
305 return false; | 305 return false; |
306 } | 306 } |
307 goto output_exhausted; | 307 goto output_exhausted; |
308 } | 308 } |
309 } while (*current > ' '); | 309 } while (*current > ' '); |
310 // whitespace_2_idx_ | 310 // whitespace_2_idx_ |
311 headers->whitespace_2_idx_ = current - begin; | 311 headers->whitespace_2_idx_ = current - begin; |
312 do { | 312 do { |
313 ++current; | 313 ++current; |
314 // Note that due to the loop which consumes all of the whitespace | 314 // Note that due to the loop which consumes all of the whitespace |
315 // at the end of the line, current can never == end while in this function. | 315 // at the end of the line, current can never == end while in this function. |
316 } while (*current <= ' '); | 316 } while (*current <= ' '); |
317 // non_whitespace_2_idx_ | 317 // non_whitespace_2_idx_ |
318 headers->non_whitespace_2_idx_ = current - begin; | 318 headers->non_whitespace_2_idx_ = current - begin; |
319 do { | 319 do { |
320 ++current; | 320 ++current; |
321 if (current == end) { | 321 if (current == end) { |
322 headers->whitespace_3_idx_ = current - begin; | 322 headers->whitespace_3_idx_ = current - begin; |
323 headers->non_whitespace_3_idx_ = current - begin; | 323 headers->non_whitespace_3_idx_ = current - begin; |
324 headers->whitespace_4_idx_ = current - begin; | 324 headers->whitespace_4_idx_ = current - begin; |
325 // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI for request | 325 // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI for request |
326 // FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response | 326 // FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response |
327 *error_code = | 327 *error_code = static_cast<BalsaFrameEnums::ErrorCode>( |
328 static_cast<BalsaFrameEnums::ErrorCode>( | 328 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE + |
329 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE | 329 is_request); |
330 + is_request); | |
331 goto output_exhausted; | 330 goto output_exhausted; |
332 } | 331 } |
333 } while (*current > ' '); | 332 } while (*current > ' '); |
334 // whitespace_3_idx_ | 333 // whitespace_3_idx_ |
335 headers->whitespace_3_idx_ = current - begin; | 334 headers->whitespace_3_idx_ = current - begin; |
336 do { | 335 do { |
337 ++current; | 336 ++current; |
338 // Note that due to the loop which consumes all of the whitespace | 337 // Note that due to the loop which consumes all of the whitespace |
339 // at the end of the line, current can never == end while in this function. | 338 // at the end of the line, current can never == end while in this function. |
340 } while (*current <= ' '); | 339 } while (*current <= ' '); |
341 // non_whitespace_3_idx_ | 340 // non_whitespace_3_idx_ |
342 headers->non_whitespace_3_idx_ = current - begin; | 341 headers->non_whitespace_3_idx_ = current - begin; |
343 headers->whitespace_4_idx_ = end - begin; | 342 headers->whitespace_4_idx_ = end - begin; |
344 | 343 |
345 output_exhausted: | 344 output_exhausted: |
346 // Note that we don't fail the parse immediately when parsing of the | 345 // Note that we don't fail the parse immediately when parsing of the |
347 // firstline fails. Depending on the protocol type, we may want to accept | 346 // firstline fails. Depending on the protocol type, we may want to accept |
348 // a firstline with only one or two elements, e.g., for HTTP/0.9: | 347 // a firstline with only one or two elements, e.g., for HTTP/0.9: |
349 // GET\r\n | 348 // GET\r\n |
350 // or | 349 // or |
351 // GET /\r\n | 350 // GET /\r\n |
352 // should be parsed without issue (though the visitor should know that | 351 // should be parsed without issue (though the visitor should know that |
353 // parsing the entire line was not exactly as it should be). | 352 // parsing the entire line was not exactly as it should be). |
354 // | 353 // |
355 // Eventually, these errors may be removed altogether, as the visitor can | 354 // Eventually, these errors may be removed altogether, as the visitor can |
356 // detect them on its own by examining the size of the various fields. | 355 // detect them on its own by examining the size of the various fields. |
357 // headers->set_first_line(non_whitespace_1_idx_, current); | 356 // headers->set_first_line(non_whitespace_1_idx_, current); |
358 | 357 |
359 if (is_request) { | 358 if (is_request) { |
360 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) > | 359 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) > |
361 max_request_uri_length) { | 360 max_request_uri_length) { |
362 // For requests, we need at least the method. We could assume that a | 361 // For requests, we need at least the method. We could assume that a |
363 // blank URI means "/". If version isn't stated, it should be assumed | 362 // blank URI means "/". If version isn't stated, it should be assumed |
364 // to be HTTP/0.9 by the visitor. | 363 // to be HTTP/0.9 by the visitor. |
365 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG; | 364 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG; |
366 return false; | 365 return false; |
367 } | 366 } |
368 } else { | 367 } else { |
369 headers->parsed_response_code_ = 0; | 368 headers->parsed_response_code_ = 0; |
370 { | 369 { |
371 const char* parsed_response_code_current = | 370 const char* parsed_response_code_current = |
372 begin + headers->non_whitespace_2_idx_; | 371 begin + headers->non_whitespace_2_idx_; |
373 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_; | 372 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_; |
374 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; | 373 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; |
375 | 374 |
376 // Convert a string of [0-9]* into an int. | 375 // Convert a string of [0-9]* into an int. |
377 // Note that this allows for the conversion of response codes which | 376 // Note that this allows for the conversion of response codes which |
378 // are outside the bounds of normal HTTP response codes (no checking | 377 // are outside the bounds of normal HTTP response codes (no checking |
379 // is done to ensure that these are valid-- they're merely parsed)! | 378 // is done to ensure that these are valid-- they're merely parsed)! |
380 while (parsed_response_code_current < parsed_response_code_end) { | 379 while (parsed_response_code_current < parsed_response_code_end) { |
381 if (*parsed_response_code_current < '0' || | 380 if (*parsed_response_code_current < '0' || |
382 *parsed_response_code_current > '9') { | 381 *parsed_response_code_current > '9') { |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
460 void BalsaFrame::CleanUpKeyValueWhitespace( | 459 void BalsaFrame::CleanUpKeyValueWhitespace( |
461 const char* stream_begin, | 460 const char* stream_begin, |
462 const char* line_begin, | 461 const char* line_begin, |
463 const char* current, | 462 const char* current, |
464 const char* line_end, | 463 const char* line_end, |
465 HeaderLineDescription* current_header_line) { | 464 HeaderLineDescription* current_header_line) { |
466 const char* colon_loc = current; | 465 const char* colon_loc = current; |
467 DCHECK_LT(colon_loc, line_end); | 466 DCHECK_LT(colon_loc, line_end); |
468 DCHECK_EQ(':', *colon_loc); | 467 DCHECK_EQ(':', *colon_loc); |
469 DCHECK_EQ(':', *current); | 468 DCHECK_EQ(':', *current); |
470 DCHECK_GE(' ', *line_end) | 469 DCHECK_GE(' ', *line_end) << "\"" << std::string(line_begin, line_end) |
471 << "\"" << std::string(line_begin, line_end) << "\""; | 470 << "\""; |
472 | 471 |
473 // TODO(fenix): Investigate whether or not the bounds tests in the | 472 // TODO(fenix): Investigate whether or not the bounds tests in the |
474 // while loops here are redundant, and if so, remove them. | 473 // while loops here are redundant, and if so, remove them. |
475 --current; | 474 --current; |
476 while (current > line_begin && *current <= ' ') --current; | 475 while (current > line_begin && *current <= ' ') |
| 476 --current; |
477 current += (current != colon_loc); | 477 current += (current != colon_loc); |
478 current_header_line->key_end_idx = current - stream_begin; | 478 current_header_line->key_end_idx = current - stream_begin; |
479 | 479 |
480 current = colon_loc; | 480 current = colon_loc; |
481 DCHECK_EQ(':', *current); | 481 DCHECK_EQ(':', *current); |
482 ++current; | 482 ++current; |
483 while (current < line_end && *current <= ' ') ++current; | 483 while (current < line_end && *current <= ' ') |
| 484 ++current; |
484 current_header_line->value_begin_idx = current - stream_begin; | 485 current_header_line->value_begin_idx = current - stream_begin; |
485 | 486 |
486 DCHECK_GE(current_header_line->key_end_idx, | 487 DCHECK_GE(current_header_line->key_end_idx, |
487 current_header_line->first_char_idx); | 488 current_header_line->first_char_idx); |
488 DCHECK_GE(current_header_line->value_begin_idx, | 489 DCHECK_GE(current_header_line->value_begin_idx, |
489 current_header_line->key_end_idx); | 490 current_header_line->key_end_idx); |
490 DCHECK_GE(current_header_line->last_char_idx, | 491 DCHECK_GE(current_header_line->last_char_idx, |
491 current_header_line->value_begin_idx); | 492 current_header_line->value_begin_idx); |
492 } | 493 } |
493 | 494 |
494 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() { | 495 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() { |
495 DCHECK(!lines_.empty()); | 496 DCHECK(!lines_.empty()); |
496 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | 497 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); |
497 // The last line is always just a newline (and is uninteresting). | 498 // The last line is always just a newline (and is uninteresting). |
498 const Lines::size_type lines_size_m1 = lines_.size() - 1; | 499 const Lines::size_type lines_size_m1 = lines_.size() - 1; |
499 #if __SSE2__ | 500 #if __SSE2__ |
500 const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':', | 501 const __v16qi colons = {':', ':', ':', ':', ':', ':', ':', ':', |
501 ':', ':', ':', ':', ':', ':', ':', ':'}; | 502 ':', ':', ':', ':', ':', ':', ':', ':'}; |
502 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; | 503 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; |
503 #endif // __SSE2__ | 504 #endif // __SSE2__ |
504 const char* current = stream_begin + lines_[1].first; | 505 const char* current = stream_begin + lines_[1].first; |
505 // This code is a bit more subtle than it may appear at first glance. | 506 // This code is a bit more subtle than it may appear at first glance. |
506 // This code looks for a colon in the current line... but it also looks | 507 // This code looks for a colon in the current line... but it also looks |
507 // beyond the current line. If there is no colon in the current line, then | 508 // beyond the current line. If there is no colon in the current line, then |
508 // for each subsequent line (until the colon which -has- been found is | 509 // for each subsequent line (until the colon which -has- been found is |
509 // associated with a line), no searching for a colon will be performed. In | 510 // associated with a line), no searching for a colon will be performed. In |
510 // this way, we minimize the amount of bytes we have scanned for a colon. | 511 // this way, we minimize the amount of bytes we have scanned for a colon. |
511 for (Lines::size_type i = 1; i < lines_size_m1;) { | 512 for (Lines::size_type i = 1; i < lines_size_m1;) { |
(...skipping 15 matching lines...) Expand all Loading... |
527 } | 528 } |
528 const char* line_end = stream_begin + lines_[i - 1].second; | 529 const char* line_end = stream_begin + lines_[i - 1].second; |
529 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); | 530 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); |
530 | 531 |
531 // We cleanup the whitespace at the end of the line before doing anything | 532 // We cleanup the whitespace at the end of the line before doing anything |
532 // else of interest as it allows us to do nothing when irregularly formatted | 533 // else of interest as it allows us to do nothing when irregularly formatted |
533 // headers are parsed (e.g. those with only keys, only values, or no colon). | 534 // headers are parsed (e.g. those with only keys, only values, or no colon). |
534 // | 535 // |
535 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. | 536 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. |
536 --line_end; | 537 --line_end; |
537 DCHECK_EQ('\n', *line_end) | 538 DCHECK_EQ('\n', *line_end) << "\"" << std::string(line_begin, line_end) |
538 << "\"" << std::string(line_begin, line_end) << "\""; | 539 << "\""; |
539 while (*line_end <= ' ' && line_end > line_begin) { | 540 while (*line_end <= ' ' && line_end > line_begin) { |
540 --line_end; | 541 --line_end; |
541 } | 542 } |
542 ++line_end; | 543 ++line_end; |
543 DCHECK_GE(' ', *line_end); | 544 DCHECK_GE(' ', *line_end); |
544 DCHECK_LT(line_begin, line_end); | 545 DCHECK_LT(line_begin, line_end); |
545 | 546 |
546 // We use '0' for the block idx, because we're always writing to the first | 547 // We use '0' for the block idx, because we're always writing to the first |
547 // block from the framer (we do this because the framer requires that the | 548 // block from the framer (we do this because the framer requires that the |
548 // entire header sequence be in a contiguous buffer). | 549 // entire header sequence be in a contiguous buffer). |
(...skipping 12 matching lines...) Expand all Loading... |
561 } else if (current < line_begin) { | 562 } else if (current < line_begin) { |
562 // When this condition is true, the last detected colon was part of a | 563 // When this condition is true, the last detected colon was part of a |
563 // previous line. We reset to the beginning of the line as we don't care | 564 // previous line. We reset to the beginning of the line as we don't care |
564 // about the presence of any colon before the beginning of the current | 565 // about the presence of any colon before the beginning of the current |
565 // line. | 566 // line. |
566 current = line_begin; | 567 current = line_begin; |
567 } | 568 } |
568 #if __SSE2__ | 569 #if __SSE2__ |
569 while (current < header_lines_end_m16) { | 570 while (current < header_lines_end_m16) { |
570 __m128i header_bytes = | 571 __m128i header_bytes = |
571 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current)); | 572 _mm_loadu_si128(reinterpret_cast<const __m128i*>(current)); |
572 __m128i colon_cmp = | 573 __m128i colon_cmp = |
573 _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons)); | 574 _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons)); |
574 int colon_msk = _mm_movemask_epi8(colon_cmp); | 575 int colon_msk = _mm_movemask_epi8(colon_cmp); |
575 if (colon_msk == 0) { | 576 if (colon_msk == 0) { |
576 current += 16; | 577 current += 16; |
577 continue; | 578 continue; |
578 } | 579 } |
579 current += (ffs(colon_msk) - 1); | 580 current += (ffs(colon_msk) - 1); |
580 if (current > line_end) { | 581 if (current > line_end) { |
581 break; | 582 break; |
582 } | 583 } |
583 goto found_colon; | 584 goto found_colon; |
584 } | 585 } |
585 #endif // __SSE2__ | 586 #endif // __SSE2__ |
586 for (; current < line_end; ++current) { | 587 for (; current < line_end; ++current) { |
587 if (*current != ':') { | 588 if (*current != ':') { |
588 continue; | 589 continue; |
589 } | 590 } |
590 goto found_colon; | 591 goto found_colon; |
591 } | 592 } |
592 // If we've gotten to here, then there was no colon | 593 // If we've gotten to here, then there was no colon |
593 // in the line. The arguments we passed into the construction | 594 // in the line. The arguments we passed into the construction |
594 // for the HeaderLineDescription object should be OK-- it assumes | 595 // for the HeaderLineDescription object should be OK-- it assumes |
595 // that the entire content is 'key' by default (which is true, as | 596 // that the entire content is 'key' by default (which is true, as |
596 // there was no colon, there can be no value). Note that this is a | 597 // there was no colon, there can be no value). Note that this is a |
597 // construct which is technically not allowed by the spec. | 598 // construct which is technically not allowed by the spec. |
598 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; | 599 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; |
599 visitor_->HandleHeaderWarning(this); | 600 visitor_->HandleHeaderWarning(this); |
600 continue; | 601 continue; |
601 found_colon: | 602 found_colon: |
602 DCHECK_EQ(*current, ':'); | 603 DCHECK_EQ(*current, ':'); |
603 DCHECK_LE(current - stream_begin, line_end - stream_begin); | 604 DCHECK_LE(current - stream_begin, line_end - stream_begin); |
604 DCHECK_LE(stream_begin - stream_begin, current - stream_begin); | 605 DCHECK_LE(stream_begin - stream_begin, current - stream_begin); |
605 | 606 |
606 HeaderLineDescription& current_header_line = headers_->header_lines_.back(); | 607 HeaderLineDescription& current_header_line = headers_->header_lines_.back(); |
607 current_header_line.key_end_idx = current - stream_begin; | 608 current_header_line.key_end_idx = current - stream_begin; |
608 current_header_line.value_begin_idx = current_header_line.key_end_idx; | 609 current_header_line.value_begin_idx = current_header_line.key_end_idx; |
609 if (current < line_end) { | 610 if (current < line_end) { |
610 ++current_header_line.key_end_idx; | 611 ++current_header_line.key_end_idx; |
611 | 612 |
612 CleanUpKeyValueWhitespace(stream_begin, | 613 CleanUpKeyValueWhitespace( |
613 line_begin, | 614 stream_begin, line_begin, current, line_end, &current_header_line); |
614 current, | |
615 line_end, | |
616 &current_header_line); | |
617 } | 615 } |
618 } | 616 } |
619 } | 617 } |
620 | 618 |
621 void BalsaFrame::ProcessContentLengthLine( | 619 void BalsaFrame::ProcessContentLengthLine( |
622 HeaderLines::size_type line_idx, | 620 HeaderLines::size_type line_idx, |
623 BalsaHeadersEnums::ContentLengthStatus* status, | 621 BalsaHeadersEnums::ContentLengthStatus* status, |
624 size_t* length) { | 622 size_t* length) { |
625 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; | 623 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; |
626 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | 624 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); |
627 const char* line_end = stream_begin + header_line.last_char_idx; | 625 const char* line_end = stream_begin + header_line.last_char_idx; |
628 const char* value_begin = (stream_begin + header_line.value_begin_idx); | 626 const char* value_begin = (stream_begin + header_line.value_begin_idx); |
629 | 627 |
630 if (value_begin >= line_end) { | 628 if (value_begin >= line_end) { |
631 // There is no non-whitespace value data. | 629 // There is no non-whitespace value data. |
632 #if DEBUGFRAMER | 630 #if DEBUGFRAMER |
633 LOG(INFO) << "invalid content-length -- no non-whitespace value data"; | 631 LOG(INFO) << "invalid content-length -- no non-whitespace value data"; |
634 #endif | 632 #endif |
635 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; | 633 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; |
636 return; | 634 return; |
637 } | 635 } |
638 | 636 |
639 *length = 0; | 637 *length = 0; |
640 while (value_begin < line_end) { | 638 while (value_begin < line_end) { |
641 if (*value_begin < '0' || *value_begin > '9') { | 639 if (*value_begin < '0' || *value_begin > '9') { |
642 // bad! content-length found, and couldn't parse all of it! | 640 // bad! content-length found, and couldn't parse all of it! |
643 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; | 641 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; |
(...skipping 22 matching lines...) Expand all Loading... |
666 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; | 664 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; |
667 } | 665 } |
668 | 666 |
669 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { | 667 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { |
670 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; | 668 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; |
671 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | 669 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); |
672 const char* line_end = stream_begin + header_line.last_char_idx; | 670 const char* line_end = stream_begin + header_line.last_char_idx; |
673 const char* value_begin = stream_begin + header_line.value_begin_idx; | 671 const char* value_begin = stream_begin + header_line.value_begin_idx; |
674 size_t value_length = line_end - value_begin; | 672 size_t value_length = line_end - value_begin; |
675 | 673 |
676 if ((value_length == 7) && | 674 if ((value_length == 7) && !strncasecmp(value_begin, "chunked", 7)) { |
677 !strncasecmp(value_begin, "chunked", 7)) { | |
678 headers_->transfer_encoding_is_chunked_ = true; | 675 headers_->transfer_encoding_is_chunked_ = true; |
679 } else if ((value_length == 8) && | 676 } else if ((value_length == 8) && !strncasecmp(value_begin, "identity", 8)) { |
680 !strncasecmp(value_begin, "identity", 8)) { | |
681 headers_->transfer_encoding_is_chunked_ = false; | 677 headers_->transfer_encoding_is_chunked_ = false; |
682 } else { | 678 } else { |
683 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; | 679 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; |
684 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 680 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
685 visitor_->HandleHeaderError(this); | 681 visitor_->HandleHeaderError(this); |
686 return; | 682 return; |
687 } | 683 } |
688 } | 684 } |
689 | 685 |
690 namespace { | 686 namespace { |
691 bool SplitStringPiece(base::StringPiece original, char delim, | 687 bool SplitStringPiece(base::StringPiece original, |
692 base::StringPiece* before, base::StringPiece* after) { | 688 char delim, |
| 689 base::StringPiece* before, |
| 690 base::StringPiece* after) { |
693 const char* p = original.data(); | 691 const char* p = original.data(); |
694 const char* end = p + original.size(); | 692 const char* end = p + original.size(); |
695 | 693 |
696 while (p != end) { | 694 while (p != end) { |
697 if (*p == delim) { | 695 if (*p == delim) { |
698 ++p; | 696 ++p; |
699 } else { | 697 } else { |
700 const char* start = p; | 698 const char* start = p; |
701 while (++p != end && *p != delim) { | 699 while (++p != end && *p != delim) { |
701 // Skip to the next occurrence of the delimiter. | 699 // Skip to the next occurrence of the delimiter. |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
740 | 738 |
741 extensions->AppendHeader(key, value); | 739 extensions->AppendHeader(key, value); |
742 | 740 |
743 StringPieceUtils::RemoveWhitespaceContext(&remaining); | 741 StringPieceUtils::RemoveWhitespaceContext(&remaining); |
744 SplitStringPiece(remaining, ';', &extension, &remaining); | 742 SplitStringPiece(remaining, ';', &extension, &remaining); |
745 } | 743 } |
746 } | 744 } |
747 | 745 |
748 } // anonymous namespace | 746 } // anonymous namespace |
749 | 747 |
750 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size, | 748 void BalsaFrame::ProcessChunkExtensions(const char* input, |
| 749 size_t size, |
751 BalsaHeaders* extensions) { | 750 BalsaHeaders* extensions) { |
752 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions); | 751 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions); |
753 } | 752 } |
754 | 753 |
755 void BalsaFrame::ProcessHeaderLines() { | 754 void BalsaFrame::ProcessHeaderLines() { |
756 HeaderLines::size_type content_length_idx = 0; | 755 HeaderLines::size_type content_length_idx = 0; |
757 HeaderLines::size_type transfer_encoding_idx = 0; | 756 HeaderLines::size_type transfer_encoding_idx = 0; |
758 | 757 |
759 DCHECK(!lines_.empty()); | 758 DCHECK(!lines_.empty()); |
760 #if DEBUGFRAMER | 759 #if DEBUGFRAMER |
761 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; | 760 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; |
762 #endif // DEBUGFRAMER | 761 #endif // DEBUGFRAMER |
763 | 762 |
764 // There is no need to attempt to process headers if no header lines exist. | 763 // There is no need to attempt to process headers if no header lines exist. |
765 // There are at least two lines in the message which are not header lines. | 764 // There are at least two lines in the message which are not header lines. |
766 // These two non-header lines are the first line of the message, and the | 765 // These two non-header lines are the first line of the message, and the |
767 // last line of the message (which is an empty line). | 766 // last line of the message (which is an empty line). |
768 // Thus, we test to see if we have more than two lines total before attempting | 767 // Thus, we test to see if we have more than two lines total before attempting |
769 // to parse any header lines. | 768 // to parse any header lines. |
770 if (lines_.size() > 2) { | 769 if (lines_.size() > 2) { |
771 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | 770 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); |
772 | 771 |
773 // Then, for the rest of the header data, we parse these into key-value | 772 // Then, for the rest of the header data, we parse these into key-value |
774 // pairs. | 773 // pairs. |
775 FindColonsAndParseIntoKeyValue(); | 774 FindColonsAndParseIntoKeyValue(); |
776 // At this point, we've parsed all of the headers. Time to look for those | 775 // At this point, we've parsed all of the headers. Time to look for those |
777 // headers which we require for framing. | 776 // headers which we require for framing. |
778 const HeaderLines::size_type | 777 const HeaderLines::size_type header_lines_size = |
779 header_lines_size = headers_->header_lines_.size(); | 778 headers_->header_lines_.size(); |
780 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { | 779 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { |
781 const HeaderLineDescription& current_header_line = | 780 const HeaderLineDescription& current_header_line = |
782 headers_->header_lines_[i]; | 781 headers_->header_lines_[i]; |
783 const char* key_begin = | 782 const char* key_begin = |
784 (stream_begin + current_header_line.first_char_idx); | 783 (stream_begin + current_header_line.first_char_idx); |
785 const char* key_end = (stream_begin + current_header_line.key_end_idx); | 784 const char* key_end = (stream_begin + current_header_line.key_end_idx); |
786 const size_t key_len = key_end - key_begin; | 785 const size_t key_len = key_end - key_begin; |
787 const char c = *key_begin; | 786 const char c = *key_begin; |
788 #if DEBUGFRAMER | 787 #if DEBUGFRAMER |
789 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len) | 788 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len) |
790 << " c: '" << c << "' key_len: " << key_len; | 789 << " c: '" << c << "' key_len: " << key_len; |
791 #endif // DEBUGFRAMER | 790 #endif // DEBUGFRAMER |
792 // If a header begins with either lowercase or uppercase 'c' or 't', then | 791 // If a header begins with either lowercase or uppercase 'c' or 't', then |
793 // the header may be one of content-length, connection, content-encoding | 792 // the header may be one of content-length, connection, content-encoding |
794 // or transfer-encoding. These headers are special, as they change the way | 793 // or transfer-encoding. These headers are special, as they change the way |
795 // that the message is framed, and so the framer is required to search | 794 // that the message is framed, and so the framer is required to search |
796 // for them. | 795 // for them. |
797 | 796 |
798 | |
799 if (c == 'c' || c == 'C') { | 797 if (c == 'c' || c == 'C') { |
800 if ((key_len == kContentLengthSize) && | 798 if ((key_len == kContentLengthSize) && |
801 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { | 799 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { |
802 BalsaHeadersEnums::ContentLengthStatus content_length_status = | 800 BalsaHeadersEnums::ContentLengthStatus content_length_status = |
803 BalsaHeadersEnums::NO_CONTENT_LENGTH; | 801 BalsaHeadersEnums::NO_CONTENT_LENGTH; |
804 size_t length = 0; | 802 size_t length = 0; |
805 ProcessContentLengthLine(i, &content_length_status, &length); | 803 ProcessContentLengthLine(i, &content_length_status, &length); |
806 if (content_length_idx != 0) { // then we've already seen one! | 804 if (content_length_idx != 0) { // then we've already seen one! |
807 if ((headers_->content_length_status_ != content_length_status) || | 805 if ((headers_->content_length_status_ != content_length_status) || |
808 ((headers_->content_length_status_ == | 806 ((headers_->content_length_status_ == |
809 BalsaHeadersEnums::VALID_CONTENT_LENGTH) && | 807 BalsaHeadersEnums::VALID_CONTENT_LENGTH) && |
810 length != headers_->content_length_)) { | 808 length != headers_->content_length_)) { |
811 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; | 809 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; |
812 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 810 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
813 visitor_->HandleHeaderError(this); | 811 visitor_->HandleHeaderError(this); |
814 return; | 812 return; |
815 } | 813 } |
816 continue; | 814 continue; |
817 } else { | 815 } else { |
818 content_length_idx = i + 1; | 816 content_length_idx = i + 1; |
819 headers_->content_length_status_ = content_length_status; | 817 headers_->content_length_status_ = content_length_status; |
820 headers_->content_length_ = length; | 818 headers_->content_length_ = length; |
821 content_length_remaining_ = length; | 819 content_length_remaining_ = length; |
822 } | 820 } |
823 | |
824 } | 821 } |
825 } else if (c == 't' || c == 'T') { | 822 } else if (c == 't' || c == 'T') { |
826 if ((key_len == kTransferEncodingSize) && | 823 if ((key_len == kTransferEncodingSize) && |
827 0 == strncasecmp(key_begin, kTransferEncoding, | 824 0 == strncasecmp( |
828 kTransferEncodingSize)) { | 825 key_begin, kTransferEncoding, kTransferEncodingSize)) { |
829 if (transfer_encoding_idx != 0) { | 826 if (transfer_encoding_idx != 0) { |
830 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; | 827 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; |
831 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 828 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
832 visitor_->HandleHeaderError(this); | 829 visitor_->HandleHeaderError(this); |
833 return; | 830 return; |
834 } | 831 } |
835 transfer_encoding_idx = i + 1; | 832 transfer_encoding_idx = i + 1; |
836 } | 833 } |
837 } else if (i == 0 && (key_len == 0 || c == ' ')) { | 834 } else if (i == 0 && (key_len == 0 || c == ' ')) { |
838 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; | 835 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; |
(...skipping 11 matching lines...) Expand all Loading... |
850 ProcessTransferEncodingLine(transfer_encoding_idx - 1); | 847 ProcessTransferEncodingLine(transfer_encoding_idx - 1); |
851 } | 848 } |
852 } | 849 } |
853 } | 850 } |
854 | 851 |
855 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { | 852 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { |
856 // For responses, can't have a body if the request was a HEAD, or if it is | 853 // For responses, can't have a body if the request was a HEAD, or if it is |
857 // one of these response-codes. rfc2616 section 4.3 | 854 // one of these response-codes. rfc2616 section 4.3 |
858 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 855 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
859 if (is_request_ || | 856 if (is_request_ || |
860 !(request_was_head_ || | 857 !(request_was_head_ || (headers_->parsed_response_code_ >= 100 && |
861 (headers_->parsed_response_code_ >= 100 && | 858 headers_->parsed_response_code_ < 200) || |
862 headers_->parsed_response_code_ < 200) || | |
863 (headers_->parsed_response_code_ == 204) || | 859 (headers_->parsed_response_code_ == 204) || |
864 (headers_->parsed_response_code_ == 304))) { | 860 (headers_->parsed_response_code_ == 304))) { |
865 // Then we can have a body. | 861 // Then we can have a body. |
866 if (headers_->transfer_encoding_is_chunked_) { | 862 if (headers_->transfer_encoding_is_chunked_) { |
867 // Note that | 863 // Note that |
868 // if ( Transfer-Encoding: chunked && Content-length: ) | 864 // if ( Transfer-Encoding: chunked && Content-length: ) |
869 // then Transfer-Encoding: chunked trumps. | 865 // then Transfer-Encoding: chunked trumps. |
870 // This is as specified in the spec. | 866 // This is as specified in the spec. |
871 // rfc2616 section 4.4.3 | 867 // rfc2616 section 4.4.3 |
872 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; | 868 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; |
(...skipping 14 matching lines...) Expand all Loading... |
887 } | 883 } |
888 break; | 884 break; |
889 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: | 885 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: |
890 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: | 886 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: |
891 // If there were characters left-over after parsing the | 887 // If there were characters left-over after parsing the |
892 // content length, we should flag an error and stop. | 888 // content length, we should flag an error and stop. |
893 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 889 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
894 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; | 890 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; |
895 visitor_->HandleHeaderError(this); | 891 visitor_->HandleHeaderError(this); |
896 break; | 892 break; |
897 // We can have: no transfer-encoding, no content length, and no | 893 // We can have: no transfer-encoding, no content length, and no |
898 // connection: close... | 894 // connection: close... |
899 // Unfortunately, this case doesn't seem to be covered in the spec. | 895 // Unfortunately, this case doesn't seem to be covered in the spec. |
900 // We'll assume that the safest thing to do here is what the google | 896 // We'll assume that the safest thing to do here is what the google |
901 // binaries before 2008 already do, which is to assume that | 897 // binaries before 2008 already do, which is to assume that |
902 // everything until the connection is closed is body. | 898 // everything until the connection is closed is body. |
903 case BalsaHeadersEnums::NO_CONTENT_LENGTH: | 899 case BalsaHeadersEnums::NO_CONTENT_LENGTH: |
904 if (is_request_) { | 900 if (is_request_) { |
905 base::StringPiece method = headers_->request_method(); | 901 base::StringPiece method = headers_->request_method(); |
906 // POSTs and PUTs should have a detectable body length. If they | 902 // POSTs and PUTs should have a detectable body length. If they |
907 // do not we consider it an error. | 903 // do not we consider it an error. |
908 if ((method.size() == 4 && | 904 if ((method.size() == 4 && |
909 strncmp(method.data(), "POST", 4) == 0) || | 905 strncmp(method.data(), "POST", 4) == 0) || |
910 (method.size() == 3 && | 906 (method.size() == 3 && strncmp(method.data(), "PUT", 3) == 0)) { |
911 strncmp(method.data(), "PUT", 3) == 0)) { | |
912 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 907 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
913 last_error_ = | 908 last_error_ = |
914 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; | 909 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; |
915 visitor_->HandleHeaderError(this); | 910 visitor_->HandleHeaderError(this); |
916 break; | 911 break; |
917 } | 912 } |
918 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 913 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
919 } else { | 914 } else { |
920 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; | 915 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; |
921 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; | 916 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; |
922 visitor_->HandleHeaderWarning(this); | 917 visitor_->HandleHeaderWarning(this); |
923 } | 918 } |
924 break; | 919 break; |
925 // The COV_NF_... statements here provide hints to the apparatus | 920 // The COV_NF_... statements here provide hints to the apparatus |
926 // which computes coverage reports/ratios that this code is never | 921 // which computes coverage reports/ratios that this code is never |
927 // intended to be executed, and should technically be impossible. | 922 // intended to be executed, and should technically be impossible. |
928 // COV_NF_START | 923 // COV_NF_START |
929 default: | 924 default: |
930 LOG(FATAL) << "Saw a content_length_status: " | 925 LOG(FATAL) << "Saw a content_length_status: " |
931 << headers_->content_length_status_ << " which is unknown."; | 926 << headers_->content_length_status_ |
| 927 << " which is unknown."; |
932 // COV_NF_END | 928 // COV_NF_END |
933 } | 929 } |
934 } | 930 } |
935 } | 931 } |
936 } | 932 } |
937 | 933 |
938 size_t BalsaFrame::ProcessHeaders(const char* message_start, | 934 size_t BalsaFrame::ProcessHeaders(const char* message_start, |
939 size_t message_length) { | 935 size_t message_length) { |
940 const char* const original_message_start = message_start; | 936 const char* const original_message_start = message_start; |
941 const char* const message_end = message_start + message_length; | 937 const char* const message_end = message_start + message_length; |
(...skipping 22 matching lines...) Expand all Loading... |
964 } else { | 960 } else { |
965 saw_non_newline_char_ = true; | 961 saw_non_newline_char_ = true; |
966 checkpoint = message_start = message_current; | 962 checkpoint = message_start = message_current; |
967 goto read_real_message; | 963 goto read_real_message; |
968 } | 964 } |
969 } | 965 } |
970 ++message_current; | 966 ++message_current; |
971 } while (message_current < message_end); | 967 } while (message_current < message_end); |
972 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks | 968 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks |
973 } else { | 969 } else { |
974 read_real_message: | 970 read_real_message : |
975 // Note that SSE2 can be enabled on certain piii platforms. | 971 // Note that SSE2 can be enabled on certain piii platforms. |
976 #if __SSE2__ | 972 #if __SSE2__ |
977 { | 973 { |
978 const char* const message_end_m16 = message_end - 16; | 974 const char* const message_end_m16 = message_end - 16; |
979 __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', | 975 __v16qi newlines = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', |
980 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' }; | 976 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; |
981 while (message_current < message_end_m16) { | 977 while (message_current < message_end_m16) { |
982 // What this does (using compiler intrinsics): | 978 // What this does (using compiler intrinsics): |
983 // | 979 // |
984 // Load 16 '\n's into an xmm register | 980 // Load 16 '\n's into an xmm register |
985 // Load 16 bytes of current message into an xmm register | 981 // Load 16 bytes of current message into an xmm register |
986 // Do byte-wise equals on those two xmm registers | 982 // Do byte-wise equals on those two xmm registers |
987 // Take the most significant bit of each byte, and put that into | 983 // Take the most significant bit of each byte, and put that into |
988 // the low 16 bits of a mask | 984 // the low 16 bits of a mask |
989 // If the mask is zero, no '\n' found. increment by 16 and try again | 985 // If the mask is zero, no '\n' found. increment by 16 and try again |
990 // Else scan forward to find the first set bit. | 986 // Else scan forward to find the first set bit. |
991 // Increment current by the index of the first set bit | 987 // Increment current by the index of the first set bit |
992 // (ffs returns index of first set bit + 1) | 988 // (ffs returns index of first set bit + 1) |
993 __m128i msg_bytes = | 989 __m128i msg_bytes = _mm_loadu_si128(const_cast<__m128i*>( |
994 _mm_loadu_si128(const_cast<__m128i *>( | 990 reinterpret_cast<const __m128i*>(message_current))); |
995 reinterpret_cast<const __m128i *>(message_current))); | 991 __m128i newline_cmp = |
996 __m128i newline_cmp = | |
997 _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines)); | 992 _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines)); |
998 int newline_msk = _mm_movemask_epi8(newline_cmp); | 993 int newline_msk = _mm_movemask_epi8(newline_cmp); |
999 if (newline_msk == 0) { | 994 if (newline_msk == 0) { |
1000 message_current += 16; | 995 message_current += 16; |
1001 continue; | 996 continue; |
1002 } | 997 } |
1003 message_current += (ffs(newline_msk) - 1); | 998 message_current += (ffs(newline_msk) - 1); |
1004 const size_t relative_idx = message_current - message_start; | 999 const size_t relative_idx = message_current - message_start; |
1005 const size_t message_current_idx = 1 + base_idx + relative_idx; | 1000 const size_t message_current_idx = 1 + base_idx + relative_idx; |
1006 lines_.push_back(std::make_pair(last_slash_n_idx_, | 1001 lines_.push_back( |
1007 message_current_idx)); | 1002 std::make_pair(last_slash_n_idx_, message_current_idx)); |
1008 if (lines_.size() == 1) { | 1003 if (lines_.size() == 1) { |
1009 headers_->WriteFromFramer(checkpoint, | 1004 headers_->WriteFromFramer(checkpoint, |
1010 1 + message_current - checkpoint); | 1005 1 + message_current - checkpoint); |
1011 checkpoint = message_current + 1; | 1006 checkpoint = message_current + 1; |
1012 const char* begin = headers_->OriginalHeaderStreamBegin(); | 1007 const char* begin = headers_->OriginalHeaderStreamBegin(); |
1013 #if DEBUGFRAMER | 1008 #if DEBUGFRAMER |
1014 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); | 1009 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); |
1015 LOG(INFO) << "is_request_: " << is_request_; | 1010 LOG(INFO) << "is_request_: " << is_request_; |
1016 #endif | 1011 #endif |
1017 ProcessFirstLine(begin, begin + lines_[0].second); | 1012 ProcessFirstLine(begin, begin + lines_[0].second); |
1018 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) | 1013 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) |
1019 goto process_lines; | |
1020 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) | |
1021 goto bottom; | |
1022 } | |
1023 const size_t chars_since_last_slash_n = (message_current_idx - | |
1024 last_slash_n_idx_); | |
1025 last_slash_n_idx_ = message_current_idx; | |
1026 if (chars_since_last_slash_n > 2) { | |
1027 // We have a slash-n, but the last slash n was | |
1028 // more than 2 characters away from this. Thus, we know | |
1029 // that this cannot be an end-of-header. | |
1030 ++message_current; | |
1031 continue; | |
1032 } | |
1033 if ((chars_since_last_slash_n == 1) || | |
1034 (((message_current > message_start) && | |
1035 (*(message_current - 1) == '\r')) || | |
1036 (last_char_was_slash_r_))) { | |
1037 goto process_lines; | 1014 goto process_lines; |
1038 } | 1015 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) |
| 1016 goto bottom; |
| 1017 } |
| 1018 const size_t chars_since_last_slash_n = |
| 1019 (message_current_idx - last_slash_n_idx_); |
| 1020 last_slash_n_idx_ = message_current_idx; |
| 1021 if (chars_since_last_slash_n > 2) { |
| 1022 // We have a slash-n, but the last slash n was |
| 1023 // more than 2 characters away from this. Thus, we know |
| 1024 // that this cannot be an end-of-header. |
1039 ++message_current; | 1025 ++message_current; |
| 1026 continue; |
1040 } | 1027 } |
| 1028 if ((chars_since_last_slash_n == 1) || |
| 1029 (((message_current > message_start) && |
| 1030 (*(message_current - 1) == '\r')) || |
| 1031 (last_char_was_slash_r_))) { |
| 1032 goto process_lines; |
| 1033 } |
| 1034 ++message_current; |
1041 } | 1035 } |
| 1036 } |
1042 #endif // __SSE2__ | 1037 #endif // __SSE2__ |
1043 while (message_current < message_end) { | 1038 while (message_current < message_end) { |
1044 if (*message_current != '\n') { | 1039 if (*message_current != '\n') { |
1045 ++message_current; | 1040 ++message_current; |
1046 continue; | 1041 continue; |
1047 } | 1042 } |
1048 const size_t relative_idx = message_current - message_start; | 1043 const size_t relative_idx = message_current - message_start; |
1049 const size_t message_current_idx = 1 + base_idx + relative_idx; | 1044 const size_t message_current_idx = 1 + base_idx + relative_idx; |
1050 lines_.push_back(std::make_pair(last_slash_n_idx_, | 1045 lines_.push_back( |
1051 message_current_idx)); | 1046 std::make_pair(last_slash_n_idx_, message_current_idx)); |
1052 if (lines_.size() == 1) { | 1047 if (lines_.size() == 1) { |
1053 headers_->WriteFromFramer(checkpoint, | 1048 headers_->WriteFromFramer(checkpoint, |
1054 1 + message_current - checkpoint); | 1049 1 + message_current - checkpoint); |
1055 checkpoint = message_current + 1; | 1050 checkpoint = message_current + 1; |
1056 const char* begin = headers_->OriginalHeaderStreamBegin(); | 1051 const char* begin = headers_->OriginalHeaderStreamBegin(); |
1057 #if DEBUGFRAMER | 1052 #if DEBUGFRAMER |
1058 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); | 1053 LOG(INFO) << "First line " << std::string(begin, lines_[0].second); |
1059 LOG(INFO) << "is_request_: " << is_request_; | 1054 LOG(INFO) << "is_request_: " << is_request_; |
1060 #endif | 1055 #endif |
1061 ProcessFirstLine(begin, begin + lines_[0].second); | 1056 ProcessFirstLine(begin, begin + lines_[0].second); |
1062 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) | 1057 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) |
1063 goto process_lines; | 1058 goto process_lines; |
1064 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) | 1059 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) |
1065 goto bottom; | 1060 goto bottom; |
1066 } | 1061 } |
1067 const size_t chars_since_last_slash_n = (message_current_idx - | 1062 const size_t chars_since_last_slash_n = |
1068 last_slash_n_idx_); | 1063 (message_current_idx - last_slash_n_idx_); |
1069 last_slash_n_idx_ = message_current_idx; | 1064 last_slash_n_idx_ = message_current_idx; |
1070 if (chars_since_last_slash_n > 2) { | 1065 if (chars_since_last_slash_n > 2) { |
1071 // false positive. | 1066 // false positive. |
1072 ++message_current; | 1067 ++message_current; |
1073 continue; | 1068 continue; |
1074 } | 1069 } |
1075 if ((chars_since_last_slash_n == 1) || | 1070 if ((chars_since_last_slash_n == 1) || |
1076 (((message_current > message_start) && | 1071 (((message_current > message_start) && |
1077 (*(message_current - 1) == '\r')) || | 1072 (*(message_current - 1) == '\r')) || |
1078 (last_char_was_slash_r_))) { | 1073 (last_char_was_slash_r_))) { |
1079 goto process_lines; | 1074 goto process_lines; |
1080 } | 1075 } |
1081 ++message_current; | 1076 ++message_current; |
1082 } | 1077 } |
1083 } | 1078 } |
1084 continue; | 1079 continue; |
1085 process_lines: | 1080 process_lines: |
1086 ++message_current; | 1081 ++message_current; |
1087 DCHECK(message_current >= message_start); | 1082 DCHECK(message_current >= message_start); |
1088 if (message_current > message_start) { | 1083 if (message_current > message_start) { |
1089 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); | 1084 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); |
1090 } | 1085 } |
1091 | 1086 |
1092 // Check if we have exceeded maximum headers length | 1087 // Check if we have exceeded maximum headers length |
1093 // Although we check for this limit before and after we call this function | 1088 // Although we check for this limit before and after we call this function |
1094 // we check it here as well to make sure that in case the visitor changed | 1089 // we check it here as well to make sure that in case the visitor changed |
1095 // the max_header_length_ (for example after processing the first line) | 1090 // the max_header_length_ (for example after processing the first line) |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1132 } | 1127 } |
1133 // If we've gotten to here, it means that we've consumed all of the | 1128 // If we've gotten to here, it means that we've consumed all of the |
1134 // available input. We need to record whether or not the last character we | 1129 // available input. We need to record whether or not the last character we |
1135 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds | 1130 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds |
1136 // a header framing that is split across the two calls. | 1131 // a header framing that is split across the two calls. |
1137 last_char_was_slash_r_ = (*(message_end - 1) == '\r'); | 1132 last_char_was_slash_r_ = (*(message_end - 1) == '\r'); |
1138 DCHECK(message_current >= message_start); | 1133 DCHECK(message_current >= message_start); |
1139 if (message_current > message_start) { | 1134 if (message_current > message_start) { |
1140 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); | 1135 headers_->WriteFromFramer(checkpoint, message_current - checkpoint); |
1141 } | 1136 } |
1142 bottom: | 1137 bottom: |
1143 return message_current - original_message_start; | 1138 return message_current - original_message_start; |
1144 } | 1139 } |
1145 | 1140 |
1146 | |
1147 size_t BalsaFrame::BytesSafeToSplice() const { | 1141 size_t BalsaFrame::BytesSafeToSplice() const { |
1148 switch (parse_state_) { | 1142 switch (parse_state_) { |
1149 case BalsaFrameEnums::READING_CHUNK_DATA: | 1143 case BalsaFrameEnums::READING_CHUNK_DATA: |
1150 return chunk_length_remaining_; | 1144 return chunk_length_remaining_; |
1151 case BalsaFrameEnums::READING_UNTIL_CLOSE: | 1145 case BalsaFrameEnums::READING_UNTIL_CLOSE: |
1152 return std::numeric_limits<size_t>::max(); | 1146 return std::numeric_limits<size_t>::max(); |
1153 case BalsaFrameEnums::READING_CONTENT: | 1147 case BalsaFrameEnums::READING_CONTENT: |
1154 return content_length_remaining_; | 1148 return content_length_remaining_; |
1155 default: | 1149 default: |
1156 return 0; | 1150 return 0; |
1157 } | 1151 } |
1158 } | 1152 } |
1159 | 1153 |
1160 void BalsaFrame::BytesSpliced(size_t bytes_spliced) { | 1154 void BalsaFrame::BytesSpliced(size_t bytes_spliced) { |
1161 switch (parse_state_) { | 1155 switch (parse_state_) { |
1162 case BalsaFrameEnums::READING_CHUNK_DATA: | 1156 case BalsaFrameEnums::READING_CHUNK_DATA: |
1163 if (chunk_length_remaining_ >= bytes_spliced) { | 1157 if (chunk_length_remaining_ >= bytes_spliced) { |
1164 chunk_length_remaining_ -= bytes_spliced; | 1158 chunk_length_remaining_ -= bytes_spliced; |
1165 if (chunk_length_remaining_ == 0) { | 1159 if (chunk_length_remaining_ == 0) { |
1166 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; | 1160 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; |
1167 } | 1161 } |
1168 return; | 1162 return; |
1169 } else { | 1163 } else { |
1170 last_error_ = | 1164 last_error_ = BalsaFrameEnums:: |
1171 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; | 1165 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; |
1172 goto error_exit; | 1166 goto error_exit; |
1173 } | 1167 } |
1174 | 1168 |
1175 case BalsaFrameEnums::READING_UNTIL_CLOSE: | 1169 case BalsaFrameEnums::READING_UNTIL_CLOSE: |
1176 return; | 1170 return; |
1177 | 1171 |
1178 case BalsaFrameEnums::READING_CONTENT: | 1172 case BalsaFrameEnums::READING_CONTENT: |
1179 if (content_length_remaining_ >= bytes_spliced) { | 1173 if (content_length_remaining_ >= bytes_spliced) { |
1180 content_length_remaining_ -= bytes_spliced; | 1174 content_length_remaining_ -= bytes_spliced; |
1181 if (content_length_remaining_ == 0) { | 1175 if (content_length_remaining_ == 0) { |
1182 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 1176 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
1183 visitor_->MessageDone(); | 1177 visitor_->MessageDone(); |
1184 } | 1178 } |
1185 return; | 1179 return; |
1186 } else { | 1180 } else { |
1187 last_error_ = | 1181 last_error_ = BalsaFrameEnums:: |
1188 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; | 1182 CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; |
1189 goto error_exit; | 1183 goto error_exit; |
1190 } | 1184 } |
1191 | 1185 |
1192 default: | 1186 default: |
1193 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; | 1187 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; |
1194 goto error_exit; | 1188 goto error_exit; |
1195 } | 1189 } |
1196 | 1190 |
1197 error_exit: | 1191 error_exit: |
1198 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | 1192 parse_state_ = BalsaFrameEnums::PARSE_ERROR; |
1199 visitor_->HandleBodyError(this); | 1193 visitor_->HandleBodyError(this); |
1200 }; | 1194 }; |
1201 | 1195 |
1202 // You may note that the state-machine contained within this function has both | 1196 // You may note that the state-machine contained within this function has both |
1203 // switch and goto labels for nearly the same thing. For instance, the | 1197 // switch and goto labels for nearly the same thing. For instance, the |
1204 // following two labels refer to the same code block: | 1198 // following two labels refer to the same code block: |
1205 // label_reading_chunk_data: | 1199 // label_reading_chunk_data: |
1206 // case BalsaFrameEnums::READING_CHUNK_DATA: | 1200 // case BalsaFrameEnums::READING_CHUNK_DATA: |
1207 // The 'case' statement is required for the switch statement which occurs when | 1201 // The 'case' statement is required for the switch statement which occurs when |
1208 // ProcessInput is invoked. The goto label is required as the state-machine | 1202 // ProcessInput is invoked. The goto label is required as the state-machine |
1209 // does not use a computed goto in any subsequent operations. | 1203 // does not use a computed goto in any subsequent operations. |
1210 // | 1204 // |
1211 // Since several states exit the state machine for various reasons, there is | 1205 // Since several states exit the state machine for various reasons, there is |
1212 // also one label at the bottom of the function. When it is appropriate to | 1206 // also one label at the bottom of the function. When it is appropriate to |
1213 // return from the function, that part of the state machine instead issues a | 1207 // return from the function, that part of the state machine instead issues a |
1214 // goto bottom; This results in less code duplication, and makes debugging | 1208 // goto bottom; This results in less code duplication, and makes debugging |
1215 // easier (as you can add a statement to a section of code which is guaranteed | 1209 // easier (as you can add a statement to a section of code which is guaranteed |
1216 // to be invoked when the function is exiting). | 1210 // to be invoked when the function is exiting). |
1217 size_t BalsaFrame::ProcessInput(const char* input, size_t size) { | 1211 size_t BalsaFrame::ProcessInput(const char* input, size_t size) { |
1218 const char* current = input; | 1212 const char* current = input; |
1219 const char* on_entry = current; | 1213 const char* on_entry = current; |
1220 const char* end = current + size; | 1214 const char* end = current + size; |
1221 #if DEBUGFRAMER | 1215 #if DEBUGFRAMER |
1222 LOG(INFO) << "\n==============" | 1216 LOG(INFO) << "\n==============" << BalsaFrameEnums::ParseStateToString( |
1223 << BalsaFrameEnums::ParseStateToString(parse_state_) | 1217 parse_state_) << "===============\n"; |
1224 << "===============\n"; | |
1225 #endif // DEBUGFRAMER | 1218 #endif // DEBUGFRAMER |
1226 | 1219 |
1227 DCHECK(headers_ != NULL); | 1220 DCHECK(headers_ != NULL); |
1228 if (headers_ == NULL) return 0; | 1221 if (headers_ == NULL) |
| 1222 return 0; |
1229 | 1223 |
1230 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { | 1224 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { |
1231 const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); | 1225 const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); |
1232 // Yes, we still have to check this here as the user can change the | 1226 // Yes, we still have to check this here as the user can change the |
1233 // max_header_length amount! | 1227 // max_header_length amount! |
1234 // Also it is possible that we have reached the maximum allowed header size, | 1228 // Also it is possible that we have reached the maximum allowed header size, |
1235 // and we have more to consume (remember we are still inside | 1229 // and we have more to consume (remember we are still inside |
1236 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. | 1230 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. |
1237 if (header_length > max_header_length_ || | 1231 if (header_length > max_header_length_ || |
1238 (header_length == max_header_length_ && size > 0)) { | 1232 (header_length == max_header_length_ && size > 0)) { |
(...skipping 24 matching lines...) Expand all Loading... |
1263 } | 1257 } |
1264 goto bottom; | 1258 goto bottom; |
1265 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || | 1259 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || |
1266 parse_state_ == BalsaFrameEnums::PARSE_ERROR) { | 1260 parse_state_ == BalsaFrameEnums::PARSE_ERROR) { |
1267 // Can do nothing more 'till we're reset. | 1261 // Can do nothing more 'till we're reset. |
1268 goto bottom; | 1262 goto bottom; |
1269 } | 1263 } |
1270 | 1264 |
1271 while (current < end) { | 1265 while (current < end) { |
1272 switch (parse_state_) { | 1266 switch (parse_state_) { |
1273 label_reading_chunk_length: | 1267 label_reading_chunk_length: |
1274 case BalsaFrameEnums::READING_CHUNK_LENGTH: | 1268 case BalsaFrameEnums::READING_CHUNK_LENGTH: |
1275 // In this state we read the chunk length. | 1269 // In this state we read the chunk length. |
1276 // Note that once we hit a character which is not in: | 1270 // Note that once we hit a character which is not in: |
1277 // [0-9;A-Fa-f\n], we transition to a different state. | 1271 // [0-9;A-Fa-f\n], we transition to a different state. |
1278 // | 1272 // |
1279 { | 1273 { |
1280 // If we used strtol, etc, we'd have to buffer this line. | 1274 // If we used strtol, etc, we'd have to buffer this line. |
1281 // This is more annoying than simply doing the conversion | 1275 // This is more annoying than simply doing the conversion |
1282 // here. This code accounts for overflow. | 1276 // here. This code accounts for overflow. |
1283 static const signed char buf[] = { | 1277 static const signed char buf[] = { |
1284 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f | 1278 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f |
1285 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, | 1279 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, |
1286 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f | 1280 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f |
1287 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1281 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1288 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f | 1282 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f |
1289 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1283 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1290 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f | 1284 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f |
1291 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, | 1285 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, |
1292 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f | 1286 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f |
1293 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1287 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1294 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f | 1288 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f |
1295 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1289 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1296 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f | 1290 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f |
1297 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1291 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1298 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f | 1292 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f |
1299 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 1293 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
1300 }; | 1294 }; |
1301 // valid cases: | 1295 // valid cases: |
1302 // "09123\n" // -> 09123 | 1296 // "09123\n" // -> 09123 |
1303 // "09123\r\n" // -> 09123 | 1297 // "09123\r\n" // -> 09123 |
1304 // "09123 \n" // -> 09123 | 1298 // "09123 \n" // -> 09123 |
1305 // "09123 \r\n" // -> 09123 | 1299 // "09123 \r\n" // -> 09123 |
1306 // "09123 12312\n" // -> 09123 | 1300 // "09123 12312\n" // -> 09123 |
1307 // "09123 12312\r\n" // -> 09123 | 1301 // "09123 12312\r\n" // -> 09123 |
1308 // "09123; foo=bar\n" // -> 09123 | 1302 // "09123; foo=bar\n" // -> 09123 |
1309 // "09123; foo=bar\r\n" // -> 09123 | 1303 // "09123; foo=bar\r\n" // -> 09123 |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1349 | 1343 |
1350 --current; | 1344 --current; |
1351 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; | 1345 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; |
1352 visitor_->ProcessChunkLength(chunk_length_remaining_); | 1346 visitor_->ProcessChunkLength(chunk_length_remaining_); |
1353 goto label_reading_chunk_extension; | 1347 goto label_reading_chunk_extension; |
1354 } | 1348 } |
1355 } | 1349 } |
1356 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1350 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1357 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH | 1351 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH |
1358 | 1352 |
1359 label_reading_chunk_extension: | 1353 label_reading_chunk_extension: |
1360 case BalsaFrameEnums::READING_CHUNK_EXTENSION: | 1354 case BalsaFrameEnums::READING_CHUNK_EXTENSION: { |
1361 { | 1355 // TODO(phython): Convert this scanning to be 16 bytes at a time if |
1362 // TODO(phython): Convert this scanning to be 16 bytes at a time if | 1356 // there is data to be read. |
1363 // there is data to be read. | 1357 const char* extensions_start = current; |
1364 const char* extensions_start = current; | 1358 size_t extensions_length = 0; |
1365 size_t extensions_length = 0; | 1359 while (current < end) { |
1366 while (current < end) { | 1360 const char c = *current; |
1367 const char c = *current; | 1361 if (c == '\r' || c == '\n') { |
1368 if (c == '\r' || c == '\n') { | 1362 extensions_length = (extensions_start == current) |
1369 extensions_length = | 1363 ? 0 |
1370 (extensions_start == current) ? | 1364 : current - extensions_start - 1; |
1371 0 : | 1365 } |
1372 current - extensions_start - 1; | 1366 |
| 1367 ++current; |
| 1368 if (c == '\n') { |
| 1369 chunk_length_character_extracted_ = false; |
| 1370 visitor_->ProcessChunkExtensions(extensions_start, |
| 1371 extensions_length); |
| 1372 if (chunk_length_remaining_ != 0) { |
| 1373 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; |
| 1374 goto label_reading_chunk_data; |
1373 } | 1375 } |
1374 | 1376 HeaderFramingFound('\n'); |
1375 ++current; | 1377 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; |
1376 if (c == '\n') { | 1378 goto label_reading_last_chunk_term; |
1377 chunk_length_character_extracted_ = false; | |
1378 visitor_->ProcessChunkExtensions( | |
1379 extensions_start, extensions_length); | |
1380 if (chunk_length_remaining_ != 0) { | |
1381 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; | |
1382 goto label_reading_chunk_data; | |
1383 } | |
1384 HeaderFramingFound('\n'); | |
1385 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; | |
1386 goto label_reading_last_chunk_term; | |
1387 } | |
1388 } | 1379 } |
1389 visitor_->ProcessChunkExtensions( | |
1390 extensions_start, extensions_length); | |
1391 } | 1380 } |
| 1381 visitor_->ProcessChunkExtensions(extensions_start, extensions_length); |
| 1382 } |
1392 | 1383 |
1393 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1384 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1394 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION | 1385 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION |
1395 | 1386 |
1396 label_reading_chunk_data: | 1387 label_reading_chunk_data: |
1397 case BalsaFrameEnums::READING_CHUNK_DATA: | 1388 case BalsaFrameEnums::READING_CHUNK_DATA: |
1398 while (current < end) { | 1389 while (current < end) { |
1399 if (chunk_length_remaining_ == 0) { | 1390 if (chunk_length_remaining_ == 0) { |
1400 break; | 1391 break; |
1401 } | 1392 } |
1402 // read in the chunk | 1393 // read in the chunk |
1403 size_t bytes_remaining = end - current; | 1394 size_t bytes_remaining = end - current; |
1404 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ? | 1395 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) |
1405 chunk_length_remaining_ : bytes_remaining; | 1396 ? chunk_length_remaining_ |
| 1397 : bytes_remaining; |
1406 const char* tmp_current = current + consumed_bytes; | 1398 const char* tmp_current = current + consumed_bytes; |
1407 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); | 1399 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); |
1408 visitor_->ProcessBodyData(current, consumed_bytes); | 1400 visitor_->ProcessBodyData(current, consumed_bytes); |
1409 on_entry = current = tmp_current; | 1401 on_entry = current = tmp_current; |
1410 chunk_length_remaining_ -= consumed_bytes; | 1402 chunk_length_remaining_ -= consumed_bytes; |
1411 } | 1403 } |
1412 if (chunk_length_remaining_ == 0) { | 1404 if (chunk_length_remaining_ == 0) { |
1413 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; | 1405 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; |
1414 goto label_reading_chunk_term; | 1406 goto label_reading_chunk_term; |
1415 } | 1407 } |
1416 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1408 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1417 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA | 1409 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA |
1418 | 1410 |
1419 label_reading_chunk_term: | 1411 label_reading_chunk_term: |
1420 case BalsaFrameEnums::READING_CHUNK_TERM: | 1412 case BalsaFrameEnums::READING_CHUNK_TERM: |
1421 while (current < end) { | 1413 while (current < end) { |
1422 const char c = *current; | 1414 const char c = *current; |
1423 ++current; | 1415 ++current; |
1424 | 1416 |
1425 if (c == '\n') { | 1417 if (c == '\n') { |
1426 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; | 1418 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; |
1427 goto label_reading_chunk_length; | 1419 goto label_reading_chunk_length; |
1428 } | 1420 } |
1429 } | 1421 } |
1430 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1422 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1431 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM | 1423 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM |
1432 | 1424 |
1433 label_reading_last_chunk_term: | 1425 label_reading_last_chunk_term: |
1434 case BalsaFrameEnums::READING_LAST_CHUNK_TERM: | 1426 case BalsaFrameEnums::READING_LAST_CHUNK_TERM: |
1435 while (current < end) { | 1427 while (current < end) { |
1436 const char c = *current; | 1428 const char c = *current; |
1437 | 1429 |
1438 if (!HeaderFramingFound(c)) { | 1430 if (!HeaderFramingFound(c)) { |
1439 // If not, however, since the spec only suggests that the | 1431 // If not, however, since the spec only suggests that the |
1440 // client SHOULD indicate the presence of trailers, we get to | 1432 // client SHOULD indicate the presence of trailers, we get to |
1441 // *test* that they did or didn't. | 1433 // *test* that they did or didn't. |
1442 // If all of the bytes we've seen since: | 1434 // If all of the bytes we've seen since: |
1443 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF | 1435 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF |
(...skipping 21 matching lines...) Expand all Loading... |
1465 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 1457 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
1466 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1458 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1467 visitor_->MessageDone(); | 1459 visitor_->MessageDone(); |
1468 goto bottom; | 1460 goto bottom; |
1469 } | 1461 } |
1470 break; // from while loop | 1462 break; // from while loop |
1471 } | 1463 } |
1472 visitor_->ProcessBodyInput(on_entry, current - on_entry); | 1464 visitor_->ProcessBodyInput(on_entry, current - on_entry); |
1473 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM | 1465 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM |
1474 | 1466 |
1475 label_reading_trailer: | 1467 label_reading_trailer: |
1476 case BalsaFrameEnums::READING_TRAILER: | 1468 case BalsaFrameEnums::READING_TRAILER: |
1477 while (current < end) { | 1469 while (current < end) { |
1478 const char c = *current; | 1470 const char c = *current; |
1479 ++current; | 1471 ++current; |
1480 // TODO(fenix): If we ever care about trailers as part of framing, | 1472 // TODO(fenix): If we ever care about trailers as part of framing, |
1481 // deal with them here (see below for part of the 'solution') | 1473 // deal with them here (see below for part of the 'solution') |
1482 // if (LineFramingFound(c)) { | 1474 // if (LineFramingFound(c)) { |
1483 // trailer_lines_.push_back(make_pair(start_of_line_, | 1475 // trailer_lines_.push_back(make_pair(start_of_line_, |
1484 // trailer_length_ - 1)); | 1476 // trailer_length_ - 1)); |
1485 // start_of_line_ = trailer_length_; | 1477 // start_of_line_ = trailer_length_; |
1486 // } | 1478 // } |
1487 if (HeaderFramingFound(c)) { | 1479 if (HeaderFramingFound(c)) { |
1488 // ProcessTrailers(visitor_, &trailers_); | 1480 // ProcessTrailers(visitor_, &trailers_); |
1489 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 1481 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
1490 visitor_->ProcessTrailerInput(on_entry, current - on_entry); | 1482 visitor_->ProcessTrailerInput(on_entry, current - on_entry); |
1491 visitor_->MessageDone(); | 1483 visitor_->MessageDone(); |
1492 goto bottom; | 1484 goto bottom; |
1493 } | 1485 } |
1494 } | 1486 } |
1495 visitor_->ProcessTrailerInput(on_entry, current - on_entry); | 1487 visitor_->ProcessTrailerInput(on_entry, current - on_entry); |
1496 break; // case BalsaFrameEnums::READING_TRAILER | 1488 break; // case BalsaFrameEnums::READING_TRAILER |
1497 | 1489 |
1498 // Note that there is no label: | 1490 // Note that there is no label: |
1499 // 'label_reading_until_close' | 1491 // 'label_reading_until_close' |
1500 // here. This is because the state-machine exits immediately after | 1492 // here. This is because the state-machine exits immediately after |
1501 // reading the headers instead of transitioning here (as it would | 1493 // reading the headers instead of transitioning here (as it would |
1502 // do if it was consuming all the data it could, all the time). | 1494 // do if it was consuming all the data it could, all the time). |
1503 case BalsaFrameEnums::READING_UNTIL_CLOSE: | 1495 case BalsaFrameEnums::READING_UNTIL_CLOSE: { |
1504 { | 1496 const size_t bytes_remaining = end - current; |
1505 const size_t bytes_remaining = end - current; | 1497 if (bytes_remaining > 0) { |
1506 if (bytes_remaining > 0) { | 1498 visitor_->ProcessBodyInput(current, bytes_remaining); |
1507 visitor_->ProcessBodyInput(current, bytes_remaining); | 1499 visitor_->ProcessBodyData(current, bytes_remaining); |
1508 visitor_->ProcessBodyData(current, bytes_remaining); | 1500 current += bytes_remaining; |
1509 current += bytes_remaining; | |
1510 } | |
1511 } | 1501 } |
| 1502 } |
1512 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE | 1503 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE |
1513 | 1504 |
1514 // label_reading_content: | 1505 // label_reading_content: |
1515 case BalsaFrameEnums::READING_CONTENT: | 1506 case BalsaFrameEnums::READING_CONTENT: |
1516 #if DEBUGFRAMER | 1507 #if DEBUGFRAMER |
1517 LOG(INFO) << "ReadingContent: " << content_length_remaining_; | 1508 LOG(INFO) << "ReadingContent: " << content_length_remaining_; |
1518 #endif // DEBUGFRAMER | 1509 #endif // DEBUGFRAMER |
1519 while (content_length_remaining_ && current < end) { | 1510 while (content_length_remaining_ && current < end) { |
1520 // read in the content | 1511 // read in the content |
1521 const size_t bytes_remaining = end - current; | 1512 const size_t bytes_remaining = end - current; |
1522 const size_t consumed_bytes = | 1513 const size_t consumed_bytes = |
1523 (content_length_remaining_ < bytes_remaining) ? | 1514 (content_length_remaining_ < bytes_remaining) |
1524 content_length_remaining_ : bytes_remaining; | 1515 ? content_length_remaining_ |
| 1516 : bytes_remaining; |
1525 visitor_->ProcessBodyInput(current, consumed_bytes); | 1517 visitor_->ProcessBodyInput(current, consumed_bytes); |
1526 visitor_->ProcessBodyData(current, consumed_bytes); | 1518 visitor_->ProcessBodyData(current, consumed_bytes); |
1527 current += consumed_bytes; | 1519 current += consumed_bytes; |
1528 content_length_remaining_ -= consumed_bytes; | 1520 content_length_remaining_ -= consumed_bytes; |
1529 } | 1521 } |
1530 if (content_length_remaining_ == 0) { | 1522 if (content_length_remaining_ == 0) { |
1531 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | 1523 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; |
1532 visitor_->MessageDone(); | 1524 visitor_->MessageDone(); |
1533 } | 1525 } |
1534 goto bottom; // case BalsaFrameEnums::READING_CONTENT | 1526 goto bottom; // case BalsaFrameEnums::READING_CONTENT |
1535 | 1527 |
1536 default: | 1528 default: |
1537 // The state-machine should never be in a state that isn't handled | 1529 // The state-machine should never be in a state that isn't handled |
1538 // above. This is a glaring logic error, and we should do something | 1530 // above. This is a glaring logic error, and we should do something |
1539 // drastic to ensure that this gets looked-at and fixed. | 1531 // drastic to ensure that this gets looked-at and fixed. |
1540 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE | 1532 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE |
1541 << " memory corruption?!"; // COV_NF_LINE | 1533 << " memory corruption?!"; // COV_NF_LINE |
1542 } | 1534 } |
1543 } | 1535 } |
1544 bottom: | 1536 bottom: |
1545 #if DEBUGFRAMER | 1537 #if DEBUGFRAMER |
1546 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" | 1538 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" |
1547 << std::string(input, current) | 1539 << std::string(input, current) << "\n$$$$$$$$$$$$$$" |
1548 << "\n$$$$$$$$$$$$$$" | 1540 << BalsaFrameEnums::ParseStateToString(parse_state_) |
1549 << BalsaFrameEnums::ParseStateToString(parse_state_) | 1541 << "$$$$$$$$$$$$$$$" |
1550 << "$$$$$$$$$$$$$$$" | 1542 << " consumed: " << (current - input); |
1551 << " consumed: " << (current - input); | |
1552 if (Error()) { | 1543 if (Error()) { |
1553 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); | 1544 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); |
1554 } | 1545 } |
1555 #endif // DEBUGFRAMER | 1546 #endif // DEBUGFRAMER |
1556 return current - input; | 1547 return current - input; |
1557 } | 1548 } |
1558 | 1549 |
1559 const uint32 BalsaFrame::kValidTerm1; | 1550 const uint32 BalsaFrame::kValidTerm1; |
1560 const uint32 BalsaFrame::kValidTerm1Mask; | 1551 const uint32 BalsaFrame::kValidTerm1Mask; |
1561 const uint32 BalsaFrame::kValidTerm2; | 1552 const uint32 BalsaFrame::kValidTerm2; |
1562 const uint32 BalsaFrame::kValidTerm2Mask; | 1553 const uint32 BalsaFrame::kValidTerm2Mask; |
1563 | 1554 |
1564 } // namespace net | 1555 } // namespace net |
OLD | NEW |