| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/tools/balsa/balsa_frame.h" | |
| 6 | |
| 7 // Visual C++ defines _M_IX86_FP as 2 if the /arch:SSE2 compiler option is | |
| 8 // specified. | |
| 9 #if !defined(__SSE2__) && _M_IX86_FP == 2 | |
| 10 #define __SSE2__ 1 | |
| 11 #endif | |
| 12 | |
| 13 #include <assert.h> | |
| 14 #if __SSE2__ | |
| 15 #include <emmintrin.h> | |
| 16 #endif // __SSE2__ | |
| 17 | |
| 18 #include <limits> | |
| 19 #include <string> | |
| 20 #include <utility> | |
| 21 #include <vector> | |
| 22 | |
| 23 #include "base/logging.h" | |
| 24 #include "base/strings/string_piece.h" | |
| 25 #include "base/strings/string_util.h" | |
| 26 #include "net/tools/balsa/balsa_enums.h" | |
| 27 #include "net/tools/balsa/balsa_headers.h" | |
| 28 #include "net/tools/balsa/balsa_visitor_interface.h" | |
| 29 #include "net/tools/balsa/buffer_interface.h" | |
| 30 #include "net/tools/balsa/simple_buffer.h" | |
| 31 #include "net/tools/balsa/string_piece_utils.h" | |
| 32 | |
| 33 #if defined(COMPILER_MSVC) | |
| 34 #include <intrin.h> | |
| 35 #include <string.h> | |
| 36 | |
| 37 #pragma intrinsic(_BitScanForward) | |
| 38 | |
| 39 static int ffs(int i) { | |
| 40 unsigned long index; | |
| 41 return _BitScanForward(&index, i) ? index + 1 : 0; | |
| 42 } | |
| 43 | |
| 44 #define strncasecmp _strnicmp | |
| 45 #else | |
| 46 #include <strings.h> | |
| 47 #endif | |
| 48 | |
| 49 namespace net { | |
| 50 | |
// Constants holding some header names for headers which can affect the way the
// HTTP message is framed, and so must be processed specially.
// The names are spelled in lower case; each k*Size constant is the length of
// the corresponding name, not counting the terminating NUL.
static const char kContentLength[] = "content-length";
static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const char kTransferEncoding[] = "transfer-encoding";
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;
| 57 | |
// Constructs a framer in its initial state: it treats input as a request,
// reports events to the built-in do-nothing visitor, has no BalsaHeaders
// object attached, and expects the first line and headers of a message next.
BalsaFrame::BalsaFrame()
    : last_char_was_slash_r_(false),
      saw_non_newline_char_(false),
      start_was_space_(true),
      chunk_length_character_extracted_(false),
      is_request_(true),
      request_was_head_(false),
      // Default limits (presumably expressed in bytes -- confirm in the
      // header).
      max_header_length_(16 * 1024),
      max_request_uri_length_(2048),
      // Never NULL by default: events go to a visitor that ignores them.
      visitor_(&do_nothing_visitor_),
      chunk_length_remaining_(0),
      content_length_remaining_(0),
      last_slash_n_loc_(NULL),
      last_recorded_slash_n_loc_(NULL),
      last_slash_n_idx_(0),
      term_chars_(0),
      parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
      last_error_(BalsaFrameEnums::NO_ERROR),
      // No headers object is attached at construction time.
      headers_(NULL) {
}
| 78 | |
// Empty destructor: the body releases nothing explicitly (in particular,
// neither headers_ nor visitor_ is deleted here).
BalsaFrame::~BalsaFrame() {}
| 80 | |
| 81 void BalsaFrame::Reset() { | |
| 82 last_char_was_slash_r_ = false; | |
| 83 saw_non_newline_char_ = false; | |
| 84 start_was_space_ = true; | |
| 85 chunk_length_character_extracted_ = false; | |
| 86 // is_request_ = true; // not reset between messages. | |
| 87 // request_was_head_ = false; // not reset between messages. | |
| 88 // max_header_length_ = 4096; // not reset between messages. | |
| 89 // max_request_uri_length_ = 2048; // not reset between messages. | |
| 90 // visitor_ = &do_nothing_visitor_; // not reset between messages. | |
| 91 chunk_length_remaining_ = 0; | |
| 92 content_length_remaining_ = 0; | |
| 93 last_slash_n_loc_ = NULL; | |
| 94 last_recorded_slash_n_loc_ = NULL; | |
| 95 last_slash_n_idx_ = 0; | |
| 96 term_chars_ = 0; | |
| 97 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE; | |
| 98 last_error_ = BalsaFrameEnums::NO_ERROR; | |
| 99 lines_.clear(); | |
| 100 if (headers_ != NULL) { | |
| 101 headers_->Clear(); | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 const char* BalsaFrameEnums::ParseStateToString( | |
| 106 BalsaFrameEnums::ParseState error_code) { | |
| 107 switch (error_code) { | |
| 108 case PARSE_ERROR: | |
| 109 return "PARSE_ERROR"; | |
| 110 case READING_HEADER_AND_FIRSTLINE: | |
| 111 return "READING_HEADER_AND_FIRSTLINE"; | |
| 112 case READING_CHUNK_LENGTH: | |
| 113 return "READING_CHUNK_LENGTH"; | |
| 114 case READING_CHUNK_EXTENSION: | |
| 115 return "READING_CHUNK_EXTENSION"; | |
| 116 case READING_CHUNK_DATA: | |
| 117 return "READING_CHUNK_DATA"; | |
| 118 case READING_CHUNK_TERM: | |
| 119 return "READING_CHUNK_TERM"; | |
| 120 case READING_LAST_CHUNK_TERM: | |
| 121 return "READING_LAST_CHUNK_TERM"; | |
| 122 case READING_TRAILER: | |
| 123 return "READING_TRAILER"; | |
| 124 case READING_UNTIL_CLOSE: | |
| 125 return "READING_UNTIL_CLOSE"; | |
| 126 case READING_CONTENT: | |
| 127 return "READING_CONTENT"; | |
| 128 case MESSAGE_FULLY_READ: | |
| 129 return "MESSAGE_FULLY_READ"; | |
| 130 case NUM_STATES: | |
| 131 return "UNKNOWN_STATE"; | |
| 132 } | |
| 133 return "UNKNOWN_STATE"; | |
| 134 } | |
| 135 | |
| 136 const char* BalsaFrameEnums::ErrorCodeToString( | |
| 137 BalsaFrameEnums::ErrorCode error_code) { | |
| 138 switch (error_code) { | |
| 139 case NO_ERROR: | |
| 140 return "NO_ERROR"; | |
| 141 case NO_STATUS_LINE_IN_RESPONSE: | |
| 142 return "NO_STATUS_LINE_IN_RESPONSE"; | |
| 143 case NO_REQUEST_LINE_IN_REQUEST: | |
| 144 return "NO_REQUEST_LINE_IN_REQUEST"; | |
| 145 case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION: | |
| 146 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION"; | |
| 147 case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD: | |
| 148 return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD"; | |
| 149 case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE: | |
| 150 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE"; | |
| 151 case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI: | |
| 152 return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI"; | |
| 153 case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE: | |
| 154 return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE"; | |
| 155 case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION: | |
| 156 return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION"; | |
| 157 case FAILED_CONVERTING_STATUS_CODE_TO_INT: | |
| 158 return "FAILED_CONVERTING_STATUS_CODE_TO_INT"; | |
| 159 case REQUEST_URI_TOO_LONG: | |
| 160 return "REQUEST_URI_TOO_LONG"; | |
| 161 case HEADERS_TOO_LONG: | |
| 162 return "HEADERS_TOO_LONG"; | |
| 163 case UNPARSABLE_CONTENT_LENGTH: | |
| 164 return "UNPARSABLE_CONTENT_LENGTH"; | |
| 165 case MAYBE_BODY_BUT_NO_CONTENT_LENGTH: | |
| 166 return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH"; | |
| 167 case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH: | |
| 168 return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH"; | |
| 169 case HEADER_MISSING_COLON: | |
| 170 return "HEADER_MISSING_COLON"; | |
| 171 case INVALID_CHUNK_LENGTH: | |
| 172 return "INVALID_CHUNK_LENGTH"; | |
| 173 case CHUNK_LENGTH_OVERFLOW: | |
| 174 return "CHUNK_LENGTH_OVERFLOW"; | |
| 175 case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO: | |
| 176 return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO"; | |
| 177 case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT: | |
| 178 return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT"; | |
| 179 case MULTIPLE_CONTENT_LENGTH_KEYS: | |
| 180 return "MULTIPLE_CONTENT_LENGTH_KEYS"; | |
| 181 case MULTIPLE_TRANSFER_ENCODING_KEYS: | |
| 182 return "MULTIPLE_TRANSFER_ENCODING_KEYS"; | |
| 183 case UNKNOWN_TRANSFER_ENCODING: | |
| 184 return "UNKNOWN_TRANSFER_ENCODING"; | |
| 185 case INVALID_HEADER_FORMAT: | |
| 186 return "INVALID_HEADER_FORMAT"; | |
| 187 case INTERNAL_LOGIC_ERROR: | |
| 188 return "INTERNAL_LOGIC_ERROR"; | |
| 189 case NUM_ERROR_CODES: | |
| 190 return "UNKNOWN_ERROR"; | |
| 191 } | |
| 192 return "UNKNOWN_ERROR"; | |
| 193 } | |
| 194 | |
// Summary:
//     Parses the first line of either a request or response, recording the
//     offsets (relative to |begin|) of its three whitespace-separated tokens
//     in |headers|.
//     Note that in the case of a detected warning, error_code will be set
//     but the function will not return false.
//     Exactly zero or one warning or error (but not both) may be detected
//     by this function.
//     Note that this function will not write the data of the first-line
//     into the header's buffer (that should already have been done elsewhere).
//
// Pre-conditions:
//     begin != end
//     *begin should be a character which is > ' '. This implies that there
//     is at least one non-whitespace character between [begin, end).
//     headers is a valid pointer to a BalsaHeaders class.
//     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
//     Entire first line must exist between [begin, end), and the last
//     character of that range must be '\n' (anything else is reported as
//     INTERNAL_LOGIC_ERROR).
//     Exactly zero or one newlines -may- exist between [begin, end)
//     [begin, end) should exist in the header's buffer.
//
// Side-effects:
//     headers will be modified: the whitespace_N_idx_ / non_whitespace_N_idx_
//     fields are set, and for responses parsed_response_code_ is set.
//     error_code may be modified if either a warning or error is detected
//
// Returns:
//     True if no error (as opposed to warning) is detected.
//     False if an error (as opposed to warning) is detected.
//
// If there is indeed non-whitespace in the line, then the following
// will take care of the "*begin > ' '" pre-condition for you:
//     while (*begin <= ' ') ++begin;
//     ParseHTTPFirstLine(begin, end, is_request, max_request_uri_length,
//                        &headers, &error_code);
//
bool ParseHTTPFirstLine(const char* begin,
                        const char* end,
                        bool is_request,
                        size_t max_request_uri_length,
                        BalsaHeaders* headers,
                        BalsaFrameEnums::ErrorCode* error_code) {
  const char* current = begin;
  // HTTP firstlines all have the following structure:
  //  LWS         NONWS  LWS    NONWS   LWS    NONWS   NOTCRLF  CRLF
  //  [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
  //  ws1        nws1    ws2    nws2    ws3    nws3             ws4
  //  |          [-------)      [-------)      [----------------)
  //    REQ:     method         request_uri    version
  //   RESP:     version        statuscode     reason
  //
  //   The first NONWS->LWS component we'll call firstline_a.
  //   The second firstline_b, and the third firstline_c.
  //
  //   firstline_a goes from nws1 to (but not including) ws2
  //   firstline_b goes from nws2 to (but not including) ws3
  //   firstline_c goes from nws3 to (but not including) ws4
  //
  // In the code:
  //    ws1 == whitespace_1_idx_
  //   nws1 == non_whitespace_1_idx_
  //    ws2 == whitespace_2_idx_
  //   nws2 == non_whitespace_2_idx_
  //    ws3 == whitespace_3_idx_
  //   nws3 == non_whitespace_3_idx_
  //    ws4 == whitespace_4_idx_

  // Kill all whitespace (including '\r\n') at the end of the line.
  // First make |end| point at the last character, which must be '\n'.
  --end;
  if (*end != '\n') {
    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
                << headers->OriginalHeadersForDebugging();
    return false;
  }
  // Back up over every trailing character that compares <= ' '.
  while (begin < end && *end <= ' ') {
    --end;
  }
  // If |end| still points at '\n', the loop above stopped because it reached
  // |begin|, i.e. the pre-condition on *begin was violated.
  DCHECK(*end != '\n');
  if (*end == '\n') {
    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
                << headers->OriginalHeadersForDebugging();
    return false;
  }
  // Restore the half-open convention: |end| is now one past the last
  // non-whitespace character of the line.
  ++end;

  // The following condition should not be possible.
  if (end == begin) {
    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
                << headers->OriginalHeadersForDebugging();
    return false;
  }

  // whitespace_1_idx_
  headers->whitespace_1_idx_ = current - begin;
  // This loop is commented out as it is never used in current code.  This is
  // true only because we don't begin parsing the headers at all until we've
  // encountered a non whitespace character at the beginning of the stream, at
  // which point we begin our demarcation of header-start.  If we did -not- do
  // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop
  // would be necessary for the proper functioning of this parsing.
  // This is left here as this function may (in the future) be refactored out
  // of the BalsaFrame class so that it may be shared between code in
  // BalsaFrame and BalsaHeaders (where it would be used in some variant of the
  // set_first_line() function (at which point it would be necessary).
#if 0
  while (*current <= ' ') {
    ++current;
  }
#endif
  // non_whitespace_1_idx_
  headers->non_whitespace_1_idx_ = current - begin;
  // Scan the first token (request method / response version).
  do {
    // The first time through, we're guaranteed that the current character
    // won't be a whitespace (per the pre-condition on *begin; the skipping
    // loop above is compiled out). That implies that we're guaranteed to get
    // at least one non-whitespace character if we get into this loop at all.
    ++current;
    if (current == end) {
      // The line consists of a single token: every remaining index collapses
      // onto the end of the line.
      headers->whitespace_2_idx_ = current - begin;
      headers->non_whitespace_2_idx_ = current - begin;
      headers->whitespace_3_idx_ = current - begin;
      headers->non_whitespace_3_idx_ = current - begin;
      headers->whitespace_4_idx_ = current - begin;
      // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request
      // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
      // NOTE(review): the "+ is_request" arithmetic presumably relies on the
      // request variant being declared immediately after the response variant
      // in BalsaFrameEnums::ErrorCode -- confirm against the enum definition.
      *error_code =
        static_cast<BalsaFrameEnums::ErrorCode>(
            BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
            is_request);
      // A lone token is a hard error for a response, but only a warning for a
      // request (which continues to the checks at output_exhausted).
      if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
        return false;
      }
      goto output_exhausted;
    }
  } while (*current > ' ');
  // whitespace_2_idx_
  headers->whitespace_2_idx_ = current - begin;
  // Skip the whitespace separating the first and second tokens.
  do {
    ++current;
    // Note that due to the loop which consumes all of the whitespace
    // at the end of the line, current can never == end while in this function.
  } while (*current <= ' ');
  // non_whitespace_2_idx_
  headers->non_whitespace_2_idx_ = current - begin;
  // Scan the second token (request URI / response status code).
  do {
    ++current;
    if (current == end) {
      // The line has only two tokens: the third token is recorded as empty.
      headers->whitespace_3_idx_ = current - begin;
      headers->non_whitespace_3_idx_ = current - begin;
      headers->whitespace_4_idx_ = current - begin;
      // The code stored is FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE plus
      // is_request; for a request that is presumably
      // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI (same enum-layout
      // assumption as above -- confirm). This is a warning in both cases.
      *error_code =
        static_cast<BalsaFrameEnums::ErrorCode>(
            BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE
            + is_request);
      goto output_exhausted;
    }
  } while (*current > ' ');
  // whitespace_3_idx_
  headers->whitespace_3_idx_ = current - begin;
  // Skip the whitespace separating the second and third tokens.
  do {
    ++current;
    // Note that due to the loop which consumes all of the whitespace
    // at the end of the line, current can never == end while in this function.
  } while (*current <= ' ');
  // non_whitespace_3_idx_
  headers->non_whitespace_3_idx_ = current - begin;
  // The third token extends to the (whitespace-trimmed) end of the line, so
  // it may itself contain embedded whitespace (e.g. a reason phrase).
  headers->whitespace_4_idx_ = end - begin;

 output_exhausted:
  // Note that we don't fail the parse immediately when parsing of the
  // firstline fails.  Depending on the protocol type, we may want to accept
  // a firstline with only one or two elements, e.g., for HTTP/0.9:
  //   GET\r\n
  // or
  //   GET /\r\n
  // should be parsed without issue (though the visitor should know that
  // parsing the entire line was not exactly as it should be).
  //
  // Eventually, these errors may be removed alltogether, as the visitor can
  // detect them on its own by examining the size of the various fields.
  // headers->set_first_line(non_whitespace_1_idx_, current);

  if (is_request) {
    // For requests, we need at least the method.  We could assume that a
    // blank URI means "/".  If version isn't stated, it should be assumed
    // to be HTTP/0.9 by the visitor.
    // The only hard failure for a request is a request URI (second token)
    // longer than max_request_uri_length.
    if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
        max_request_uri_length) {
      *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
      return false;
    }
  } else {
    // For responses, convert the second token (the status code) to a number.
    headers->parsed_response_code_ = 0;
    {
      const char* parsed_response_code_current =
        begin + headers->non_whitespace_2_idx_;
      const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
      const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;

      // Convert a string of [0-9]* into an int.
      // Note that this allows for the conversion of response codes which
      // are outside the bounds of normal HTTP response codes (no checking
      // is done to ensure that these are valid-- they're merely parsed)!
      while (parsed_response_code_current < parsed_response_code_end) {
        if (*parsed_response_code_current < '0' ||
            *parsed_response_code_current > '9') {
          *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
          return false;
        }
        size_t status_code_x_10 = headers->parsed_response_code_ * 10;
        uint8_t c = *parsed_response_code_current - '0';
        // Reject the digit if either the multiplication by 10 or the addition
        // of the digit would exceed the range of size_t.
        if ((headers->parsed_response_code_ > kMaxDiv10) ||
            (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {
          // overflow.
          *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
          return false;
        }
        headers->parsed_response_code_ = status_code_x_10 + c;
        ++parsed_response_code_current;
      }
    }
  }
  return true;
}
| 421 | |
| 422 // begin - beginning of the firstline | |
| 423 // end - end of the firstline | |
| 424 // | |
| 425 // A precondition for this function is that there is non-whitespace between | |
| 426 // [begin, end). If this precondition is not met, the function will not perform | |
| 427 // as expected (and bad things may happen, and it will eat your first, second, | |
| 428 // and third unborn children!). | |
| 429 // | |
| 430 // Another precondition for this function is that [begin, end) includes | |
| 431 // at most one newline, which must be at the end of the line. | |
| 432 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) { | |
| 433 BalsaFrameEnums::ErrorCode previous_error = last_error_; | |
| 434 if (!ParseHTTPFirstLine(begin, | |
| 435 end, | |
| 436 is_request_, | |
| 437 max_request_uri_length_, | |
| 438 headers_, | |
| 439 &last_error_)) { | |
| 440 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 441 visitor_->HandleHeaderError(this); | |
| 442 return; | |
| 443 } | |
| 444 if (previous_error != last_error_) { | |
| 445 visitor_->HandleHeaderWarning(this); | |
| 446 } | |
| 447 | |
| 448 if (is_request_) { | |
| 449 size_t version_length = | |
| 450 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_; | |
| 451 visitor_->ProcessRequestFirstLine( | |
| 452 begin + headers_->non_whitespace_1_idx_, | |
| 453 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, | |
| 454 begin + headers_->non_whitespace_1_idx_, | |
| 455 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, | |
| 456 begin + headers_->non_whitespace_2_idx_, | |
| 457 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, | |
| 458 begin + headers_->non_whitespace_3_idx_, | |
| 459 version_length); | |
| 460 if (version_length == 0) | |
| 461 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 462 } else { | |
| 463 visitor_->ProcessResponseFirstLine( | |
| 464 begin + headers_->non_whitespace_1_idx_, | |
| 465 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, | |
| 466 begin + headers_->non_whitespace_1_idx_, | |
| 467 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, | |
| 468 begin + headers_->non_whitespace_2_idx_, | |
| 469 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, | |
| 470 begin + headers_->non_whitespace_3_idx_, | |
| 471 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_); | |
| 472 } | |
| 473 } | |
| 474 | |
| 475 // 'stream_begin' points to the first character of the headers buffer. | |
| 476 // 'line_begin' points to the first character of the line. | |
| 477 // 'current' points to a char which is ':'. | |
| 478 // 'line_end' points to the position of '\n' + 1. | |
| 479 // 'line_begin' points to the position of first character of line. | |
| 480 void BalsaFrame::CleanUpKeyValueWhitespace( | |
| 481 const char* stream_begin, | |
| 482 const char* line_begin, | |
| 483 const char* current, | |
| 484 const char* line_end, | |
| 485 HeaderLineDescription* current_header_line) { | |
| 486 const char* colon_loc = current; | |
| 487 DCHECK_LT(colon_loc, line_end); | |
| 488 DCHECK_EQ(':', *colon_loc); | |
| 489 DCHECK_EQ(':', *current); | |
| 490 DCHECK_GE(' ', *line_end) | |
| 491 << "\"" << std::string(line_begin, line_end) << "\""; | |
| 492 | |
| 493 // TODO(fenix): Investigate whether or not the bounds tests in the | |
| 494 // while loops here are redundant, and if so, remove them. | |
| 495 --current; | |
| 496 while (current > line_begin && *current <= ' ') --current; | |
| 497 current += (current != colon_loc); | |
| 498 current_header_line->key_end_idx = current - stream_begin; | |
| 499 | |
| 500 current = colon_loc; | |
| 501 DCHECK_EQ(':', *current); | |
| 502 ++current; | |
| 503 while (current < line_end && *current <= ' ') ++current; | |
| 504 current_header_line->value_begin_idx = current - stream_begin; | |
| 505 | |
| 506 DCHECK_GE(current_header_line->key_end_idx, | |
| 507 current_header_line->first_char_idx); | |
| 508 DCHECK_GE(current_header_line->value_begin_idx, | |
| 509 current_header_line->key_end_idx); | |
| 510 DCHECK_GE(current_header_line->last_char_idx, | |
| 511 current_header_line->value_begin_idx); | |
| 512 } | |
| 513 | |
| 514 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() { | |
| 515 DCHECK(!lines_.empty()); | |
| 516 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | |
| 517 // The last line is always just a newline (and is uninteresting). | |
| 518 const Lines::size_type lines_size_m1 = lines_.size() - 1; | |
| 519 #if __SSE2__ | |
| 520 const __m128i colons = _mm_set1_epi8(':'); | |
| 521 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; | |
| 522 #endif // __SSE2__ | |
| 523 const char* current = stream_begin + lines_[1].first; | |
| 524 // This code is a bit more subtle than it may appear at first glance. | |
| 525 // This code looks for a colon in the current line... but it also looks | |
| 526 // beyond the current line. If there is no colon in the current line, then | |
| 527 // for each subsequent line (until the colon which -has- been found is | |
| 528 // associated with a line), no searching for a colon will be performed. In | |
| 529 // this way, we minimize the amount of bytes we have scanned for a colon. | |
| 530 for (Lines::size_type i = 1; i < lines_size_m1;) { | |
| 531 const char* line_begin = stream_begin + lines_[i].first; | |
| 532 | |
| 533 // Here we handle possible continuations. Note that we do not replace | |
| 534 // the '\n' in the line before a continuation (at least, as of now), | |
| 535 // which implies that any code which looks for a value must deal with | |
| 536 // "\r\n", etc -within- the line (and not just at the end of it). | |
| 537 for (++i; i < lines_size_m1; ++i) { | |
| 538 const char c = *(stream_begin + lines_[i].first); | |
| 539 if (c > ' ') { | |
| 540 // Not a continuation, so stop. Note that if the 'original' i = 1, | |
| 541 // and the next line is not a continuation, we'll end up with i = 2 | |
| 542 // when we break. This handles the incrementing of i for the outer | |
| 543 // loop. | |
| 544 break; | |
| 545 } | |
| 546 } | |
| 547 const char* line_end = stream_begin + lines_[i - 1].second; | |
| 548 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); | |
| 549 | |
| 550 // We cleanup the whitespace at the end of the line before doing anything | |
| 551 // else of interest as it allows us to do nothing when irregularly formatted | |
| 552 // headers are parsed (e.g. those with only keys, only values, or no colon). | |
| 553 // | |
| 554 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. | |
| 555 --line_end; | |
| 556 DCHECK_EQ('\n', *line_end) | |
| 557 << "\"" << std::string(line_begin, line_end) << "\""; | |
| 558 while (*line_end <= ' ' && line_end > line_begin) { | |
| 559 --line_end; | |
| 560 } | |
| 561 ++line_end; | |
| 562 DCHECK_GE(' ', *line_end); | |
| 563 DCHECK_LT(line_begin, line_end); | |
| 564 | |
| 565 // We use '0' for the block idx, because we're always writing to the first | |
| 566 // block from the framer (we do this because the framer requires that the | |
| 567 // entire header sequence be in a contiguous buffer). | |
| 568 headers_->header_lines_.push_back( | |
| 569 HeaderLineDescription(line_begin - stream_begin, | |
| 570 line_end - stream_begin, | |
| 571 line_end - stream_begin, | |
| 572 line_end - stream_begin, | |
| 573 0)); | |
| 574 if (current >= line_end) { | |
| 575 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; | |
| 576 visitor_->HandleHeaderWarning(this); | |
| 577 // Then the next colon will not be found within this header line-- time | |
| 578 // to try again with another header-line. | |
| 579 continue; | |
| 580 } else if (current < line_begin) { | |
| 581 // When this condition is true, the last detected colon was part of a | |
| 582 // previous line. We reset to the beginning of the line as we don't care | |
| 583 // about the presence of any colon before the beginning of the current | |
| 584 // line. | |
| 585 current = line_begin; | |
| 586 } | |
| 587 #if __SSE2__ | |
| 588 while (current < header_lines_end_m16) { | |
| 589 __m128i header_bytes = | |
| 590 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current)); | |
| 591 __m128i colon_cmp = _mm_cmpeq_epi8(header_bytes, colons); | |
| 592 int colon_msk = _mm_movemask_epi8(colon_cmp); | |
| 593 if (colon_msk == 0) { | |
| 594 current += 16; | |
| 595 continue; | |
| 596 } | |
| 597 current += (ffs(colon_msk) - 1); | |
| 598 if (current > line_end) { | |
| 599 break; | |
| 600 } | |
| 601 goto found_colon; | |
| 602 } | |
| 603 #endif // __SSE2__ | |
| 604 for (; current < line_end; ++current) { | |
| 605 if (*current != ':') { | |
| 606 continue; | |
| 607 } | |
| 608 goto found_colon; | |
| 609 } | |
| 610 // If we've gotten to here, then there was no colon | |
| 611 // in the line. The arguments we passed into the construction | |
| 612 // for the HeaderLineDescription object should be OK-- it assumes | |
| 613 // that the entire content is 'key' by default (which is true, as | |
| 614 // there was no colon, there can be no value). Note that this is a | |
| 615 // construct which is technically not allowed by the spec. | |
| 616 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; | |
| 617 visitor_->HandleHeaderWarning(this); | |
| 618 continue; | |
| 619 found_colon: | |
| 620 DCHECK_EQ(*current, ':'); | |
| 621 DCHECK_LE(current - stream_begin, line_end - stream_begin); | |
| 622 DCHECK_LE(stream_begin - stream_begin, current - stream_begin); | |
| 623 | |
| 624 HeaderLineDescription& current_header_line = headers_->header_lines_.back(); | |
| 625 current_header_line.key_end_idx = current - stream_begin; | |
| 626 current_header_line.value_begin_idx = current_header_line.key_end_idx; | |
| 627 if (current < line_end) { | |
| 628 ++current_header_line.key_end_idx; | |
| 629 | |
| 630 CleanUpKeyValueWhitespace(stream_begin, | |
| 631 line_begin, | |
| 632 current, | |
| 633 line_end, | |
| 634 ¤t_header_line); | |
| 635 } | |
| 636 } | |
| 637 } | |
| 638 | |
| 639 void BalsaFrame::ProcessContentLengthLine( | |
| 640 HeaderLines::size_type line_idx, | |
| 641 BalsaHeadersEnums::ContentLengthStatus* status, | |
| 642 size_t* length) { | |
| 643 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; | |
| 644 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | |
| 645 const char* line_end = stream_begin + header_line.last_char_idx; | |
| 646 const char* value_begin = (stream_begin + header_line.value_begin_idx); | |
| 647 | |
| 648 if (value_begin >= line_end) { | |
| 649 // There is no non-whitespace value data. | |
| 650 #if DEBUGFRAMER | |
| 651 LOG(INFO) << "invalid content-length -- no non-whitespace value data"; | |
| 652 #endif | |
| 653 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; | |
| 654 return; | |
| 655 } | |
| 656 | |
| 657 *length = 0; | |
| 658 while (value_begin < line_end) { | |
| 659 if (*value_begin < '0' || *value_begin > '9') { | |
| 660 // bad! content-length found, and couldn't parse all of it! | |
| 661 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; | |
| 662 #if DEBUGFRAMER | |
| 663 LOG(INFO) << "invalid content-length - non numeric character detected"; | |
| 664 #endif // DEBUGFRAMER | |
| 665 return; | |
| 666 } | |
| 667 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; | |
| 668 size_t length_x_10 = *length * 10; | |
| 669 const unsigned char c = *value_begin - '0'; | |
| 670 if (*length > kMaxDiv10 || | |
| 671 (std::numeric_limits<size_t>::max() - length_x_10) < c) { | |
| 672 *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW; | |
| 673 #if DEBUGFRAMER | |
| 674 LOG(INFO) << "content-length overflow"; | |
| 675 #endif // DEBUGFRAMER | |
| 676 return; | |
| 677 } | |
| 678 *length = length_x_10 + c; | |
| 679 ++value_begin; | |
| 680 } | |
| 681 #if DEBUGFRAMER | |
| 682 LOG(INFO) << "content_length parsed: " << *length; | |
| 683 #endif // DEBUGFRAMER | |
| 684 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; | |
| 685 } | |
| 686 | |
| 687 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { | |
| 688 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; | |
| 689 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | |
| 690 const char* line_end = stream_begin + header_line.last_char_idx; | |
| 691 const char* value_begin = stream_begin + header_line.value_begin_idx; | |
| 692 size_t value_length = line_end - value_begin; | |
| 693 | |
| 694 if ((value_length == 7) && | |
| 695 !strncasecmp(value_begin, "chunked", 7)) { | |
| 696 headers_->transfer_encoding_is_chunked_ = true; | |
| 697 } else if ((value_length == 8) && | |
| 698 !strncasecmp(value_begin, "identity", 8)) { | |
| 699 headers_->transfer_encoding_is_chunked_ = false; | |
| 700 } else { | |
| 701 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; | |
| 702 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 703 visitor_->HandleHeaderError(this); | |
| 704 return; | |
| 705 } | |
| 706 } | |
| 707 | |
| 708 namespace { | |
| 709 bool SplitStringPiece(base::StringPiece original, char delim, | |
| 710 base::StringPiece* before, base::StringPiece* after) { | |
| 711 const char* p = original.data(); | |
| 712 const char* end = p + original.size(); | |
| 713 | |
| 714 while (p != end) { | |
| 715 if (*p == delim) { | |
| 716 ++p; | |
| 717 } else { | |
| 718 const char* start = p; | |
| 719 while (++p != end && *p != delim) { | |
| 720 // Skip to the next occurence of the delimiter. | |
| 721 } | |
| 722 *before = base::StringPiece(start, p - start); | |
| 723 if (p != end) | |
| 724 *after = base::StringPiece(p + 1, end - (p + 1)); | |
| 725 else | |
| 726 *after = base::StringPiece(""); | |
| 727 *before = base::TrimWhitespaceASCII(*before, base::TRIM_ALL); | |
| 728 *after = base::TrimWhitespaceASCII(*after, base::TRIM_ALL); | |
| 729 return true; | |
| 730 } | |
| 731 } | |
| 732 | |
| 733 *before = original; | |
| 734 *after = ""; | |
| 735 return false; | |
| 736 } | |
| 737 | |
| 738 // TODO(phython): Fix this function to properly deal with quoted values. | |
| 739 // E.g. ";;foo", "\";;\"", or \"aa; | |
| 740 // The last example, the semi-colon is a separator between extensions. | |
| 741 void ProcessChunkExtensionsManual(base::StringPiece all_extensions, | |
| 742 BalsaHeaders* extensions) { | |
| 743 base::StringPiece extension; | |
| 744 base::StringPiece remaining; | |
| 745 all_extensions = base::TrimWhitespaceASCII(all_extensions, base::TRIM_ALL); | |
| 746 SplitStringPiece(all_extensions, ';', &extension, &remaining); | |
| 747 while (!extension.empty()) { | |
| 748 base::StringPiece key; | |
| 749 base::StringPiece value; | |
| 750 SplitStringPiece(extension, '=', &key, &value); | |
| 751 if (!value.empty()) { | |
| 752 // Strip quotation marks if they exist. | |
| 753 if (!value.empty() && value.front() == '"') | |
| 754 value.remove_prefix(1); | |
| 755 if (!value.empty() && value.back() == '"') | |
| 756 value.remove_suffix(1); | |
| 757 } | |
| 758 | |
| 759 extensions->AppendHeader(key, value); | |
| 760 | |
| 761 remaining = base::TrimWhitespaceASCII(remaining, base::TRIM_ALL); | |
| 762 SplitStringPiece(remaining, ';', &extension, &remaining); | |
| 763 } | |
| 764 } | |
| 765 | |
| 766 } // anonymous namespace | |
| 767 | |
| 768 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size, | |
| 769 BalsaHeaders* extensions) { | |
| 770 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions); | |
| 771 } | |
| 772 | |
| 773 void BalsaFrame::ProcessHeaderLines() { | |
| 774 HeaderLines::size_type content_length_idx = 0; | |
| 775 HeaderLines::size_type transfer_encoding_idx = 0; | |
| 776 | |
| 777 DCHECK(!lines_.empty()); | |
| 778 #if DEBUGFRAMER | |
| 779 LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; | |
| 780 #endif // DEBUGFRAMER | |
| 781 | |
| 782 // There is no need to attempt to process headers if no header lines exist. | |
| 783 // There are at least two lines in the message which are not header lines. | |
| 784 // These two non-header lines are the first line of the message, and the | |
| 785 // last line of the message (which is an empty line). | |
| 786 // Thus, we test to see if we have more than two lines total before attempting | |
| 787 // to parse any header lines. | |
| 788 if (lines_.size() > 2) { | |
| 789 const char* stream_begin = headers_->OriginalHeaderStreamBegin(); | |
| 790 | |
| 791 // Then, for the rest of the header data, we parse these into key-value | |
| 792 // pairs. | |
| 793 FindColonsAndParseIntoKeyValue(); | |
| 794 // At this point, we've parsed all of the headers. Time to look for those | |
| 795 // headers which we require for framing. | |
| 796 const HeaderLines::size_type | |
| 797 header_lines_size = headers_->header_lines_.size(); | |
| 798 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { | |
| 799 const HeaderLineDescription& current_header_line = | |
| 800 headers_->header_lines_[i]; | |
| 801 const char* key_begin = | |
| 802 (stream_begin + current_header_line.first_char_idx); | |
| 803 const char* key_end = (stream_begin + current_header_line.key_end_idx); | |
| 804 const size_t key_len = key_end - key_begin; | |
| 805 const char c = *key_begin; | |
| 806 #if DEBUGFRAMER | |
| 807 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len) | |
| 808 << " c: '" << c << "' key_len: " << key_len; | |
| 809 #endif // DEBUGFRAMER | |
| 810 // If a header begins with either lowercase or uppercase 'c' or 't', then | |
| 811 // the header may be one of content-length, connection, content-encoding | |
| 812 // or transfer-encoding. These headers are special, as they change the way | |
| 813 // that the message is framed, and so the framer is required to search | |
| 814 // for them. | |
| 815 | |
| 816 | |
| 817 if (c == 'c' || c == 'C') { | |
| 818 if ((key_len == kContentLengthSize) && | |
| 819 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { | |
| 820 BalsaHeadersEnums::ContentLengthStatus content_length_status = | |
| 821 BalsaHeadersEnums::NO_CONTENT_LENGTH; | |
| 822 size_t length = 0; | |
| 823 ProcessContentLengthLine(i, &content_length_status, &length); | |
| 824 if (content_length_idx != 0) { // then we've already seen one! | |
| 825 if ((headers_->content_length_status_ != content_length_status) || | |
| 826 ((headers_->content_length_status_ == | |
| 827 BalsaHeadersEnums::VALID_CONTENT_LENGTH) && | |
| 828 length != headers_->content_length_)) { | |
| 829 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; | |
| 830 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 831 visitor_->HandleHeaderError(this); | |
| 832 return; | |
| 833 } | |
| 834 continue; | |
| 835 } else { | |
| 836 content_length_idx = i + 1; | |
| 837 headers_->content_length_status_ = content_length_status; | |
| 838 headers_->content_length_ = length; | |
| 839 content_length_remaining_ = length; | |
| 840 } | |
| 841 | |
| 842 } | |
| 843 } else if (c == 't' || c == 'T') { | |
| 844 if ((key_len == kTransferEncodingSize) && | |
| 845 0 == strncasecmp(key_begin, kTransferEncoding, | |
| 846 kTransferEncodingSize)) { | |
| 847 if (transfer_encoding_idx != 0) { | |
| 848 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; | |
| 849 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 850 visitor_->HandleHeaderError(this); | |
| 851 return; | |
| 852 } | |
| 853 transfer_encoding_idx = i + 1; | |
| 854 } | |
| 855 } else if (i == 0 && (key_len == 0 || c == ' ')) { | |
| 856 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; | |
| 857 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 858 visitor_->HandleHeaderError(this); | |
| 859 return; | |
| 860 } | |
| 861 } | |
| 862 if (headers_->transfer_encoding_is_chunked_) { | |
| 863 headers_->content_length_ = 0; | |
| 864 headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; | |
| 865 content_length_remaining_ = 0; | |
| 866 } | |
| 867 if (transfer_encoding_idx != 0) { | |
| 868 ProcessTransferEncodingLine(transfer_encoding_idx - 1); | |
| 869 } | |
| 870 } | |
| 871 } | |
| 872 | |
| 873 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { | |
| 874 // For responses, can't have a body if the request was a HEAD, or if it is | |
| 875 // one of these response-codes. rfc2616 section 4.3 | |
| 876 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 877 if (is_request_ || | |
| 878 !(request_was_head_ || | |
| 879 (headers_->parsed_response_code_ >= 100 && | |
| 880 headers_->parsed_response_code_ < 200) || | |
| 881 (headers_->parsed_response_code_ == 204) || | |
| 882 (headers_->parsed_response_code_ == 304))) { | |
| 883 // Then we can have a body. | |
| 884 if (headers_->transfer_encoding_is_chunked_) { | |
| 885 // Note that | |
| 886 // if ( Transfer-Encoding: chunked && Content-length: ) | |
| 887 // then Transfer-Encoding: chunked trumps. | |
| 888 // This is as specified in the spec. | |
| 889 // rfc2616 section 4.4.3 | |
| 890 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; | |
| 891 } else { | |
| 892 // Errors parsing content-length definitely can cause | |
| 893 // protocol errors/warnings | |
| 894 switch (headers_->content_length_status_) { | |
| 895 // If we have a content-length, and it is parsed | |
| 896 // properly, there are two options. | |
| 897 // 1) zero content, in which case the message is done, and | |
| 898 // 2) nonzero content, in which case we have to | |
| 899 // consume the body. | |
| 900 case BalsaHeadersEnums::VALID_CONTENT_LENGTH: | |
| 901 if (headers_->content_length_ == 0) { | |
| 902 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 903 } else { | |
| 904 parse_state_ = BalsaFrameEnums::READING_CONTENT; | |
| 905 } | |
| 906 break; | |
| 907 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: | |
| 908 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: | |
| 909 // If there were characters left-over after parsing the | |
| 910 // content length, we should flag an error and stop. | |
| 911 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 912 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; | |
| 913 visitor_->HandleHeaderError(this); | |
| 914 break; | |
| 915 // We can have: no transfer-encoding, no content length, and no | |
| 916 // connection: close... | |
| 917 // Unfortunately, this case doesn't seem to be covered in the spec. | |
| 918 // We'll assume that the safest thing to do here is what the google | |
| 919 // binaries before 2008 already do, which is to assume that | |
| 920 // everything until the connection is closed is body. | |
| 921 case BalsaHeadersEnums::NO_CONTENT_LENGTH: | |
| 922 if (is_request_) { | |
| 923 base::StringPiece method = headers_->request_method(); | |
| 924 // POSTs and PUTs should have a detectable body length. If they | |
| 925 // do not we consider it an error. | |
| 926 if ((method.size() == 4 && | |
| 927 strncmp(method.data(), "POST", 4) == 0) || | |
| 928 (method.size() == 3 && | |
| 929 strncmp(method.data(), "PUT", 3) == 0)) { | |
| 930 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 931 last_error_ = | |
| 932 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; | |
| 933 visitor_->HandleHeaderError(this); | |
| 934 break; | |
| 935 } | |
| 936 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 937 } else { | |
| 938 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; | |
| 939 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; | |
| 940 visitor_->HandleHeaderWarning(this); | |
| 941 } | |
| 942 break; | |
| 943 // The COV_NF_... statements here provide hints to the apparatus | |
| 944 // which computes coverage reports/ratios that this code is never | |
| 945 // intended to be executed, and should technically be impossible. | |
| 946 // COV_NF_START | |
| 947 default: | |
| 948 LOG(FATAL) << "Saw a content_length_status: " | |
| 949 << headers_->content_length_status_ << " which is unknown."; | |
| 950 // COV_NF_END | |
| 951 } | |
| 952 } | |
| 953 } | |
| 954 } | |
| 955 | |
// Consumes bytes from [message_start, message_start + message_length) as part
// of the message's first line and headers, and returns how many of those
// bytes were consumed (message_current - original_message_start).
//
// Behavior, as implemented below:
//  - While saw_non_newline_char_ is false, leading '\r' and '\n' bytes are
//    skipped. If the first other byte is <= ' ', the framer enters
//    PARSE_ERROR with NO_REQUEST_LINE_IN_REQUEST.
//  - Every '\n' found appends a (last_slash_n_idx_, message_current_idx) pair
//    to lines_. When the first pair is appended, the bytes up to and
//    including that '\n' are written to headers_ and handed to
//    ProcessFirstLine().
//  - A '\n' that ends the header block (see the checks on
//    chars_since_last_slash_n) jumps to process_lines, which writes the
//    remaining bytes to headers_, enforces max_header_length_, calls
//    ProcessHeaderLines() and AssignParseStateAfterHeadersHaveBeenParsed(),
//    and notifies the visitor.
//  - If the input runs out first, the bytes not yet written are written to
//    headers_ and last_char_was_slash_r_ records whether the final input
//    byte was '\r'.
size_t BalsaFrame::ProcessHeaders(const char* message_start,
                                  size_t message_length) {
  const char* const original_message_start = message_start;
  const char* const message_end = message_start + message_length;
  const char* message_current = message_start;
  // First byte of the input that has not yet been written to headers_.
  const char* checkpoint = message_start;

  if (message_length == 0) {
    goto bottom;
  }

  while (message_current < message_end) {
    size_t base_idx = headers_->GetReadableBytesFromHeaderStream();

    // Yes, we could use strchr (assuming null termination), or
    // memchr, but as it turns out that is slower than this tight loop
    // for the input that we see.
    if (!saw_non_newline_char_) {
      do {
        const char c = *message_current;
        if (c != '\r' && c != '\n') {
          if (c <= ' ') {
            parse_state_ = BalsaFrameEnums::PARSE_ERROR;
            last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;
            visitor_->HandleHeaderError(this);
            goto bottom;
          } else {
            saw_non_newline_char_ = true;
            // The message proper starts here; the skipped CR/LF bytes are
            // never written to headers_.
            checkpoint = message_start = message_current;
            goto read_real_message;
          }
        }
        ++message_current;
      } while (message_current < message_end);
      goto bottom;  // this is necessary to skip 'last_char_was_slash_r' checks
    } else {
 read_real_message:
      // Note that SSE2 can be enabled on certain piii platforms.
#if __SSE2__
      {
        // NOTE(review): when fewer than 16 bytes are available this forms a
        // pointer before the start of the input; the loop below is then
        // expected not to run -- confirm this is acceptable on the supported
        // toolchains.
        const char* const message_end_m16 = message_end - 16;
        __m128i newlines = _mm_set1_epi8('\n');
        while (message_current < message_end_m16) {
          // What this does (using compiler intrinsics):
          //
          // Load 16 '\n's into an xmm register
          // Load 16 bytes of current message into an xmm register
          // Do byte-wise equals on those two xmm registers
          // Take the first bit of each byte, and put that into the first
          //   16 bits of a mask
          // If the mask is zero, no '\n' found. increment by 16 and try again
          // Else scan forward to find the first set bit.
          // Increment current by the index of the first set bit
          //   (ffs returns index of first set bit + 1)
          __m128i msg_bytes =
              _mm_loadu_si128(const_cast<__m128i *>(
                  reinterpret_cast<const __m128i *>(message_current)));
          __m128i newline_cmp = _mm_cmpeq_epi8(msg_bytes, newlines);
          int newline_msk = _mm_movemask_epi8(newline_cmp);
          if (newline_msk == 0) {
            message_current += 16;
            continue;
          }
          message_current += (ffs(newline_msk) - 1);
          const size_t relative_idx = message_current - message_start;
          const size_t message_current_idx = 1 + base_idx + relative_idx;
          lines_.push_back(std::make_pair(last_slash_n_idx_,
                                          message_current_idx));
          if (lines_.size() == 1) {
            // This '\n' ends the first line: flush it into headers_ and
            // parse it right away.
            headers_->WriteFromFramer(checkpoint,
                                      1 + message_current - checkpoint);
            checkpoint = message_current + 1;
            const char* begin = headers_->OriginalHeaderStreamBegin();
#if DEBUGFRAMER
            LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
            LOG(INFO) << "is_request_: " << is_request_;
#endif
            ProcessFirstLine(begin, begin + lines_[0].second);
            if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
              goto process_lines;
            else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
              goto bottom;
          }
          const size_t chars_since_last_slash_n = (message_current_idx -
                                                   last_slash_n_idx_);
          last_slash_n_idx_ = message_current_idx;
          if (chars_since_last_slash_n > 2) {
            // We have a slash-n, but the last slash n was
            // more than 2 characters away from this. Thus, we know
            // that this cannot be an end-of-header.
            ++message_current;
            continue;
          }
          // NOTE(review): within this function last_char_was_slash_r_ is only
          // assigned when a call consumes all of its input (near the end of
          // the function), so here it describes the end of an earlier call
          // rather than the byte before this '\n' -- confirm this is intended.
          if ((chars_since_last_slash_n == 1) ||
              (((message_current > message_start) &&
                (*(message_current - 1) == '\r')) ||
               (last_char_was_slash_r_))) {
            goto process_lines;
          }
          ++message_current;
        }
      }
#endif  // __SSE2__
      // Byte-at-a-time scan. It handles whatever the SSE2 loop above left
      // unscanned, or all of the input when SSE2 is not compiled in. The body
      // mirrors the SSE2 loop above.
      while (message_current < message_end) {
        if (*message_current != '\n') {
          ++message_current;
          continue;
        }
        const size_t relative_idx = message_current - message_start;
        const size_t message_current_idx = 1 + base_idx + relative_idx;
        lines_.push_back(std::make_pair(last_slash_n_idx_,
                                        message_current_idx));
        if (lines_.size() == 1) {
          headers_->WriteFromFramer(checkpoint,
                                    1 + message_current - checkpoint);
          checkpoint = message_current + 1;
          const char* begin = headers_->OriginalHeaderStreamBegin();
#if DEBUGFRAMER
          LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
          LOG(INFO) << "is_request_: " << is_request_;
#endif
          ProcessFirstLine(begin, begin + lines_[0].second);
          if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
            goto process_lines;
          else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
            goto bottom;
        }
        const size_t chars_since_last_slash_n = (message_current_idx -
                                                 last_slash_n_idx_);
        last_slash_n_idx_ = message_current_idx;
        if (chars_since_last_slash_n > 2) {
          // false positive.
          ++message_current;
          continue;
        }
        if ((chars_since_last_slash_n == 1) ||
            (((message_current > message_start) &&
              (*(message_current - 1) == '\r')) ||
             (last_char_was_slash_r_))) {
          goto process_lines;
        }
        ++message_current;
      }
    }
    continue;
 process_lines:
    // Step past the '\n' that ended the headers so that it is both written to
    // headers_ and counted in the return value.
    ++message_current;
    DCHECK(message_current >= message_start);
    if (message_current > message_start) {
      headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
    }

    // Check if we have exceeded maximum headers length
    // Although we check for this limit before and after we call this function
    // we check it here as well to make sure that in case the visitor changed
    // the max_header_length_ (for example after processing the first line)
    // we handle it gracefully.
    if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
      parse_state_ = BalsaFrameEnums::PARSE_ERROR;
      last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
      visitor_->HandleHeaderError(this);
      goto bottom;
    }

    // Since we know that we won't be writing any more bytes of the header,
    // we tell that to the headers object. The headers object may make
    // more efficient allocation decisions when this is signaled.
    headers_->DoneWritingFromFramer();
    {
      const char* readable_ptr = NULL;
      size_t readable_size = 0;
      headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);
      visitor_->ProcessHeaderInput(readable_ptr, readable_size);
    }

    // Ok, now that we've written everything into our header buffer, it is
    // time to process the header lines (extract proper values for headers
    // which are important for framing).
    ProcessHeaderLines();
    if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
      goto bottom;
    }
    AssignParseStateAfterHeadersHaveBeenParsed();
    if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
      goto bottom;
    }
    visitor_->ProcessHeaders(*headers_);
    visitor_->HeaderDone();
    if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
      visitor_->MessageDone();
    }
    goto bottom;
  }
  // If we've gotten to here, it means that we've consumed all of the
  // available input. We need to record whether or not the last character we
  // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
  // a header framing that is split across the two calls.
  last_char_was_slash_r_ = (*(message_end - 1) == '\r');
  DCHECK(message_current >= message_start);
  if (message_current > message_start) {
    headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
  }
 bottom:
  return message_current - original_message_start;
}
| 1161 | |
| 1162 | |
| 1163 size_t BalsaFrame::BytesSafeToSplice() const { | |
| 1164 switch (parse_state_) { | |
| 1165 case BalsaFrameEnums::READING_CHUNK_DATA: | |
| 1166 return chunk_length_remaining_; | |
| 1167 case BalsaFrameEnums::READING_UNTIL_CLOSE: | |
| 1168 return std::numeric_limits<size_t>::max(); | |
| 1169 case BalsaFrameEnums::READING_CONTENT: | |
| 1170 return content_length_remaining_; | |
| 1171 default: | |
| 1172 return 0; | |
| 1173 } | |
| 1174 } | |
| 1175 | |
| 1176 void BalsaFrame::BytesSpliced(size_t bytes_spliced) { | |
| 1177 switch (parse_state_) { | |
| 1178 case BalsaFrameEnums::READING_CHUNK_DATA: | |
| 1179 if (chunk_length_remaining_ >= bytes_spliced) { | |
| 1180 chunk_length_remaining_ -= bytes_spliced; | |
| 1181 if (chunk_length_remaining_ == 0) { | |
| 1182 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; | |
| 1183 } | |
| 1184 return; | |
| 1185 } else { | |
| 1186 last_error_ = | |
| 1187 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; | |
| 1188 goto error_exit; | |
| 1189 } | |
| 1190 | |
| 1191 case BalsaFrameEnums::READING_UNTIL_CLOSE: | |
| 1192 return; | |
| 1193 | |
| 1194 case BalsaFrameEnums::READING_CONTENT: | |
| 1195 if (content_length_remaining_ >= bytes_spliced) { | |
| 1196 content_length_remaining_ -= bytes_spliced; | |
| 1197 if (content_length_remaining_ == 0) { | |
| 1198 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 1199 visitor_->MessageDone(); | |
| 1200 } | |
| 1201 return; | |
| 1202 } else { | |
| 1203 last_error_ = | |
| 1204 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; | |
| 1205 goto error_exit; | |
| 1206 } | |
| 1207 | |
| 1208 default: | |
| 1209 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; | |
| 1210 goto error_exit; | |
| 1211 } | |
| 1212 | |
| 1213 error_exit: | |
| 1214 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 1215 visitor_->HandleBodyError(this); | |
| 1216 }; | |
| 1217 | |
| 1218 // You may note that the state-machine contained within this function has both | |
| 1219 // switch and goto labels for nearly the same thing. For instance, the | |
| 1220 // following two labels refer to the same code block: | |
| 1221 // label_reading_chunk_data: | |
| 1222 // case BalsaFrameEnums::READING_CHUNK_DATA: | |
| 1223 // The 'case' statement is required for the switch statement which occurs when | |
| 1224 // ProcessInput is invoked. The goto label is required as the state-machine | |
| 1225 // does not use a computed goto in any subsequent operations. | |
| 1226 // | |
| 1227 // Since several states exit the state machine for various reasons, there is | |
| 1228 // also one label at the bottom of the function. When it is appropriate to | |
| 1229 // return from the function, that part of the state machine instead issues a | |
| 1230 // goto bottom; This results in less code duplication, and makes debugging | |
| 1231 // easier (as you can add a statement to a section of code which is guaranteed | |
| 1232 // to be invoked when the function is exiting). | |
| 1233 size_t BalsaFrame::ProcessInput(const char* input, size_t size) { | |
| 1234 const char* current = input; | |
| 1235 const char* on_entry = current; | |
| 1236 const char* end = current + size; | |
| 1237 #if DEBUGFRAMER | |
| 1238 LOG(INFO) << "\n==============" | |
| 1239 << BalsaFrameEnums::ParseStateToString(parse_state_) | |
| 1240 << "===============\n"; | |
| 1241 #endif // DEBUGFRAMER | |
| 1242 | |
| 1243 DCHECK(headers_ != NULL); | |
| 1244 if (headers_ == NULL) return 0; | |
| 1245 | |
| 1246 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { | |
| 1247 const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); | |
| 1248 // Yes, we still have to check this here as the user can change the | |
| 1249 // max_header_length amount! | |
| 1250 // Also it is possible that we have reached the maximum allowed header size, | |
| 1251 // and we have more to consume (remember we are still inside | |
| 1252 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. | |
| 1253 if (header_length > max_header_length_ || | |
| 1254 (header_length == max_header_length_ && size > 0)) { | |
| 1255 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 1256 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; | |
| 1257 visitor_->HandleHeaderError(this); | |
| 1258 goto bottom; | |
| 1259 } | |
| 1260 size_t bytes_to_process = max_header_length_ - header_length; | |
| 1261 if (bytes_to_process > size) { | |
| 1262 bytes_to_process = size; | |
| 1263 } | |
| 1264 current += ProcessHeaders(input, bytes_to_process); | |
| 1265 // If we are still reading headers check if we have crossed the headers | |
| 1266 // limit. Note that we check for >= as opposed to >. This is because if | |
| 1267 // header_length_after equals max_header_length_ and we are still in the | |
| 1268 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for | |
| 1269 // sure that the headers limit will be crossed later on | |
| 1270 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { | |
| 1271 // Note that headers_ is valid only if we are still reading headers. | |
| 1272 const size_t header_length_after = | |
| 1273 headers_->GetReadableBytesFromHeaderStream(); | |
| 1274 if (header_length_after >= max_header_length_) { | |
| 1275 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 1276 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; | |
| 1277 visitor_->HandleHeaderError(this); | |
| 1278 } | |
| 1279 } | |
| 1280 goto bottom; | |
| 1281 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || | |
| 1282 parse_state_ == BalsaFrameEnums::PARSE_ERROR) { | |
| 1283 // Can do nothing more 'till we're reset. | |
| 1284 goto bottom; | |
| 1285 } | |
| 1286 | |
| 1287 while (current < end) { | |
| 1288 switch (parse_state_) { | |
| 1289 label_reading_chunk_length: | |
| 1290 case BalsaFrameEnums::READING_CHUNK_LENGTH: | |
| 1291 // In this state we read the chunk length. | |
| 1292 // Note that once we hit a character which is not in: | |
| 1293 // [0-9;A-Fa-f\n], we transition to a different state. | |
| 1294 // | |
| 1295 { | |
| 1296 // If we used strtol, etc, we'd have to buffer this line. | |
| 1297 // This is more annoying than simply doing the conversion | |
| 1298 // here. This code accounts for overflow. | |
| 1299 static const signed char buf[] = { | |
| 1300 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f | |
| 1301 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, | |
| 1302 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f | |
| 1303 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1304 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f | |
| 1305 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1306 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f | |
| 1307 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, | |
| 1308 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f | |
| 1309 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1310 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f | |
| 1311 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1312 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f | |
| 1313 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1314 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f | |
| 1315 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| 1316 }; | |
| 1317 // valid cases: | |
| 1318 // "09123\n" // -> 09123 | |
| 1319 // "09123\r\n" // -> 09123 | |
| 1320 // "09123 \n" // -> 09123 | |
| 1321 // "09123 \r\n" // -> 09123 | |
| 1322 // "09123 12312\n" // -> 09123 | |
| 1323 // "09123 12312\r\n" // -> 09123 | |
| 1324 // "09123; foo=bar\n" // -> 09123 | |
| 1325 // "09123; foo=bar\r\n" // -> 09123 | |
| 1326 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF | |
| 1327 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF | |
| 1328 // invalid cases: | |
| 1329 // "[ \t]+[^\n]*\n" | |
| 1330 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow) | |
| 1331 // "\r\n" | |
| 1332 // "\n" | |
| 1333 while (current < end) { | |
| 1334 const char c = *current; | |
| 1335 ++current; | |
| 1336 const signed char addition = buf[static_cast<int>(c)]; | |
| 1337 if (addition >= 0) { | |
| 1338 chunk_length_character_extracted_ = true; | |
| 1339 size_t length_x_16 = chunk_length_remaining_ * 16; | |
| 1340 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16; | |
| 1341 if ((chunk_length_remaining_ > kMaxDiv16) || | |
| 1342 ((std::numeric_limits<size_t>::max() - length_x_16) < | |
| 1343 static_cast<size_t>(addition))) { | |
| 1344 // overflow -- asked for a chunk-length greater than 2^64 - 1!! | |
| 1345 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 1346 last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW; | |
| 1347 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1348 visitor_->HandleChunkingError(this); | |
| 1349 goto bottom; | |
| 1350 } | |
| 1351 chunk_length_remaining_ = length_x_16 + addition; | |
| 1352 continue; | |
| 1353 } | |
| 1354 | |
| 1355 if (!chunk_length_character_extracted_ || addition == -1) { | |
| 1356 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no | |
| 1357 // characters were converted, or an unexpected character was | |
| 1358 // seen. | |
| 1359 parse_state_ = BalsaFrameEnums::PARSE_ERROR; | |
| 1360 last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH; | |
| 1361 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1362 visitor_->HandleChunkingError(this); | |
| 1363 goto bottom; | |
| 1364 } | |
| 1365 | |
| 1366 --current; | |
| 1367 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; | |
| 1368 visitor_->ProcessChunkLength(chunk_length_remaining_); | |
| 1369 goto label_reading_chunk_extension; | |
| 1370 } | |
| 1371 } | |
| 1372 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1373 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH | |
| 1374 | |
| 1375 label_reading_chunk_extension: | |
| 1376 case BalsaFrameEnums::READING_CHUNK_EXTENSION: | |
| 1377 { | |
| 1378 // TODO(phython): Convert this scanning to be 16 bytes at a time if | |
| 1379 // there is data to be read. | |
| 1380 const char* extensions_start = current; | |
| 1381 size_t extensions_length = 0; | |
| 1382 while (current < end) { | |
| 1383 const char c = *current; | |
| 1384 if (c == '\r' || c == '\n') { | |
| 1385 extensions_length = | |
| 1386 (extensions_start == current) ? | |
| 1387 0 : | |
| 1388 current - extensions_start - 1; | |
| 1389 } | |
| 1390 | |
| 1391 ++current; | |
| 1392 if (c == '\n') { | |
| 1393 chunk_length_character_extracted_ = false; | |
| 1394 visitor_->ProcessChunkExtensions( | |
| 1395 extensions_start, extensions_length); | |
| 1396 if (chunk_length_remaining_ != 0) { | |
| 1397 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; | |
| 1398 goto label_reading_chunk_data; | |
| 1399 } | |
| 1400 HeaderFramingFound('\n'); | |
| 1401 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; | |
| 1402 goto label_reading_last_chunk_term; | |
| 1403 } | |
| 1404 } | |
| 1405 visitor_->ProcessChunkExtensions( | |
| 1406 extensions_start, extensions_length); | |
| 1407 } | |
| 1408 | |
| 1409 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1410 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION | |
| 1411 | |
| 1412 label_reading_chunk_data: | |
| 1413 case BalsaFrameEnums::READING_CHUNK_DATA: | |
| 1414 while (current < end) { | |
| 1415 if (chunk_length_remaining_ == 0) { | |
| 1416 break; | |
| 1417 } | |
| 1418 // read in the chunk | |
| 1419 size_t bytes_remaining = end - current; | |
| 1420 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ? | |
| 1421 chunk_length_remaining_ : bytes_remaining; | |
| 1422 const char* tmp_current = current + consumed_bytes; | |
| 1423 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); | |
| 1424 visitor_->ProcessBodyData(current, consumed_bytes); | |
| 1425 on_entry = current = tmp_current; | |
| 1426 chunk_length_remaining_ -= consumed_bytes; | |
| 1427 } | |
| 1428 if (chunk_length_remaining_ == 0) { | |
| 1429 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; | |
| 1430 goto label_reading_chunk_term; | |
| 1431 } | |
| 1432 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1433 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA | |
| 1434 | |
| 1435 label_reading_chunk_term: | |
| 1436 case BalsaFrameEnums::READING_CHUNK_TERM: | |
| 1437 while (current < end) { | |
| 1438 const char c = *current; | |
| 1439 ++current; | |
| 1440 | |
| 1441 if (c == '\n') { | |
| 1442 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; | |
| 1443 goto label_reading_chunk_length; | |
| 1444 } | |
| 1445 } | |
| 1446 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1447 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM | |
| 1448 | |
| 1449 label_reading_last_chunk_term: | |
| 1450 case BalsaFrameEnums::READING_LAST_CHUNK_TERM: | |
| 1451 while (current < end) { | |
| 1452 const char c = *current; | |
| 1453 | |
| 1454 if (!HeaderFramingFound(c)) { | |
| 1455 // If not, however, since the spec only suggests that the | |
| 1456 // client SHOULD indicate the presence of trailers, we get to | |
| 1457 // *test* that they did or didn't. | |
| 1458 // If all of the bytes we've seen since: | |
| 1459 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF | |
| 1460 // are either '\r', or '\n', then we can assume that we don't yet | |
| 1461 // know if we need to parse headers, or if the next byte will make | |
| 1462 // the HeaderFramingFound condition (above) true. | |
| 1463 if (HeaderFramingMayBeFound()) { | |
| 1464 // If true, then we have seen only characters '\r' or '\n'. | |
| 1465 ++current; | |
| 1466 | |
| 1467 // Let's try again! There is no state change here. | |
| 1468 continue; | |
| 1469 } else { | |
| 1470 // If (!HeaderFramingMayBeFound()), then we know that we must be | |
| 1471 // reading the first non CRLF character of a trailer. | |
| 1472 parse_state_ = BalsaFrameEnums::READING_TRAILER; | |
| 1473 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1474 on_entry = current; | |
| 1475 goto label_reading_trailer; | |
| 1476 } | |
| 1477 } else { | |
| 1478 // If we've found a "\r\n\r\n", then the message | |
| 1479 // is done. | |
| 1480 ++current; | |
| 1481 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 1482 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1483 visitor_->MessageDone(); | |
| 1484 goto bottom; | |
| 1485 } | |
| 1486 break; // from while loop | |
| 1487 } | |
| 1488 visitor_->ProcessBodyInput(on_entry, current - on_entry); | |
| 1489 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM | |
| 1490 | |
| 1491 label_reading_trailer: | |
| 1492 case BalsaFrameEnums::READING_TRAILER: | |
| 1493 while (current < end) { | |
| 1494 const char c = *current; | |
| 1495 ++current; | |
| 1496 // TODO(fenix): If we ever care about trailers as part of framing, | |
| 1497 // deal with them here (see below for part of the 'solution') | |
| 1498 // if (LineFramingFound(c)) { | |
| 1499 // trailer_lines_.push_back(make_pair(start_of_line_, | |
| 1500 // trailer_length_ - 1)); | |
| 1501 // start_of_line_ = trailer_length_; | |
| 1502 // } | |
| 1503 if (HeaderFramingFound(c)) { | |
| 1504 // ProcessTrailers(visitor_, &trailers_); | |
| 1505 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 1506 visitor_->ProcessTrailerInput(on_entry, current - on_entry); | |
| 1507 visitor_->MessageDone(); | |
| 1508 goto bottom; | |
| 1509 } | |
| 1510 } | |
| 1511 visitor_->ProcessTrailerInput(on_entry, current - on_entry); | |
| 1512 break; // case BalsaFrameEnums::READING_TRAILER | |
| 1513 | |
| 1514 // Note that there is no label: | |
| 1515 // 'label_reading_until_close' | |
| 1516 // here. This is because the state-machine exits immediately after | |
| 1517 // reading the headers instead of transitioning here (as it would | |
| 1518 // do if it was consuming all the data it could, all the time). | |
| 1519 case BalsaFrameEnums::READING_UNTIL_CLOSE: | |
| 1520 { | |
| 1521 const size_t bytes_remaining = end - current; | |
| 1522 if (bytes_remaining > 0) { | |
| 1523 visitor_->ProcessBodyInput(current, bytes_remaining); | |
| 1524 visitor_->ProcessBodyData(current, bytes_remaining); | |
| 1525 current += bytes_remaining; | |
| 1526 } | |
| 1527 } | |
| 1528 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE | |
| 1529 | |
| 1530 // label_reading_content: | |
| 1531 case BalsaFrameEnums::READING_CONTENT: | |
| 1532 #if DEBUGFRAMER | |
| 1533 LOG(INFO) << "ReadingContent: " << content_length_remaining_; | |
| 1534 #endif // DEBUGFRAMER | |
| 1535 while (content_length_remaining_ && current < end) { | |
| 1536 // read in the content | |
| 1537 const size_t bytes_remaining = end - current; | |
| 1538 const size_t consumed_bytes = | |
| 1539 (content_length_remaining_ < bytes_remaining) ? | |
| 1540 content_length_remaining_ : bytes_remaining; | |
| 1541 visitor_->ProcessBodyInput(current, consumed_bytes); | |
| 1542 visitor_->ProcessBodyData(current, consumed_bytes); | |
| 1543 current += consumed_bytes; | |
| 1544 content_length_remaining_ -= consumed_bytes; | |
| 1545 } | |
| 1546 if (content_length_remaining_ == 0) { | |
| 1547 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; | |
| 1548 visitor_->MessageDone(); | |
| 1549 } | |
| 1550 goto bottom; // case BalsaFrameEnums::READING_CONTENT | |
| 1551 | |
| 1552 default: | |
| 1553 // The state-machine should never be in a state that isn't handled | |
| 1554 // above. This is a glaring logic error, and we should do something | |
| 1555 // drastic to ensure that this gets looked-at and fixed. | |
| 1556 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE | |
| 1557 << " memory corruption?!"; // COV_NF_LINE | |
| 1558 } | |
| 1559 } | |
| 1560 bottom: | |
| 1561 #if DEBUGFRAMER | |
| 1562 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" | |
| 1563 << std::string(input, current) | |
| 1564 << "\n$$$$$$$$$$$$$$" | |
| 1565 << BalsaFrameEnums::ParseStateToString(parse_state_) | |
| 1566 << "$$$$$$$$$$$$$$$" | |
| 1567 << " consumed: " << (current - input); | |
| 1568 if (Error()) { | |
| 1569 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); | |
| 1570 } | |
| 1571 #endif // DEBUGFRAMER | |
| 1572 return current - input; | |
| 1573 } | |
| 1574 | |
| 1575 } // namespace net | |
| OLD | NEW |