net/tools/balsa/balsa_frame.cc - Issue 2477703002: Remove now unused Balsa code.

Side by Side Diff: net/tools/balsa/balsa_frame.cc

Issue 2477703002: Remove now unused Balsa code. (Closed)

Patch Set: Rebase Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "net/tools/balsa/balsa_frame.h"

6

7 // Visual C++ defines _M_IX86_FP as 2 if the /arch:SSE2 compiler option is

8 // specified.

9 #if !defined(__SSE2__) && _M_IX86_FP == 2

10 #define __SSE2__ 1

11 #endif

12

13 #include <assert.h>

14 #if __SSE2__

15 #include <emmintrin.h>

16 #endif // __SSE2__

17

18 #include <limits>

19 #include <string>

20 #include <utility>

21 #include <vector>

22

23 #include "base/logging.h"

24 #include "base/strings/string_piece.h"

25 #include "base/strings/string_util.h"

26 #include "net/tools/balsa/balsa_enums.h"

27 #include "net/tools/balsa/balsa_headers.h"

28 #include "net/tools/balsa/balsa_visitor_interface.h"

29 #include "net/tools/balsa/buffer_interface.h"

30 #include "net/tools/balsa/simple_buffer.h"

31 #include "net/tools/balsa/string_piece_utils.h"

32

33 #if defined(COMPILER_MSVC)

34 #include <intrin.h>

35 #include <string.h>

36

37 #pragma intrinsic(_BitScanForward)

38

39 static int ffs(int i) {

40 unsigned long index;

41 return _BitScanForward(&index, i) ? index + 1 : 0;

42 }

43

44 #define strncasecmp _strnicmp

45 #else

46 #include <strings.h>

47 #endif

48

49 namespace net {

50

// Names of the headers which influence how an HTTP message is framed and
// which the framer therefore has to recognize itself. Each *Size constant is
// the length of the corresponding name without its terminating NUL.
static const char kContentLength[] = "content-length";
static const char kTransferEncoding[] = "transfer-encoding";
static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;

57

58 BalsaFrame::BalsaFrame()

59 : last_char_was_slash_r_(false),

60 saw_non_newline_char_(false),

61 start_was_space_(true),

62 chunk_length_character_extracted_(false),

63 is_request_(true),

64 request_was_head_(false),

65 max_header_length_(16 * 1024),

66 max_request_uri_length_(2048),

67 visitor_(&do_nothing_visitor_),

68 chunk_length_remaining_(0),

69 content_length_remaining_(0),

70 last_slash_n_loc_(NULL),

71 last_recorded_slash_n_loc_(NULL),

72 last_slash_n_idx_(0),

73 term_chars_(0),

74 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),

75 last_error_(BalsaFrameEnums::NO_ERROR),

76 headers_(NULL) {

77 }

78

79 BalsaFrame::~BalsaFrame() {}

80

81 void BalsaFrame::Reset() {

82 last_char_was_slash_r_ = false;

83 saw_non_newline_char_ = false;

84 start_was_space_ = true;

85 chunk_length_character_extracted_ = false;

86 // is_request_ = true; // not reset between messages.

87 // request_was_head_ = false; // not reset between messages.

88 // max_header_length_ = 4096; // not reset between messages.

89 // max_request_uri_length_ = 2048; // not reset between messages.

90 // visitor_ = &do_nothing_visitor_; // not reset between messages.

91 chunk_length_remaining_ = 0;

92 content_length_remaining_ = 0;

93 last_slash_n_loc_ = NULL;

94 last_recorded_slash_n_loc_ = NULL;

95 last_slash_n_idx_ = 0;

96 term_chars_ = 0;

97 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;

98 last_error_ = BalsaFrameEnums::NO_ERROR;

99 lines_.clear();

100 if (headers_ != NULL) {

101 headers_->Clear();

102 }

103 }

104

105 const char* BalsaFrameEnums::ParseStateToString(

106 BalsaFrameEnums::ParseState error_code) {

107 switch (error_code) {

108 case PARSE_ERROR:

109 return "PARSE_ERROR";

110 case READING_HEADER_AND_FIRSTLINE:

111 return "READING_HEADER_AND_FIRSTLINE";

112 case READING_CHUNK_LENGTH:

113 return "READING_CHUNK_LENGTH";

114 case READING_CHUNK_EXTENSION:

115 return "READING_CHUNK_EXTENSION";

116 case READING_CHUNK_DATA:

117 return "READING_CHUNK_DATA";

118 case READING_CHUNK_TERM:

119 return "READING_CHUNK_TERM";

120 case READING_LAST_CHUNK_TERM:

121 return "READING_LAST_CHUNK_TERM";

122 case READING_TRAILER:

123 return "READING_TRAILER";

124 case READING_UNTIL_CLOSE:

125 return "READING_UNTIL_CLOSE";

126 case READING_CONTENT:

127 return "READING_CONTENT";

128 case MESSAGE_FULLY_READ:

129 return "MESSAGE_FULLY_READ";

130 case NUM_STATES:

131 return "UNKNOWN_STATE";

132 }

133 return "UNKNOWN_STATE";

134 }

135

136 const char* BalsaFrameEnums::ErrorCodeToString(

137 BalsaFrameEnums::ErrorCode error_code) {

138 switch (error_code) {

139 case NO_ERROR:

140 return "NO_ERROR";

141 case NO_STATUS_LINE_IN_RESPONSE:

142 return "NO_STATUS_LINE_IN_RESPONSE";

143 case NO_REQUEST_LINE_IN_REQUEST:

144 return "NO_REQUEST_LINE_IN_REQUEST";

145 case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:

146 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";

147 case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:

148 return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";

149 case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:

150 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";

151 case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:

152 return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";

153 case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:

154 return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";

155 case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:

156 return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";

157 case FAILED_CONVERTING_STATUS_CODE_TO_INT:

158 return "FAILED_CONVERTING_STATUS_CODE_TO_INT";

159 case REQUEST_URI_TOO_LONG:

160 return "REQUEST_URI_TOO_LONG";

161 case HEADERS_TOO_LONG:

162 return "HEADERS_TOO_LONG";

163 case UNPARSABLE_CONTENT_LENGTH:

164 return "UNPARSABLE_CONTENT_LENGTH";

165 case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:

166 return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";

167 case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:

168 return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";

169 case HEADER_MISSING_COLON:

170 return "HEADER_MISSING_COLON";

171 case INVALID_CHUNK_LENGTH:

172 return "INVALID_CHUNK_LENGTH";

173 case CHUNK_LENGTH_OVERFLOW:

174 return "CHUNK_LENGTH_OVERFLOW";

175 case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:

176 return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";

177 case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:

178 return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";

179 case MULTIPLE_CONTENT_LENGTH_KEYS:

180 return "MULTIPLE_CONTENT_LENGTH_KEYS";

181 case MULTIPLE_TRANSFER_ENCODING_KEYS:

182 return "MULTIPLE_TRANSFER_ENCODING_KEYS";

183 case UNKNOWN_TRANSFER_ENCODING:

184 return "UNKNOWN_TRANSFER_ENCODING";

185 case INVALID_HEADER_FORMAT:

186 return "INVALID_HEADER_FORMAT";

187 case INTERNAL_LOGIC_ERROR:

188 return "INTERNAL_LOGIC_ERROR";

189 case NUM_ERROR_CODES:

190 return "UNKNOWN_ERROR";

191 }

192 return "UNKNOWN_ERROR";

193 }

194

195 // Summary:

196 // Parses the first line of either a request or response.

197 // Note that in the case of a detected warning, error_code will be set

198 // but the function will not return false.

199 // Exactly zero or one warning or error (but not both) may be detected

200 // by this function.

201 // Note that this function will not write the data of the first-line

202 // into the header's buffer (that should already have been done elsewhere).

203 //

204 // Pre-conditions:

205 // begin != end

206 // *begin should be a character which is > ' '. This implies that there

207 // is at least one non-whitespace characters between [begin, end).

208 // headers is a valid pointer to a BalsaHeaders class.

209 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.

210 // Entire first line must exist between [begin, end)

211 // Exactly zero or one newlines -may- exist between [begin, end)

212 // [begin, end) should exist in the header's buffer.

213 //

214 // Side-effects:

215 // headers will be modified

216 // error_code may be modified if either a warning or error is detected

217 //

218 // Returns:

219 // True if no error (as opposed to warning) is detected.

220 // False if an error (as opposed to warning) is detected.

221

222 //

223 // If there is indeed non-whitespace in the line, then the following

224 // will take care of this for you:

225 // while (*begin <= ' ') ++begin;

226 // ProcessFirstLine(begin, end, is_request, &headers, &error_code);

227 //

228 bool ParseHTTPFirstLine(const char* begin,

229 const char* end,

230 bool is_request,

231 size_t max_request_uri_length,

232 BalsaHeaders* headers,

233 BalsaFrameEnums::ErrorCode* error_code) {

234 const char* current = begin;

235 // HTTP firstlines all have the following structure:

236 // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF

237 // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"

238 // ws1 nws1 ws2 nws2 ws3 nws3 ws4

239 // \| [-------) [-------) [----------------)

240 // REQ: method request_uri version

241 // RESP: version statuscode reason

242 //

243 // The first NONWS->LWS component we'll call firstline_a.

244 // The second firstline_b, and the third firstline_c.

245 //

246 // firstline_a goes from nws1 to (but not including) ws2

247 // firstline_b goes from nws2 to (but not including) ws3

248 // firstline_c goes from nws3 to (but not including) ws4

249 //

250 // In the code:

251 // ws1 == whitespace_1_idx_

252 // nws1 == non_whitespace_1_idx_

253 // ws2 == whitespace_2_idx_

254 // nws2 == non_whitespace_2_idx_

255 // ws3 == whitespace_3_idx_

256 // nws3 == non_whitespace_3_idx_

257 // ws4 == whitespace_4_idx_

258

259 // Kill all whitespace (including '\r\n') at the end of the line.

260 --end;

261 if (*end != '\n') {

262 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

263 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

264 << headers->OriginalHeadersForDebugging();

265 return false;

266 }

267 while (begin < end && *end <= ' ') {

268 --end;

269 }

270 DCHECK(*end != '\n');

271 if (*end == '\n') {

272 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

273 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

274 << headers->OriginalHeadersForDebugging();

275 return false;

276 }

277 ++end;

278

279 // The two following statements should not be possible.

280 if (end == begin) {

281 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

282 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

283 << headers->OriginalHeadersForDebugging();

284 return false;

285 }

286

287 // whitespace_1_idx_

288 headers->whitespace_1_idx_ = current - begin;

289 // This loop is commented out as it is never used in current code. This is

290 // true only because we don't begin parsing the headers at all until we've

291 // encountered a non whitespace character at the beginning of the stream, at

292 // which point we begin our demarcation of header-start. If we did -not- do

293 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop

294 // would be necessary for the proper functioning of this parsing.

295 // This is left here as this function may (in the future) be refactored out

296 // of the BalsaFrame class so that it may be shared between code in

297 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the

298 // set_first_line() function (at which point it would be necessary).

299 #if 0

300 while (*current <= ' ') {

301 ++current;

302 }

303 #endif

304 // non_whitespace_1_idx_

305 headers->non_whitespace_1_idx_ = current - begin;

306 do {

307 // The first time through, we're guaranteed that the current character

308 // won't be a whitespace (else the loop above wouldn't have terminated).

309 // That implies that we're guaranteed to get at least one non-whitespace

310 // character if we get into this loop at all.

311 ++current;

312 if (current == end) {

313 headers->whitespace_2_idx_ = current - begin;

314 headers->non_whitespace_2_idx_ = current - begin;

315 headers->whitespace_3_idx_ = current - begin;

316 headers->non_whitespace_3_idx_ = current - begin;

317 headers->whitespace_4_idx_ = current - begin;

318 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request

319 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response

320 *error_code =

321 static_cast<BalsaFrameEnums::ErrorCode>(

322 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +

323 is_request);

324 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION

325 return false;

326 }

327 goto output_exhausted;

328 }

329 } while (*current > ' ');

330 // whitespace_2_idx_

331 headers->whitespace_2_idx_ = current - begin;

332 do {

333 ++current;

334 // Note that due to the loop which consumes all of the whitespace

335 // at the end of the line, current can never == end while in this function.

336 } while (*current <= ' ');

337 // non_whitespace_2_idx_

338 headers->non_whitespace_2_idx_ = current - begin;

339 do {

340 ++current;

341 if (current == end) {

342 headers->whitespace_3_idx_ = current - begin;

343 headers->non_whitespace_3_idx_ = current - begin;

344 headers->whitespace_4_idx_ = current - begin;

345 // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request

346 // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response

347 *error_code =

348 static_cast<BalsaFrameEnums::ErrorCode>(

349 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE

350 + is_request);

351 goto output_exhausted;

352 }

353 } while (*current > ' ');

354 // whitespace_3_idx_

355 headers->whitespace_3_idx_ = current - begin;

356 do {

357 ++current;

358 // Note that due to the loop which consumes all of the whitespace

359 // at the end of the line, current can never == end while in this function.

360 } while (*current <= ' ');

361 // non_whitespace_3_idx_

362 headers->non_whitespace_3_idx_ = current - begin;

363 headers->whitespace_4_idx_ = end - begin;

364

365 output_exhausted:

366 // Note that we don't fail the parse immediately when parsing of the

367 // firstline fails. Depending on the protocol type, we may want to accept

368 // a firstline with only one or two elements, e.g., for HTTP/0.9:

369 // GET\r\n

370 // or

371 // GET /\r\n

372 // should be parsed without issue (though the visitor should know that

373 // parsing the entire line was not exactly as it should be).

374 //

375 // Eventually, these errors may be removed alltogether, as the visitor can

376 // detect them on its own by examining the size of the various fields.

377 // headers->set_first_line(non_whitespace_1_idx_, current);

378

379 if (is_request) {

380 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >

381 max_request_uri_length) {

382 // For requests, we need at least the method. We could assume that a

383 // blank URI means "/". If version isn't stated, it should be assumed

384 // to be HTTP/0.9 by the visitor.

385 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;

386 return false;

387 }

388 } else {

389 headers->parsed_response_code_ = 0;

390 {

391 const char* parsed_response_code_current =

392 begin + headers->non_whitespace_2_idx_;

393 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;

394 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;

395

396 // Convert a string of [0-9]* into an int.

397 // Note that this allows for the conversion of response codes which

398 // are outside the bounds of normal HTTP response codes (no checking

399 // is done to ensure that these are valid-- they're merely parsed)!

400 while (parsed_response_code_current < parsed_response_code_end) {

401 if (*parsed_response_code_current < '0' \|\|

402 *parsed_response_code_current > '9') {

403 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;

404 return false;

405 }

406 size_t status_code_x_10 = headers->parsed_response_code_ * 10;

407 uint8_t c = *parsed_response_code_current - '0';

408 if ((headers->parsed_response_code_ > kMaxDiv10) \|\|

409 (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {

410 // overflow.

411 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;

412 return false;

413 }

414 headers->parsed_response_code_ = status_code_x_10 + c;

415 ++parsed_response_code_current;

416 }

417 }

418 }

419 return true;

420 }

421

422 // begin - beginning of the firstline

423 // end - end of the firstline

424 //

425 // A precondition for this function is that there is non-whitespace between

426 // [begin, end). If this precondition is not met, the function will not perform

427 // as expected (and bad things may happen, and it will eat your first, second,

428 // and third unborn children!).

429 //

430 // Another precondition for this function is that [begin, end) includes

431 // at most one newline, which must be at the end of the line.

432 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {

433 BalsaFrameEnums::ErrorCode previous_error = last_error_;

434 if (!ParseHTTPFirstLine(begin,

435 end,

436 is_request_,

437 max_request_uri_length_,

438 headers_,

439 &last_error_)) {

440 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

441 visitor_->HandleHeaderError(this);

442 return;

443 }

444 if (previous_error != last_error_) {

445 visitor_->HandleHeaderWarning(this);

446 }

447

448 if (is_request_) {

449 size_t version_length =

450 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;

451 visitor_->ProcessRequestFirstLine(

452 begin + headers_->non_whitespace_1_idx_,

453 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,

454 begin + headers_->non_whitespace_1_idx_,

455 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,

456 begin + headers_->non_whitespace_2_idx_,

457 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,

458 begin + headers_->non_whitespace_3_idx_,

459 version_length);

460 if (version_length == 0)

461 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

462 } else {

463 visitor_->ProcessResponseFirstLine(

464 begin + headers_->non_whitespace_1_idx_,

465 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,

466 begin + headers_->non_whitespace_1_idx_,

467 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,

468 begin + headers_->non_whitespace_2_idx_,

469 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,

470 begin + headers_->non_whitespace_3_idx_,

471 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);

472 }

473 }

474

475 // 'stream_begin' points to the first character of the headers buffer.

476 // 'line_begin' points to the first character of the line.

477 // 'current' points to a char which is ':'.

478 // 'line_end' points to the position of '\n' + 1.

479 // 'line_begin' points to the position of first character of line.

480 void BalsaFrame::CleanUpKeyValueWhitespace(

481 const char* stream_begin,

482 const char* line_begin,

483 const char* current,

484 const char* line_end,

485 HeaderLineDescription* current_header_line) {

486 const char* colon_loc = current;

487 DCHECK_LT(colon_loc, line_end);

488 DCHECK_EQ(':', *colon_loc);

489 DCHECK_EQ(':', *current);

490 DCHECK_GE(' ', *line_end)

491 << "\"" << std::string(line_begin, line_end) << "\"";

492

493 // TODO(fenix): Investigate whether or not the bounds tests in the

494 // while loops here are redundant, and if so, remove them.

495 --current;

496 while (current > line_begin && *current <= ' ') --current;

497 current += (current != colon_loc);

498 current_header_line->key_end_idx = current - stream_begin;

499

500 current = colon_loc;

501 DCHECK_EQ(':', *current);

502 ++current;

503 while (current < line_end && *current <= ' ') ++current;

504 current_header_line->value_begin_idx = current - stream_begin;

505

506 DCHECK_GE(current_header_line->key_end_idx,

507 current_header_line->first_char_idx);

508 DCHECK_GE(current_header_line->value_begin_idx,

509 current_header_line->key_end_idx);

510 DCHECK_GE(current_header_line->last_char_idx,

511 current_header_line->value_begin_idx);

512 }

513

// Splits the header lines recorded in lines_ into key/value descriptions.
//
// The first entry of lines_ (the first line of the message) and the last
// entry (the terminating empty line) are skipped. Every line in between that
// starts with a character <= ' ' is folded into the preceding header as a
// continuation. For each resulting header one HeaderLineDescription is
// appended to headers_->header_lines_, and the first ':' found in it (if any)
// separates key from value. A header without a colon sets last_error_ to
// HEADER_MISSING_COLON and is reported through visitor_->HandleHeaderWarning().
//
// Reads lines_[1] unconditionally, so lines_ must hold at least two entries.
514 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {

515 DCHECK(!lines_.empty());

516 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

517 // The last line is always just a newline (and is uninteresting).

518 const Lines::size_type lines_size_m1 = lines_.size() - 1;

519 #if __SSE2__

520 const __m128i colons = _mm_set1_epi8(':');

// Last position from which a 16-byte load is still allowed by the loop below.
// NOTE(review): for a header stream shorter than 16 bytes this pointer lies
// before the start of the buffer -- confirm that is acceptable.
521 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;

522 #endif // __SSE2__

// |current| is the colon-search cursor. It is shared by all header lines and
// is either advanced or reset to the beginning of the line being processed.
523 const char* current = stream_begin + lines_[1].first;

524 // This code is a bit more subtle than it may appear at first glance.

525 // This code looks for a colon in the current line... but it also looks

526 // beyond the current line. If there is no colon in the current line, then

527 // for each subsequent line (until the colon which -has- been found is

528 // associated with a line), no searching for a colon will be performed. In

529 // this way, we minimize the amount of bytes we have scanned for a colon.

530 for (Lines::size_type i = 1; i < lines_size_m1;) {

531 const char* line_begin = stream_begin + lines_[i].first;

532

533 // Here we handle possible continuations. Note that we do not replace

534 // the '\n' in the line before a continuation (at least, as of now),

535 // which implies that any code which looks for a value must deal with

536 // "\r\n", etc -within- the line (and not just at the end of it).

537 for (++i; i < lines_size_m1; ++i) {

538 const char c = *(stream_begin + lines_[i].first);

539 if (c > ' ') {

540 // Not a continuation, so stop. Note that if the 'original' i = 1,

541 // and the next line is not a continuation, we'll end up with i = 2

542 // when we break. This handles the incrementing of i for the outer

543 // loop.

544 break;

545 }

546 }

// lines_[i - 1] is the last physical line belonging to this header.
547 const char* line_end = stream_begin + lines_[i - 1].second;

548 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);

549

550 // We cleanup the whitespace at the end of the line before doing anything

551 // else of interest as it allows us to do nothing when irregularly formatted

552 // headers are parsed (e.g. those with only keys, only values, or no colon).

553 //

554 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.

555 --line_end;

556 DCHECK_EQ('\n', *line_end)

557 << "\"" << std::string(line_begin, line_end) << "\"";

558 while (*line_end <= ' ' && line_end > line_begin) {

559 --line_end;

560 }

561 ++line_end;

562 DCHECK_GE(' ', *line_end);

563 DCHECK_LT(line_begin, line_end);

564

565 // We use '0' for the block idx, because we're always writing to the first

566 // block from the framer (we do this because the framer requires that the

567 // entire header sequence be in a contiguous buffer).

568 headers_->header_lines_.push_back(

569 HeaderLineDescription(line_begin - stream_begin,

570 line_end - stream_begin,

571 line_end - stream_begin,

572 line_end - stream_begin,

573 0));

// An earlier search already ran past this line without finding a colon in it.
574 if (current >= line_end) {

575 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;

576 visitor_->HandleHeaderWarning(this);

577 // Then the next colon will not be found within this header line-- time

578 // to try again with another header-line.

579 continue;

580 } else if (current < line_begin) {

581 // When this condition is true, the last detected colon was part of a

582 // previous line. We reset to the beginning of the line as we don't care

583 // about the presence of any colon before the beginning of the current

584 // line.

585 current = line_begin;

586 }

587 #if __SSE2__

// Vectorized search: compare 16 bytes at a time against ':' and turn the
// result into a bit mask with one bit per byte. Only runs while |current| is
// below header_lines_end_m16.
588 while (current < header_lines_end_m16) {

589 __m128i header_bytes =

590 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));

591 __m128i colon_cmp = _mm_cmpeq_epi8(header_bytes, colons);

592 int colon_msk = _mm_movemask_epi8(colon_cmp);

593 if (colon_msk == 0) {

594 current += 16;

595 continue;

596 }

// ffs() is 1-based, so this moves |current| onto the first ':' of the chunk.
597 current += (ffs(colon_msk) - 1);

// The colon lies beyond this header; leave |current| on it so that the
// header it belongs to can reuse the result.
598 if (current > line_end) {

599 break;

600 }

601 goto found_colon;

602 }

603 #endif // __SSE2__

// Byte-wise search for whatever the vectorized loop did not cover (and for
// builds without SSE2).
604 for (; current < line_end; ++current) {

605 if (*current != ':') {

606 continue;

607 }

608 goto found_colon;

609 }

610 // If we've gotten to here, then there was no colon

611 // in the line. The arguments we passed into the construction

612 // for the HeaderLineDescription object should be OK-- it assumes

613 // that the entire content is 'key' by default (which is true, as

614 // there was no colon, there can be no value). Note that this is a

615 // construct which is technically not allowed by the spec.

616 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;

617 visitor_->HandleHeaderWarning(this);

618 continue;

619 found_colon:

620 DCHECK_EQ(*current, ':');

621 DCHECK_LE(current - stream_begin, line_end - stream_begin);

622 DCHECK_LE(stream_begin - stream_begin, current - stream_begin);

623

// Provisional split at the colon; CleanUpKeyValueWhitespace() below overwrites
// both indices after trimming the whitespace around the colon.
624 HeaderLineDescription& current_header_line = headers_->header_lines_.back();

625 current_header_line.key_end_idx = current - stream_begin;

626 current_header_line.value_begin_idx = current_header_line.key_end_idx;

627 if (current < line_end) {

628 ++current_header_line.key_end_idx;

629

630 CleanUpKeyValueWhitespace(stream_begin,

631 line_begin,

632 current,

633 line_end,

634 &current_header_line);

635 }

636 }

637 }

638

639 void BalsaFrame::ProcessContentLengthLine(

640 HeaderLines::size_type line_idx,

641 BalsaHeadersEnums::ContentLengthStatus* status,

642 size_t* length) {

643 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];

644 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

645 const char* line_end = stream_begin + header_line.last_char_idx;

646 const char* value_begin = (stream_begin + header_line.value_begin_idx);

647

648 if (value_begin >= line_end) {

649 // There is no non-whitespace value data.

650 #if DEBUGFRAMER

651 LOG(INFO) << "invalid content-length -- no non-whitespace value data";

652 #endif

653 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;

654 return;

655 }

656

657 *length = 0;

658 while (value_begin < line_end) {

659 if (value_begin < '0' \|\| value_begin > '9') {

660 // bad! content-length found, and couldn't parse all of it!

661 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;

662 #if DEBUGFRAMER

663 LOG(INFO) << "invalid content-length - non numeric character detected";

664 #endif // DEBUGFRAMER

665 return;

666 }

667 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;

668 size_t length_x_10 = length 10;

669 const unsigned char c = *value_begin - '0';

670 if (*length > kMaxDiv10 \|\|

671 (std::numeric_limits<size_t>::max() - length_x_10) < c) {

672 *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;

673 #if DEBUGFRAMER

674 LOG(INFO) << "content-length overflow";

675 #endif // DEBUGFRAMER

676 return;

677 }

678 *length = length_x_10 + c;

679 ++value_begin;

680 }

681 #if DEBUGFRAMER

682 LOG(INFO) << "content_length parsed: " << *length;

683 #endif // DEBUGFRAMER

684 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;

685 }

686

687 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {

688 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];

689 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

690 const char* line_end = stream_begin + header_line.last_char_idx;

691 const char* value_begin = stream_begin + header_line.value_begin_idx;

692 size_t value_length = line_end - value_begin;

693

694 if ((value_length == 7) &&

695 !strncasecmp(value_begin, "chunked", 7)) {

696 headers_->transfer_encoding_is_chunked_ = true;

697 } else if ((value_length == 8) &&

698 !strncasecmp(value_begin, "identity", 8)) {

699 headers_->transfer_encoding_is_chunked_ = false;

700 } else {

701 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;

702 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

703 visitor_->HandleHeaderError(this);

704 return;

705 }

706 }

707

708 namespace {

709 bool SplitStringPiece(base::StringPiece original, char delim,

710 base::StringPiece* before, base::StringPiece* after) {

711 const char* p = original.data();

712 const char* end = p + original.size();

713

714 while (p != end) {

715 if (*p == delim) {

716 ++p;

717 } else {

718 const char* start = p;

719 while (++p != end && *p != delim) {

720 // Skip to the next occurence of the delimiter.

721 }

722 *before = base::StringPiece(start, p - start);

723 if (p != end)

724 *after = base::StringPiece(p + 1, end - (p + 1));

725 else

726 *after = base::StringPiece("");

727 before = base::TrimWhitespaceASCII(before, base::TRIM_ALL);

728 after = base::TrimWhitespaceASCII(after, base::TRIM_ALL);

729 return true;

730 }

731 }

732

733 *before = original;

734 *after = "";

735 return false;

736 }

737

738 // TODO(phython): Fix this function to properly deal with quoted values.

739 // E.g. ";;foo", "\";;\"", or \"aa;

740 // The last example, the semi-colon is a separator between extensions.

741 void ProcessChunkExtensionsManual(base::StringPiece all_extensions,

742 BalsaHeaders* extensions) {

743 base::StringPiece extension;

744 base::StringPiece remaining;

745 all_extensions = base::TrimWhitespaceASCII(all_extensions, base::TRIM_ALL);

746 SplitStringPiece(all_extensions, ';', &extension, &remaining);

747 while (!extension.empty()) {

748 base::StringPiece key;

749 base::StringPiece value;

750 SplitStringPiece(extension, '=', &key, &value);

751 if (!value.empty()) {

752 // Strip quotation marks if they exist.

753 if (!value.empty() && value.front() == '"')

754 value.remove_prefix(1);

755 if (!value.empty() && value.back() == '"')

756 value.remove_suffix(1);

757 }

758

759 extensions->AppendHeader(key, value);

760

761 remaining = base::TrimWhitespaceASCII(remaining, base::TRIM_ALL);

762 SplitStringPiece(remaining, ';', &extension, &remaining);

763 }

764 }

765

766 } // anonymous namespace

767

768 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,

769 BalsaHeaders* extensions) {

770 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);

771 }

772

773 void BalsaFrame::ProcessHeaderLines() {

774 HeaderLines::size_type content_length_idx = 0;

775 HeaderLines::size_type transfer_encoding_idx = 0;

776

777 DCHECK(!lines_.empty());

778 #if DEBUGFRAMER

779 LOG(INFO) << "****@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@********\n";

780 #endif // DEBUGFRAMER

781

782 // There is no need to attempt to process headers if no header lines exist.

783 // There are at least two lines in the message which are not header lines.

784 // These two non-header lines are the first line of the message, and the

785 // last line of the message (which is an empty line).

786 // Thus, we test to see if we have more than two lines total before attempting

787 // to parse any header lines.

788 if (lines_.size() > 2) {

789 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

790

791 // Then, for the rest of the header data, we parse these into key-value

792 // pairs.

793 FindColonsAndParseIntoKeyValue();

794 // At this point, we've parsed all of the headers. Time to look for those

795 // headers which we require for framing.

796 const HeaderLines::size_type

797 header_lines_size = headers_->header_lines_.size();

798 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {

799 const HeaderLineDescription& current_header_line =

800 headers_->header_lines_[i];

801 const char* key_begin =

802 (stream_begin + current_header_line.first_char_idx);

803 const char* key_end = (stream_begin + current_header_line.key_end_idx);

804 const size_t key_len = key_end - key_begin;

805 const char c = *key_begin;

806 #if DEBUGFRAMER

807 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)

808 << " c: '" << c << "' key_len: " << key_len;

809 #endif // DEBUGFRAMER

810 // If a header begins with either lowercase or uppercase 'c' or 't', then

811 // the header may be one of content-length, connection, content-encoding

812 // or transfer-encoding. These headers are special, as they change the way

813 // that the message is framed, and so the framer is required to search

814 // for them.

815

816

817 if (c == 'c' \|\| c == 'C') {

818 if ((key_len == kContentLengthSize) &&

819 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {

820 BalsaHeadersEnums::ContentLengthStatus content_length_status =

821 BalsaHeadersEnums::NO_CONTENT_LENGTH;

822 size_t length = 0;

823 ProcessContentLengthLine(i, &content_length_status, &length);

824 if (content_length_idx != 0) { // then we've already seen one!

825 if ((headers_->content_length_status_ != content_length_status) \|\|

826 ((headers_->content_length_status_ ==

827 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&

828 length != headers_->content_length_)) {

829 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;

830 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

831 visitor_->HandleHeaderError(this);

832 return;

833 }

834 continue;

835 } else {

836 content_length_idx = i + 1;

837 headers_->content_length_status_ = content_length_status;

838 headers_->content_length_ = length;

839 content_length_remaining_ = length;

840 }

841

842 }

843 } else if (c == 't' \|\| c == 'T') {

844 if ((key_len == kTransferEncodingSize) &&

845 0 == strncasecmp(key_begin, kTransferEncoding,

846 kTransferEncodingSize)) {

847 if (transfer_encoding_idx != 0) {

848 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;

849 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

850 visitor_->HandleHeaderError(this);

851 return;

852 }

853 transfer_encoding_idx = i + 1;

854 }

855 } else if (i == 0 && (key_len == 0 \|\| c == ' ')) {

856 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;

857 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

858 visitor_->HandleHeaderError(this);

859 return;

860 }

861 }

862 if (headers_->transfer_encoding_is_chunked_) {

863 headers_->content_length_ = 0;

864 headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;

865 content_length_remaining_ = 0;

866 }

867 if (transfer_encoding_idx != 0) {

868 ProcessTransferEncodingLine(transfer_encoding_idx - 1);

869 }

870 }

871 }

872

873 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {

874 // For responses, can't have a body if the request was a HEAD, or if it is

875 // one of these response-codes. rfc2616 section 4.3

876 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

877 if (is_request_ \|\|

878 !(request_was_head_ \|\|

879 (headers_->parsed_response_code_ >= 100 &&

880 headers_->parsed_response_code_ < 200) \|\|

881 (headers_->parsed_response_code_ == 204) \|\|

882 (headers_->parsed_response_code_ == 304))) {

883 // Then we can have a body.

884 if (headers_->transfer_encoding_is_chunked_) {

885 // Note that

886 // if ( Transfer-Encoding: chunked && Content-length: )

887 // then Transfer-Encoding: chunked trumps.

888 // This is as specified in the spec.

889 // rfc2616 section 4.4.3

890 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;

891 } else {

892 // Errors parsing content-length definitely can cause

893 // protocol errors/warnings

894 switch (headers_->content_length_status_) {

895 // If we have a content-length, and it is parsed

896 // properly, there are two options.

897 // 1) zero content, in which case the message is done, and

898 // 2) nonzero content, in which case we have to

899 // consume the body.

900 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:

901 if (headers_->content_length_ == 0) {

902 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

903 } else {

904 parse_state_ = BalsaFrameEnums::READING_CONTENT;

905 }

906 break;

907 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:

908 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:

909 // If there were characters left-over after parsing the

910 // content length, we should flag an error and stop.

911 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

912 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;

913 visitor_->HandleHeaderError(this);

914 break;

915 // We can have: no transfer-encoding, no content length, and no

916 // connection: close...

917 // Unfortunately, this case doesn't seem to be covered in the spec.

918 // We'll assume that the safest thing to do here is what the google

919 // binaries before 2008 already do, which is to assume that

920 // everything until the connection is closed is body.

921 case BalsaHeadersEnums::NO_CONTENT_LENGTH:

922 if (is_request_) {

923 base::StringPiece method = headers_->request_method();

924 // POSTs and PUTs should have a detectable body length. If they

925 // do not we consider it an error.

926 if ((method.size() == 4 &&

927 strncmp(method.data(), "POST", 4) == 0) \|\|

928 (method.size() == 3 &&

929 strncmp(method.data(), "PUT", 3) == 0)) {

930 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

931 last_error_ =

932 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;

933 visitor_->HandleHeaderError(this);

934 break;

935 }

936 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

937 } else {

938 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;

939 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;

940 visitor_->HandleHeaderWarning(this);

941 }

942 break;

943 // The COV_NF_... statements here provide hints to the apparatus

944 // which computes coverage reports/ratios that this code is never

945 // intended to be executed, and should technically be impossible.

946 // COV_NF_START

947 default:

948 LOG(FATAL) << "Saw a content_length_status: "

949 << headers_->content_length_status_ << " which is unknown.";

950 // COV_NF_END

951 }

952 }

953 }

954 }

955

956 size_t BalsaFrame::ProcessHeaders(const char* message_start,

957 size_t message_length) {

958 const char* const original_message_start = message_start;

959 const char* const message_end = message_start + message_length;

960 const char* message_current = message_start;

961 const char* checkpoint = message_start;

962

963 if (message_length == 0) {

964 goto bottom;

965 }

966

967 while (message_current < message_end) {

968 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();

969

970 // Yes, we could use strchr (assuming null termination), or

971 // memchr, but as it turns out that is slower than this tight loop

972 // for the input that we see.

973 if (!saw_non_newline_char_) {

974 do {

975 const char c = *message_current;

976 if (c != '\r' && c != '\n') {

977 if (c <= ' ') {

978 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

979 last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;

980 visitor_->HandleHeaderError(this);

981 goto bottom;

982 } else {

983 saw_non_newline_char_ = true;

984 checkpoint = message_start = message_current;

985 goto read_real_message;

986 }

987 }

988 ++message_current;

989 } while (message_current < message_end);

990 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks

991 } else {

992 read_real_message:

993 // Note that SSE2 can be enabled on certain piii platforms.

994 #if __SSE2__

995 {

996 const char* const message_end_m16 = message_end - 16;

997 __m128i newlines = _mm_set1_epi8('\n');

998 while (message_current < message_end_m16) {

999 // What this does (using compiler intrinsics):

1000 //

1001 // Load 16 '\n's into an xmm register

1002 // Load 16 bytes of currennt message into an xmm register

1003 // Do byte-wise equals on those two xmm registers

1004 // Take the first bit of each byte, and put that into the first

1005 // 16 bits of a mask

1006 // If the mask is zero, no '\n' found. increment by 16 and try again

1007 // Else scan forward to find the first set bit.

1008 // Increment current by the index of the first set bit

1009 // (ffs returns index of first set bit + 1)

1010 __m128i msg_bytes =

1011 _mm_loadu_si128(const_cast<__m128i *>(

1012 reinterpret_cast<const __m128i *>(message_current)));

1013 __m128i newline_cmp = _mm_cmpeq_epi8(msg_bytes, newlines);

1014 int newline_msk = _mm_movemask_epi8(newline_cmp);

1015 if (newline_msk == 0) {

1016 message_current += 16;

1017 continue;

1018 }

1019 message_current += (ffs(newline_msk) - 1);

1020 const size_t relative_idx = message_current - message_start;

1021 const size_t message_current_idx = 1 + base_idx + relative_idx;

1022 lines_.push_back(std::make_pair(last_slash_n_idx_,

1023 message_current_idx));

1024 if (lines_.size() == 1) {

1025 headers_->WriteFromFramer(checkpoint,

1026 1 + message_current - checkpoint);

1027 checkpoint = message_current + 1;

1028 const char* begin = headers_->OriginalHeaderStreamBegin();

1029 #if DEBUGFRAMER

1030 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);

1031 LOG(INFO) << "is_request_: " << is_request_;

1032 #endif

1033 ProcessFirstLine(begin, begin + lines_[0].second);

1034 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)

1035 goto process_lines;

1036 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)

1037 goto bottom;

1038 }

1039 const size_t chars_since_last_slash_n = (message_current_idx -

1040 last_slash_n_idx_);

1041 last_slash_n_idx_ = message_current_idx;

1042 if (chars_since_last_slash_n > 2) {

1043 // We have a slash-n, but the last slash n was

1044 // more than 2 characters away from this. Thus, we know

1045 // that this cannot be an end-of-header.

1046 ++message_current;

1047 continue;

1048 }

1049 if ((chars_since_last_slash_n == 1) \|\|

1050 (((message_current > message_start) &&

1051 (*(message_current - 1) == '\r')) \|\|

1052 (last_char_was_slash_r_))) {

1053 goto process_lines;

1054 }

1055 ++message_current;

1056 }

1057 }

1058 #endif // __SSE2__

1059 while (message_current < message_end) {

1060 if (*message_current != '\n') {

1061 ++message_current;

1062 continue;

1063 }

1064 const size_t relative_idx = message_current - message_start;

1065 const size_t message_current_idx = 1 + base_idx + relative_idx;

1066 lines_.push_back(std::make_pair(last_slash_n_idx_,

1067 message_current_idx));

1068 if (lines_.size() == 1) {

1069 headers_->WriteFromFramer(checkpoint,

1070 1 + message_current - checkpoint);

1071 checkpoint = message_current + 1;

1072 const char* begin = headers_->OriginalHeaderStreamBegin();

1073 #if DEBUGFRAMER

1074 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);

1075 LOG(INFO) << "is_request_: " << is_request_;

1076 #endif

1077 ProcessFirstLine(begin, begin + lines_[0].second);

1078 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)

1079 goto process_lines;

1080 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)

1081 goto bottom;

1082 }

1083 const size_t chars_since_last_slash_n = (message_current_idx -

1084 last_slash_n_idx_);

1085 last_slash_n_idx_ = message_current_idx;

1086 if (chars_since_last_slash_n > 2) {

1087 // false positive.

1088 ++message_current;

1089 continue;

1090 }

1091 if ((chars_since_last_slash_n == 1) \|\|

1092 (((message_current > message_start) &&

1093 (*(message_current - 1) == '\r')) \|\|

1094 (last_char_was_slash_r_))) {

1095 goto process_lines;

1096 }

1097 ++message_current;

1098 }

1099 }

1100 continue;

1101 process_lines:

1102 ++message_current;

1103 DCHECK(message_current >= message_start);

1104 if (message_current > message_start) {

1105 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);

1106 }

1107

1108 // Check if we have exceeded maximum headers length

1109 // Although we check for this limit before and after we call this function

1110 // we check it here as well to make sure that in case the visitor changed

1111 // the max_header_length_ (for example after processing the first line)

1112 // we handle it gracefully.

1113 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {

1114 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1115 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1116 visitor_->HandleHeaderError(this);

1117 goto bottom;

1118 }

1119

1120 // Since we know that we won't be writing any more bytes of the header,

1121 // we tell that to the headers object. The headers object may make

1122 // more efficient allocation decisions when this is signaled.

1123 headers_->DoneWritingFromFramer();

1124 {

1125 const char* readable_ptr = NULL;

1126 size_t readable_size = 0;

1127 headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);

1128 visitor_->ProcessHeaderInput(readable_ptr, readable_size);

1129 }

1130

1131 // Ok, now that we've written everything into our header buffer, it is

1132 // time to process the header lines (extract proper values for headers

1133 // which are important for framing).

1134 ProcessHeaderLines();

1135 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1136 goto bottom;

1137 }

1138 AssignParseStateAfterHeadersHaveBeenParsed();

1139 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1140 goto bottom;

1141 }

1142 visitor_->ProcessHeaders(*headers_);

1143 visitor_->HeaderDone();

1144 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {

1145 visitor_->MessageDone();

1146 }

1147 goto bottom;

1148 }

1149 // If we've gotten to here, it means that we've consumed all of the

1150 // available input. We need to record whether or not the last character we

1151 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds

1152 // a header framing that is split across the two calls.

1153 last_char_was_slash_r_ = (*(message_end - 1) == '\r');

1154 DCHECK(message_current >= message_start);

1155 if (message_current > message_start) {

1156 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);

1157 }

1158 bottom:

1159 return message_current - original_message_start;

1160 }

1161

1162

1163 size_t BalsaFrame::BytesSafeToSplice() const {

1164 switch (parse_state_) {

1165 case BalsaFrameEnums::READING_CHUNK_DATA:

1166 return chunk_length_remaining_;

1167 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1168 return std::numeric_limits<size_t>::max();

1169 case BalsaFrameEnums::READING_CONTENT:

1170 return content_length_remaining_;

1171 default:

1172 return 0;

1173 }

1174 }

1175

1176 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {

1177 switch (parse_state_) {

1178 case BalsaFrameEnums::READING_CHUNK_DATA:

1179 if (chunk_length_remaining_ >= bytes_spliced) {

1180 chunk_length_remaining_ -= bytes_spliced;

1181 if (chunk_length_remaining_ == 0) {

1182 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;

1183 }

1184 return;

1185 } else {

1186 last_error_ =

1187 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;

1188 goto error_exit;

1189 }

1190

1191 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1192 return;

1193

1194 case BalsaFrameEnums::READING_CONTENT:

1195 if (content_length_remaining_ >= bytes_spliced) {

1196 content_length_remaining_ -= bytes_spliced;

1197 if (content_length_remaining_ == 0) {

1198 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1199 visitor_->MessageDone();

1200 }

1201 return;

1202 } else {

1203 last_error_ =

1204 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;

1205 goto error_exit;

1206 }

1207

1208 default:

1209 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;

1210 goto error_exit;

1211 }

1212

1213 error_exit:

1214 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1215 visitor_->HandleBodyError(this);

1216 };

1217

1218 // You may note that the state-machine contained within this function has both

1219 // switch and goto labels for nearly the same thing. For instance, the

1220 // following two labels refer to the same code block:

1221 // label_reading_chunk_data:

1222 // case BalsaFrameEnums::READING_CHUNK_DATA:

1223 // The 'case' statement is required for the switch statement which occurs when

1224 // ProcessInput is invoked. The goto label is required as the state-machine

1225 // does not use a computed goto in any subsequent operations.

1226 //

1227 // Since several states exit the state machine for various reasons, there is

1228 // also one label at the bottom of the function. When it is appropriate to

1229 // return from the function, that part of the state machine instead issues a

1230 // goto bottom; This results in less code duplication, and makes debugging

1231 // easier (as you can add a statement to a section of code which is guaranteed

1232 // to be invoked when the function is exiting.

1233 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {

1234 const char* current = input;

1235 const char* on_entry = current;

1236 const char* end = current + size;

1237 #if DEBUGFRAMER

1238 LOG(INFO) << "\n=============="

1239 << BalsaFrameEnums::ParseStateToString(parse_state_)

1240 << "===============\n";

1241 #endif // DEBUGFRAMER

1242

1243 DCHECK(headers_ != NULL);

1244 if (headers_ == NULL) return 0;

1245

1246 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {

1247 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();

1248 // Yes, we still have to check this here as the user can change the

1249 // max_header_length amount!

1250 // Also it is possible that we have reached the maximum allowed header size,

1251 // and we have more to consume (remember we are still inside

1252 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.

1253 if (header_length > max_header_length_ \|\|

1254 (header_length == max_header_length_ && size > 0)) {

1255 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1256 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1257 visitor_->HandleHeaderError(this);

1258 goto bottom;

1259 }

1260 size_t bytes_to_process = max_header_length_ - header_length;

1261 if (bytes_to_process > size) {

1262 bytes_to_process = size;

1263 }

1264 current += ProcessHeaders(input, bytes_to_process);

1265 // If we are still reading headers check if we have crossed the headers

1266 // limit. Note that we check for >= as opposed to >. This is because if

1267 // header_length_after equals max_header_length_ and we are still in the

1268 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for

1269 // sure that the headers limit will be crossed later on

1270 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {

1271 // Note that headers_ is valid only if we are still reading headers.

1272 const size_t header_length_after =

1273 headers_->GetReadableBytesFromHeaderStream();

1274 if (header_length_after >= max_header_length_) {

1275 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1276 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1277 visitor_->HandleHeaderError(this);

1278 }

1279 }

1280 goto bottom;

1281 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ \|\|

1282 parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1283 // Can do nothing more 'till we're reset.

1284 goto bottom;

1285 }

1286

1287 while (current < end) {

1288 switch (parse_state_) {

1289 label_reading_chunk_length:

1290 case BalsaFrameEnums::READING_CHUNK_LENGTH:

1291 // In this state we read the chunk length.

1292 // Note that once we hit a character which is not in:

1293 // [0-9;A-Fa-f\n], we transition to a different state.

1294 //

1295 {

1296 // If we used strtol, etc, we'd have to buffer this line.

1297 // This is more annoying than simply doing the conversion

1298 // here. This code accounts for overflow.

1299 static const signed char buf[] = {

1300 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f

1301 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,

1302 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f

1303 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1304 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f

1305 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1306 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f

1307 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1,

1308 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f

1309 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1310 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f

1311 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1312 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f

1313 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1314 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f

1315 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1316 };

1317 // valid cases:

1318 // "09123\n" // -> 09123

1319 // "09123\r\n" // -> 09123

1320 // "09123 \n" // -> 09123

1321 // "09123 \r\n" // -> 09123

1322 // "09123 12312\n" // -> 09123

1323 // "09123 12312\r\n" // -> 09123

1324 // "09123; foo=bar\n" // -> 09123

1325 // "09123; foo=bar\r\n" // -> 09123

1326 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF

1327 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF

1328 // invalid cases:

1329 // "[ \t]+[^\n]*\n"

1330 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)

1331 // "\r\n"

1332 // "\n"

1333 while (current < end) {

1334 const char c = *current;

1335 ++current;

1336 const signed char addition = buf[static_cast<int>(c)];

1337 if (addition >= 0) {

1338 chunk_length_character_extracted_ = true;

1339 size_t length_x_16 = chunk_length_remaining_ * 16;

1340 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;

1341 if ((chunk_length_remaining_ > kMaxDiv16) \|\|

1342 ((std::numeric_limits<size_t>::max() - length_x_16) <

1343 static_cast<size_t>(addition))) {

1344 // overflow -- asked for a chunk-length greater than 2^64 - 1!!

1345 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1346 last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;

1347 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1348 visitor_->HandleChunkingError(this);

1349 goto bottom;

1350 }

1351 chunk_length_remaining_ = length_x_16 + addition;

1352 continue;

1353 }

1354

1355 if (!chunk_length_character_extracted_ \|\| addition == -1) {

1356 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no

1357 // characters were converted, or an unexpected character was

1358 // seen.

1359 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1360 last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;

1361 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1362 visitor_->HandleChunkingError(this);

1363 goto bottom;

1364 }

1365

1366 --current;

1367 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;

1368 visitor_->ProcessChunkLength(chunk_length_remaining_);

1369 goto label_reading_chunk_extension;

1370 }

1371 }

1372 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1373 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH

1374

1375 label_reading_chunk_extension:

1376 case BalsaFrameEnums::READING_CHUNK_EXTENSION:

1377 {

1378 // TODO(phython): Convert this scanning to be 16 bytes at a time if

1379 // there is data to be read.

1380 const char* extensions_start = current;

1381 size_t extensions_length = 0;

1382 while (current < end) {

1383 const char c = *current;

1384 if (c == '\r' \|\| c == '\n') {

1385 extensions_length =

1386 (extensions_start == current) ?

1387 0 :

1388 current - extensions_start - 1;

1389 }

1390

1391 ++current;

1392 if (c == '\n') {

1393 chunk_length_character_extracted_ = false;

1394 visitor_->ProcessChunkExtensions(

1395 extensions_start, extensions_length);

1396 if (chunk_length_remaining_ != 0) {

1397 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;

1398 goto label_reading_chunk_data;

1399 }

1400 HeaderFramingFound('\n');

1401 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;

1402 goto label_reading_last_chunk_term;

1403 }

1404 }

1405 visitor_->ProcessChunkExtensions(

1406 extensions_start, extensions_length);

1407 }

1408

1409 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1410 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION

1411

1412 label_reading_chunk_data:

1413 case BalsaFrameEnums::READING_CHUNK_DATA:

1414 while (current < end) {

1415 if (chunk_length_remaining_ == 0) {

1416 break;

1417 }

1418 // read in the chunk

1419 size_t bytes_remaining = end - current;

1420 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?

1421 chunk_length_remaining_ : bytes_remaining;

1422 const char* tmp_current = current + consumed_bytes;

1423 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);

1424 visitor_->ProcessBodyData(current, consumed_bytes);

1425 on_entry = current = tmp_current;

1426 chunk_length_remaining_ -= consumed_bytes;

1427 }

1428 if (chunk_length_remaining_ == 0) {

1429 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;

1430 goto label_reading_chunk_term;

1431 }

1432 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1433 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA

1434

1435 label_reading_chunk_term:

1436 case BalsaFrameEnums::READING_CHUNK_TERM:

1437 while (current < end) {

1438 const char c = *current;

1439 ++current;

1440

1441 if (c == '\n') {

1442 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;

1443 goto label_reading_chunk_length;

1444 }

1445 }

1446 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1447 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM

1448

1449 label_reading_last_chunk_term:

1450 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:

1451 while (current < end) {

1452 const char c = *current;

1453

1454 if (!HeaderFramingFound(c)) {

1455 // If not, however, since the spec only suggests that the

1456 // client SHOULD indicate the presence of trailers, we get to

1457 // test that they did or didn't.

1458 // If all of the bytes we've seen since:

1459 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF

1460 // are either '\r', or '\n', then we can assume that we don't yet

1461 // know if we need to parse headers, or if the next byte will make

1462 // the HeaderFramingFound condition (above) true.

1463 if (HeaderFramingMayBeFound()) {

1464 // If true, then we have seen only characters '\r' or '\n'.

1465 ++current;

1466

1467 // Lets try again! There is no state change here.

1468 continue;

1469 } else {

1470 // If (!HeaderFramingMayBeFound()), then we know that we must be

1471 // reading the first non CRLF character of a trailer.

1472 parse_state_ = BalsaFrameEnums::READING_TRAILER;

1473 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1474 on_entry = current;

1475 goto label_reading_trailer;

1476 }

1477 } else {

1478 // If we've found a "\r\n\r\n", then the message

1479 // is done.

1480 ++current;

1481 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1482 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1483 visitor_->MessageDone();

1484 goto bottom;

1485 }

1486 break; // from while loop

1487 }

1488 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1489 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM

1490

1491 label_reading_trailer:

1492 case BalsaFrameEnums::READING_TRAILER:

1493 while (current < end) {

1494 const char c = *current;

1495 ++current;

1496 // TODO(fenix): If we ever care about trailers as part of framing,

1497 // deal with them here (see below for part of the 'solution')

1498 // if (LineFramingFound(c)) {

1499 // trailer_lines_.push_back(make_pair(start_of_line_,

1500 // trailer_length_ - 1));

1501 // start_of_line_ = trailer_length_;

1502 // }

1503 if (HeaderFramingFound(c)) {

1504 // ProcessTrailers(visitor_, &trailers_);

1505 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1506 visitor_->ProcessTrailerInput(on_entry, current - on_entry);

1507 visitor_->MessageDone();

1508 goto bottom;

1509 }

1510 }

1511 visitor_->ProcessTrailerInput(on_entry, current - on_entry);

1512 break; // case BalsaFrameEnums::READING_TRAILER

1513

1514 // Note that there is no label:

1515 // 'label_reading_until_close'

1516 // here. This is because the state-machine exists immediately after

1517 // reading the headers instead of transitioning here (as it would

1518 // do if it was consuming all the data it could, all the time).

1519 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1520 {

1521 const size_t bytes_remaining = end - current;

1522 if (bytes_remaining > 0) {

1523 visitor_->ProcessBodyInput(current, bytes_remaining);

1524 visitor_->ProcessBodyData(current, bytes_remaining);

1525 current += bytes_remaining;

1526 }

1527 }

1528 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE

1529

1530 // label_reading_content:

1531 case BalsaFrameEnums::READING_CONTENT:

1532 #if DEBUGFRAMER

1533 LOG(INFO) << "ReadingContent: " << content_length_remaining_;

1534 #endif // DEBUGFRAMER

1535 while (content_length_remaining_ && current < end) {

1536 // read in the content

1537 const size_t bytes_remaining = end - current;

1538 const size_t consumed_bytes =

1539 (content_length_remaining_ < bytes_remaining) ?

1540 content_length_remaining_ : bytes_remaining;

1541 visitor_->ProcessBodyInput(current, consumed_bytes);

1542 visitor_->ProcessBodyData(current, consumed_bytes);

1543 current += consumed_bytes;

1544 content_length_remaining_ -= consumed_bytes;

1545 }

1546 if (content_length_remaining_ == 0) {

1547 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1548 visitor_->MessageDone();

1549 }

1550 goto bottom; // case BalsaFrameEnums::READING_CONTENT

1551

1552 default:

1553 // The state-machine should never be in a state that isn't handled

1554 // above. This is a glaring logic error, and we should do something

1555 // drastic to ensure that this gets looked-at and fixed.

1556 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE

1557 << " memory corruption?!"; // COV_NF_LINE

1558 }

1559 }

1560 bottom:

1561 #if DEBUGFRAMER

1562 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"

1563 << std::string(input, current)

1564 << "\n$$$$$$$$$$$$$$"

1565 << BalsaFrameEnums::ParseStateToString(parse_state_)

1566 << "$$$$$$$$$$$$$$$"

1567 << " consumed: " << (current - input);

1568 if (Error()) {

1569 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());

1570 }

1571 #endif // DEBUGFRAMER

1572 return current - input;

1573 }

1574

1575 } // namespace net

OLD	NEW

« no previous file with comments | « net/tools/balsa/balsa_frame.h ('k') | net/tools/balsa/balsa_frame_test.cc » ('j') | no next file with comments »