net/tools/balsa/balsa_frame.cc - Issue 992733002: Remove //net (except for Android test stuff) and sdch

Side by Side Diff: net/tools/balsa/balsa_frame.cc

Issue 992733002: Remove //net (except for Android test stuff) and sdch (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "net/tools/balsa/balsa_frame.h"

6

7 // Visual C++ defines _M_IX86_FP as 2 if the /arch:SSE2 compiler option is

8 // specified.

9 #if !defined(__SSE2__) && _M_IX86_FP == 2

10 #define __SSE2__ 1

11 #endif

12

13 #include <assert.h>

14 #if __SSE2__

15 #include <emmintrin.h>

16 #endif // __SSE2__

17

18 #include <limits>

19 #include <string>

20 #include <utility>

21 #include <vector>

22

23 #include "base/logging.h"

24 #include "base/port.h"

25 #include "base/strings/string_piece.h"

26 #include "net/tools/balsa/balsa_enums.h"

27 #include "net/tools/balsa/balsa_headers.h"

28 #include "net/tools/balsa/balsa_visitor_interface.h"

29 #include "net/tools/balsa/buffer_interface.h"

30 #include "net/tools/balsa/simple_buffer.h"

31 #include "net/tools/balsa/split.h"

32 #include "net/tools/balsa/string_piece_utils.h"

33

34 #if defined(COMPILER_MSVC)

35 #include <intrin.h>

36 #include <string.h>

37

38 #pragma intrinsic(_BitScanForward)

39

40 static int ffs(int i) {

41 unsigned long index;

42 return _BitScanForward(&index, i) ? index + 1 : 0;

43 }

44

45 #define strncasecmp _strnicmp

46 #else

47 #include <strings.h>

48 #endif

49

50 namespace net {

51

// Constants holding some header names for headers which can affect the way the
// HTTP message is framed, and so must be processed specially.
//
// The names are spelled in lower case; the *Size constants are the string
// lengths without the terminating NUL, so they can be compared directly
// against a parsed key length.
static const char kContentLength[] = "content-length";
static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const char kTransferEncoding[] = "transfer-encoding";
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;

58

59 BalsaFrame::BalsaFrame()

60 : last_char_was_slash_r_(false),

61 saw_non_newline_char_(false),

62 start_was_space_(true),

63 chunk_length_character_extracted_(false),

64 is_request_(true),

65 request_was_head_(false),

66 max_header_length_(16 * 1024),

67 max_request_uri_length_(2048),

68 visitor_(&do_nothing_visitor_),

69 chunk_length_remaining_(0),

70 content_length_remaining_(0),

71 last_slash_n_loc_(NULL),

72 last_recorded_slash_n_loc_(NULL),

73 last_slash_n_idx_(0),

74 term_chars_(0),

75 parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),

76 last_error_(BalsaFrameEnums::NO_ERROR),

77 headers_(NULL) {

78 }

79

80 BalsaFrame::~BalsaFrame() {}

81

82 void BalsaFrame::Reset() {

83 last_char_was_slash_r_ = false;

84 saw_non_newline_char_ = false;

85 start_was_space_ = true;

86 chunk_length_character_extracted_ = false;

87 // is_request_ = true; // not reset between messages.

88 // request_was_head_ = false; // not reset between messages.

89 // max_header_length_ = 4096; // not reset between messages.

90 // max_request_uri_length_ = 2048; // not reset between messages.

91 // visitor_ = &do_nothing_visitor_; // not reset between messages.

92 chunk_length_remaining_ = 0;

93 content_length_remaining_ = 0;

94 last_slash_n_loc_ = NULL;

95 last_recorded_slash_n_loc_ = NULL;

96 last_slash_n_idx_ = 0;

97 term_chars_ = 0;

98 parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;

99 last_error_ = BalsaFrameEnums::NO_ERROR;

100 lines_.clear();

101 if (headers_ != NULL) {

102 headers_->Clear();

103 }

104 }

105

106 const char* BalsaFrameEnums::ParseStateToString(

107 BalsaFrameEnums::ParseState error_code) {

108 switch (error_code) {

109 case PARSE_ERROR:

110 return "PARSE_ERROR";

111 case READING_HEADER_AND_FIRSTLINE:

112 return "READING_HEADER_AND_FIRSTLINE";

113 case READING_CHUNK_LENGTH:

114 return "READING_CHUNK_LENGTH";

115 case READING_CHUNK_EXTENSION:

116 return "READING_CHUNK_EXTENSION";

117 case READING_CHUNK_DATA:

118 return "READING_CHUNK_DATA";

119 case READING_CHUNK_TERM:

120 return "READING_CHUNK_TERM";

121 case READING_LAST_CHUNK_TERM:

122 return "READING_LAST_CHUNK_TERM";

123 case READING_TRAILER:

124 return "READING_TRAILER";

125 case READING_UNTIL_CLOSE:

126 return "READING_UNTIL_CLOSE";

127 case READING_CONTENT:

128 return "READING_CONTENT";

129 case MESSAGE_FULLY_READ:

130 return "MESSAGE_FULLY_READ";

131 case NUM_STATES:

132 return "UNKNOWN_STATE";

133 }

134 return "UNKNOWN_STATE";

135 }

136

137 const char* BalsaFrameEnums::ErrorCodeToString(

138 BalsaFrameEnums::ErrorCode error_code) {

139 switch (error_code) {

140 case NO_ERROR:

141 return "NO_ERROR";

142 case NO_STATUS_LINE_IN_RESPONSE:

143 return "NO_STATUS_LINE_IN_RESPONSE";

144 case NO_REQUEST_LINE_IN_REQUEST:

145 return "NO_REQUEST_LINE_IN_REQUEST";

146 case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:

147 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";

148 case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:

149 return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";

150 case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:

151 return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";

152 case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:

153 return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";

154 case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:

155 return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";

156 case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:

157 return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";

158 case FAILED_CONVERTING_STATUS_CODE_TO_INT:

159 return "FAILED_CONVERTING_STATUS_CODE_TO_INT";

160 case REQUEST_URI_TOO_LONG:

161 return "REQUEST_URI_TOO_LONG";

162 case HEADERS_TOO_LONG:

163 return "HEADERS_TOO_LONG";

164 case UNPARSABLE_CONTENT_LENGTH:

165 return "UNPARSABLE_CONTENT_LENGTH";

166 case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:

167 return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";

168 case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:

169 return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";

170 case HEADER_MISSING_COLON:

171 return "HEADER_MISSING_COLON";

172 case INVALID_CHUNK_LENGTH:

173 return "INVALID_CHUNK_LENGTH";

174 case CHUNK_LENGTH_OVERFLOW:

175 return "CHUNK_LENGTH_OVERFLOW";

176 case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:

177 return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";

178 case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:

179 return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";

180 case MULTIPLE_CONTENT_LENGTH_KEYS:

181 return "MULTIPLE_CONTENT_LENGTH_KEYS";

182 case MULTIPLE_TRANSFER_ENCODING_KEYS:

183 return "MULTIPLE_TRANSFER_ENCODING_KEYS";

184 case UNKNOWN_TRANSFER_ENCODING:

185 return "UNKNOWN_TRANSFER_ENCODING";

186 case INVALID_HEADER_FORMAT:

187 return "INVALID_HEADER_FORMAT";

188 case INTERNAL_LOGIC_ERROR:

189 return "INTERNAL_LOGIC_ERROR";

190 case NUM_ERROR_CODES:

191 return "UNKNOWN_ERROR";

192 }

193 return "UNKNOWN_ERROR";

194 }

195

196 // Summary:

197 // Parses the first line of either a request or response.

198 // Note that in the case of a detected warning, error_code will be set

199 // but the function will not return false.

200 // Exactly zero or one warning or error (but not both) may be detected

201 // by this function.

202 // Note that this function will not write the data of the first-line

203 // into the header's buffer (that should already have been done elsewhere).

204 //

205 // Pre-conditions:

206 // begin != end

207 // *begin should be a character which is > ' '. This implies that there

208 // is at least one non-whitespace characters between [begin, end).

209 // headers is a valid pointer to a BalsaHeaders class.

210 // error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.

211 // Entire first line must exist between [begin, end)

212 // Exactly zero or one newlines -may- exist between [begin, end)

213 // [begin, end) should exist in the header's buffer.

214 //

215 // Side-effects:

216 // headers will be modified

217 // error_code may be modified if either a warning or error is detected

218 //

219 // Returns:

220 // True if no error (as opposed to warning) is detected.

221 // False if an error (as opposed to warning) is detected.

222

223 //

224 // If there is indeed non-whitespace in the line, then the following

225 // will take care of this for you:

226 // while (*begin <= ' ') ++begin;

227 // ProcessFirstLine(begin, end, is_request, &headers, &error_code);

228 //

229 bool ParseHTTPFirstLine(const char* begin,

230 const char* end,

231 bool is_request,

232 size_t max_request_uri_length,

233 BalsaHeaders* headers,

234 BalsaFrameEnums::ErrorCode* error_code) {

235 const char* current = begin;

236 // HTTP firstlines all have the following structure:

237 // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF

238 // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"

239 // ws1 nws1 ws2 nws2 ws3 nws3 ws4

240 // \| [-------) [-------) [----------------)

241 // REQ: method request_uri version

242 // RESP: version statuscode reason

243 //

244 // The first NONWS->LWS component we'll call firstline_a.

245 // The second firstline_b, and the third firstline_c.

246 //

247 // firstline_a goes from nws1 to (but not including) ws2

248 // firstline_b goes from nws2 to (but not including) ws3

249 // firstline_c goes from nws3 to (but not including) ws4

250 //

251 // In the code:

252 // ws1 == whitespace_1_idx_

253 // nws1 == non_whitespace_1_idx_

254 // ws2 == whitespace_2_idx_

255 // nws2 == non_whitespace_2_idx_

256 // ws3 == whitespace_3_idx_

257 // nws3 == non_whitespace_3_idx_

258 // ws4 == whitespace_4_idx_

259

260 // Kill all whitespace (including '\r\n') at the end of the line.

261 --end;

262 if (*end != '\n') {

263 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

264 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

265 << headers->OriginalHeadersForDebugging();

266 return false;

267 }

268 while (begin < end && *end <= ' ') {

269 --end;

270 }

271 DCHECK(*end != '\n');

272 if (*end == '\n') {

273 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

274 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

275 << headers->OriginalHeadersForDebugging();

276 return false;

277 }

278 ++end;

279

280 // The two following statements should not be possible.

281 if (end == begin) {

282 *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;

283 LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"

284 << headers->OriginalHeadersForDebugging();

285 return false;

286 }

287

288 // whitespace_1_idx_

289 headers->whitespace_1_idx_ = current - begin;

290 // This loop is commented out as it is never used in current code. This is

291 // true only because we don't begin parsing the headers at all until we've

292 // encountered a non whitespace character at the beginning of the stream, at

293 // which point we begin our demarcation of header-start. If we did -not- do

294 // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop

295 // would be necessary for the proper functioning of this parsing.

296 // This is left here as this function may (in the future) be refactored out

297 // of the BalsaFrame class so that it may be shared between code in

298 // BalsaFrame and BalsaHeaders (where it would be used in some variant of the

299 // set_first_line() function (at which point it would be necessary).

300 #if 0

301 while (*current <= ' ') {

302 ++current;

303 }

304 #endif

305 // non_whitespace_1_idx_

306 headers->non_whitespace_1_idx_ = current - begin;

307 do {

308 // The first time through, we're guaranteed that the current character

309 // won't be a whitespace (else the loop above wouldn't have terminated).

310 // That implies that we're guaranteed to get at least one non-whitespace

311 // character if we get into this loop at all.

312 ++current;

313 if (current == end) {

314 headers->whitespace_2_idx_ = current - begin;

315 headers->non_whitespace_2_idx_ = current - begin;

316 headers->whitespace_3_idx_ = current - begin;

317 headers->non_whitespace_3_idx_ = current - begin;

318 headers->whitespace_4_idx_ = current - begin;

319 // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request

320 // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response

321 *error_code =

322 static_cast<BalsaFrameEnums::ErrorCode>(

323 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +

324 is_request);

325 if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION

326 return false;

327 }

328 goto output_exhausted;

329 }

330 } while (*current > ' ');

331 // whitespace_2_idx_

332 headers->whitespace_2_idx_ = current - begin;

333 do {

334 ++current;

335 // Note that due to the loop which consumes all of the whitespace

336 // at the end of the line, current can never == end while in this function.

337 } while (*current <= ' ');

338 // non_whitespace_2_idx_

339 headers->non_whitespace_2_idx_ = current - begin;

340 do {

341 ++current;

342 if (current == end) {

343 headers->whitespace_3_idx_ = current - begin;

344 headers->non_whitespace_3_idx_ = current - begin;

345 headers->whitespace_4_idx_ = current - begin;

346 // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request

347 // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response

348 *error_code =

349 static_cast<BalsaFrameEnums::ErrorCode>(

350 BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE

351 + is_request);

352 goto output_exhausted;

353 }

354 } while (*current > ' ');

355 // whitespace_3_idx_

356 headers->whitespace_3_idx_ = current - begin;

357 do {

358 ++current;

359 // Note that due to the loop which consumes all of the whitespace

360 // at the end of the line, current can never == end while in this function.

361 } while (*current <= ' ');

362 // non_whitespace_3_idx_

363 headers->non_whitespace_3_idx_ = current - begin;

364 headers->whitespace_4_idx_ = end - begin;

365

366 output_exhausted:

367 // Note that we don't fail the parse immediately when parsing of the

368 // firstline fails. Depending on the protocol type, we may want to accept

369 // a firstline with only one or two elements, e.g., for HTTP/0.9:

370 // GET\r\n

371 // or

372 // GET /\r\n

373 // should be parsed without issue (though the visitor should know that

374 // parsing the entire line was not exactly as it should be).

375 //

376 // Eventually, these errors may be removed alltogether, as the visitor can

377 // detect them on its own by examining the size of the various fields.

378 // headers->set_first_line(non_whitespace_1_idx_, current);

379

380 if (is_request) {

381 if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >

382 max_request_uri_length) {

383 // For requests, we need at least the method. We could assume that a

384 // blank URI means "/". If version isn't stated, it should be assumed

385 // to be HTTP/0.9 by the visitor.

386 *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;

387 return false;

388 }

389 } else {

390 headers->parsed_response_code_ = 0;

391 {

392 const char* parsed_response_code_current =

393 begin + headers->non_whitespace_2_idx_;

394 const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;

395 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;

396

397 // Convert a string of [0-9]* into an int.

398 // Note that this allows for the conversion of response codes which

399 // are outside the bounds of normal HTTP response codes (no checking

400 // is done to ensure that these are valid-- they're merely parsed)!

401 while (parsed_response_code_current < parsed_response_code_end) {

402 if (*parsed_response_code_current < '0' \|\|

403 *parsed_response_code_current > '9') {

404 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;

405 return false;

406 }

407 size_t status_code_x_10 = headers->parsed_response_code_ * 10;

408 uint8 c = *parsed_response_code_current - '0';

409 if ((headers->parsed_response_code_ > kMaxDiv10) \|\|

410 (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {

411 // overflow.

412 *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;

413 return false;

414 }

415 headers->parsed_response_code_ = status_code_x_10 + c;

416 ++parsed_response_code_current;

417 }

418 }

419 }

420 return true;

421 }

422

423 // begin - beginning of the firstline

424 // end - end of the firstline

425 //

426 // A precondition for this function is that there is non-whitespace between

427 // [begin, end). If this precondition is not met, the function will not perform

428 // as expected (and bad things may happen, and it will eat your first, second,

429 // and third unborn children!).

430 //

431 // Another precondition for this function is that [begin, end) includes

432 // at most one newline, which must be at the end of the line.

433 void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {

434 BalsaFrameEnums::ErrorCode previous_error = last_error_;

435 if (!ParseHTTPFirstLine(begin,

436 end,

437 is_request_,

438 max_request_uri_length_,

439 headers_,

440 &last_error_)) {

441 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

442 visitor_->HandleHeaderError(this);

443 return;

444 }

445 if (previous_error != last_error_) {

446 visitor_->HandleHeaderWarning(this);

447 }

448

449 if (is_request_) {

450 size_t version_length =

451 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;

452 visitor_->ProcessRequestFirstLine(

453 begin + headers_->non_whitespace_1_idx_,

454 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,

455 begin + headers_->non_whitespace_1_idx_,

456 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,

457 begin + headers_->non_whitespace_2_idx_,

458 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,

459 begin + headers_->non_whitespace_3_idx_,

460 version_length);

461 if (version_length == 0)

462 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

463 } else {

464 visitor_->ProcessResponseFirstLine(

465 begin + headers_->non_whitespace_1_idx_,

466 headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,

467 begin + headers_->non_whitespace_1_idx_,

468 headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,

469 begin + headers_->non_whitespace_2_idx_,

470 headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,

471 begin + headers_->non_whitespace_3_idx_,

472 headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);

473 }

474 }

475

476 // 'stream_begin' points to the first character of the headers buffer.

477 // 'line_begin' points to the first character of the line.

478 // 'current' points to a char which is ':'.

479 // 'line_end' points to the position of '\n' + 1.

480 // 'line_begin' points to the position of first character of line.

481 void BalsaFrame::CleanUpKeyValueWhitespace(

482 const char* stream_begin,

483 const char* line_begin,

484 const char* current,

485 const char* line_end,

486 HeaderLineDescription* current_header_line) {

487 const char* colon_loc = current;

488 DCHECK_LT(colon_loc, line_end);

489 DCHECK_EQ(':', *colon_loc);

490 DCHECK_EQ(':', *current);

491 DCHECK_GE(' ', *line_end)

492 << "\"" << std::string(line_begin, line_end) << "\"";

493

494 // TODO(fenix): Investigate whether or not the bounds tests in the

495 // while loops here are redundant, and if so, remove them.

496 --current;

497 while (current > line_begin && *current <= ' ') --current;

498 current += (current != colon_loc);

499 current_header_line->key_end_idx = current - stream_begin;

500

501 current = colon_loc;

502 DCHECK_EQ(':', *current);

503 ++current;

504 while (current < line_end && *current <= ' ') ++current;

505 current_header_line->value_begin_idx = current - stream_begin;

506

507 DCHECK_GE(current_header_line->key_end_idx,

508 current_header_line->first_char_idx);

509 DCHECK_GE(current_header_line->value_begin_idx,

510 current_header_line->key_end_idx);

511 DCHECK_GE(current_header_line->last_char_idx,

512 current_header_line->value_begin_idx);

513 }

514

515 inline void BalsaFrame::FindColonsAndParseIntoKeyValue() {

516 DCHECK(!lines_.empty());

517 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

518 // The last line is always just a newline (and is uninteresting).

519 const Lines::size_type lines_size_m1 = lines_.size() - 1;

520 #if __SSE2__

521 const __m128i colons = _mm_set1_epi8(':');

522 const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;

523 #endif // __SSE2__

524 const char* current = stream_begin + lines_[1].first;

525 // This code is a bit more subtle than it may appear at first glance.

526 // This code looks for a colon in the current line... but it also looks

527 // beyond the current line. If there is no colon in the current line, then

528 // for each subsequent line (until the colon which -has- been found is

529 // associated with a line), no searching for a colon will be performed. In

530 // this way, we minimize the amount of bytes we have scanned for a colon.

531 for (Lines::size_type i = 1; i < lines_size_m1;) {

532 const char* line_begin = stream_begin + lines_[i].first;

533

534 // Here we handle possible continuations. Note that we do not replace

535 // the '\n' in the line before a continuation (at least, as of now),

536 // which implies that any code which looks for a value must deal with

537 // "\r\n", etc -within- the line (and not just at the end of it).

538 for (++i; i < lines_size_m1; ++i) {

539 const char c = *(stream_begin + lines_[i].first);

540 if (c > ' ') {

541 // Not a continuation, so stop. Note that if the 'original' i = 1,

542 // and the next line is not a continuation, we'll end up with i = 2

543 // when we break. This handles the incrementing of i for the outer

544 // loop.

545 break;

546 }

547 }

548 const char* line_end = stream_begin + lines_[i - 1].second;

549 DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);

550

551 // We cleanup the whitespace at the end of the line before doing anything

552 // else of interest as it allows us to do nothing when irregularly formatted

553 // headers are parsed (e.g. those with only keys, only values, or no colon).

554 //

555 // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.

556 --line_end;

557 DCHECK_EQ('\n', *line_end)

558 << "\"" << std::string(line_begin, line_end) << "\"";

559 while (*line_end <= ' ' && line_end > line_begin) {

560 --line_end;

561 }

562 ++line_end;

563 DCHECK_GE(' ', *line_end);

564 DCHECK_LT(line_begin, line_end);

565

566 // We use '0' for the block idx, because we're always writing to the first

567 // block from the framer (we do this because the framer requires that the

568 // entire header sequence be in a contiguous buffer).

569 headers_->header_lines_.push_back(

570 HeaderLineDescription(line_begin - stream_begin,

571 line_end - stream_begin,

572 line_end - stream_begin,

573 line_end - stream_begin,

574 0));

575 if (current >= line_end) {

576 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;

577 visitor_->HandleHeaderWarning(this);

578 // Then the next colon will not be found within this header line-- time

579 // to try again with another header-line.

580 continue;

581 } else if (current < line_begin) {

582 // When this condition is true, the last detected colon was part of a

583 // previous line. We reset to the beginning of the line as we don't care

584 // about the presence of any colon before the beginning of the current

585 // line.

586 current = line_begin;

587 }

588 #if __SSE2__

589 while (current < header_lines_end_m16) {

590 __m128i header_bytes =

591 _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));

592 __m128i colon_cmp = _mm_cmpeq_epi8(header_bytes, colons);

593 int colon_msk = _mm_movemask_epi8(colon_cmp);

594 if (colon_msk == 0) {

595 current += 16;

596 continue;

597 }

598 current += (ffs(colon_msk) - 1);

599 if (current > line_end) {

600 break;

601 }

602 goto found_colon;

603 }

604 #endif // __SSE2__

605 for (; current < line_end; ++current) {

606 if (*current != ':') {

607 continue;

608 }

609 goto found_colon;

610 }

611 // If we've gotten to here, then there was no colon

612 // in the line. The arguments we passed into the construction

613 // for the HeaderLineDescription object should be OK-- it assumes

614 // that the entire content is 'key' by default (which is true, as

615 // there was no colon, there can be no value). Note that this is a

616 // construct which is technically not allowed by the spec.

617 last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;

618 visitor_->HandleHeaderWarning(this);

619 continue;

620 found_colon:

621 DCHECK_EQ(*current, ':');

622 DCHECK_LE(current - stream_begin, line_end - stream_begin);

623 DCHECK_LE(stream_begin - stream_begin, current - stream_begin);

624

625 HeaderLineDescription& current_header_line = headers_->header_lines_.back();

626 current_header_line.key_end_idx = current - stream_begin;

627 current_header_line.value_begin_idx = current_header_line.key_end_idx;

628 if (current < line_end) {

629 ++current_header_line.key_end_idx;

630

631 CleanUpKeyValueWhitespace(stream_begin,

632 line_begin,

633 current,

634 line_end,

635 &current_header_line);

636 }

637 }

638 }

639

640 void BalsaFrame::ProcessContentLengthLine(

641 HeaderLines::size_type line_idx,

642 BalsaHeadersEnums::ContentLengthStatus* status,

643 size_t* length) {

644 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];

645 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

646 const char* line_end = stream_begin + header_line.last_char_idx;

647 const char* value_begin = (stream_begin + header_line.value_begin_idx);

648

649 if (value_begin >= line_end) {

650 // There is no non-whitespace value data.

651 #if DEBUGFRAMER

652 LOG(INFO) << "invalid content-length -- no non-whitespace value data";

653 #endif

654 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;

655 return;

656 }

657

658 *length = 0;

659 while (value_begin < line_end) {

660 if (value_begin < '0' \|\| value_begin > '9') {

661 // bad! content-length found, and couldn't parse all of it!

662 *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;

663 #if DEBUGFRAMER

664 LOG(INFO) << "invalid content-length - non numeric character detected";

665 #endif // DEBUGFRAMER

666 return;

667 }

668 const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;

669 size_t length_x_10 = length 10;

670 const unsigned char c = *value_begin - '0';

671 if (*length > kMaxDiv10 \|\|

672 (std::numeric_limits<size_t>::max() - length_x_10) < c) {

673 *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;

674 #if DEBUGFRAMER

675 LOG(INFO) << "content-length overflow";

676 #endif // DEBUGFRAMER

677 return;

678 }

679 *length = length_x_10 + c;

680 ++value_begin;

681 }

682 #if DEBUGFRAMER

683 LOG(INFO) << "content_length parsed: " << *length;

684 #endif // DEBUGFRAMER

685 *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;

686 }

687

688 void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {

689 const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];

690 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

691 const char* line_end = stream_begin + header_line.last_char_idx;

692 const char* value_begin = stream_begin + header_line.value_begin_idx;

693 size_t value_length = line_end - value_begin;

694

695 if ((value_length == 7) &&

696 !strncasecmp(value_begin, "chunked", 7)) {

697 headers_->transfer_encoding_is_chunked_ = true;

698 } else if ((value_length == 8) &&

699 !strncasecmp(value_begin, "identity", 8)) {

700 headers_->transfer_encoding_is_chunked_ = false;

701 } else {

702 last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;

703 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

704 visitor_->HandleHeaderError(this);

705 return;

706 }

707 }

708

709 namespace {

710 bool SplitStringPiece(base::StringPiece original, char delim,

711 base::StringPiece* before, base::StringPiece* after) {

712 const char* p = original.data();

713 const char* end = p + original.size();

714

715 while (p != end) {

716 if (*p == delim) {

717 ++p;

718 } else {

719 const char* start = p;

720 while (++p != end && *p != delim) {

721 // Skip to the next occurence of the delimiter.

722 }

723 *before = base::StringPiece(start, p - start);

724 if (p != end)

725 *after = base::StringPiece(p + 1, end - (p + 1));

726 else

727 *after = base::StringPiece("");

728 StringPieceUtils::RemoveWhitespaceContext(before);

729 StringPieceUtils::RemoveWhitespaceContext(after);

730 return true;

731 }

732 }

733

734 *before = original;

735 *after = "";

736 return false;

737 }

738

739 // TODO(phython): Fix this function to properly deal with quoted values.

740 // E.g. ";;foo", "\";;\"", or \"aa;

741 // The last example, the semi-colon is a separator between extensions.

742 void ProcessChunkExtensionsManual(base::StringPiece all_extensions,

743 BalsaHeaders* extensions) {

744 base::StringPiece extension;

745 base::StringPiece remaining;

746 StringPieceUtils::RemoveWhitespaceContext(&all_extensions);

747 SplitStringPiece(all_extensions, ';', &extension, &remaining);

748 while (!extension.empty()) {

749 base::StringPiece key;

750 base::StringPiece value;

751 SplitStringPiece(extension, '=', &key, &value);

752 if (!value.empty()) {

753 // Strip quotation marks if they exist.

754 if (!value.empty() && value[0] == '"')

755 value.remove_prefix(1);

756 if (!value.empty() && value[value.length() - 1] == '"')

757 value.remove_suffix(1);

758 }

759

760 extensions->AppendHeader(key, value);

761

762 StringPieceUtils::RemoveWhitespaceContext(&remaining);

763 SplitStringPiece(remaining, ';', &extension, &remaining);

764 }

765 }

766

767 } // anonymous namespace

768

769 void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,

770 BalsaHeaders* extensions) {

771 ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);

772 }

773

774 void BalsaFrame::ProcessHeaderLines() {

775 HeaderLines::size_type content_length_idx = 0;

776 HeaderLines::size_type transfer_encoding_idx = 0;

777

778 DCHECK(!lines_.empty());

779 #if DEBUGFRAMER

780 LOG(INFO) << "****@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@********\n";

781 #endif // DEBUGFRAMER

782

783 // There is no need to attempt to process headers if no header lines exist.

784 // There are at least two lines in the message which are not header lines.

785 // These two non-header lines are the first line of the message, and the

786 // last line of the message (which is an empty line).

787 // Thus, we test to see if we have more than two lines total before attempting

788 // to parse any header lines.

789 if (lines_.size() > 2) {

790 const char* stream_begin = headers_->OriginalHeaderStreamBegin();

791

792 // Then, for the rest of the header data, we parse these into key-value

793 // pairs.

794 FindColonsAndParseIntoKeyValue();

795 // At this point, we've parsed all of the headers. Time to look for those

796 // headers which we require for framing.

797 const HeaderLines::size_type

798 header_lines_size = headers_->header_lines_.size();

799 for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {

800 const HeaderLineDescription& current_header_line =

801 headers_->header_lines_[i];

802 const char* key_begin =

803 (stream_begin + current_header_line.first_char_idx);

804 const char* key_end = (stream_begin + current_header_line.key_end_idx);

805 const size_t key_len = key_end - key_begin;

806 const char c = *key_begin;

807 #if DEBUGFRAMER

808 LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)

809 << " c: '" << c << "' key_len: " << key_len;

810 #endif // DEBUGFRAMER

811 // If a header begins with either lowercase or uppercase 'c' or 't', then

812 // the header may be one of content-length, connection, content-encoding

813 // or transfer-encoding. These headers are special, as they change the way

814 // that the message is framed, and so the framer is required to search

815 // for them.

816

817

818 if (c == 'c' \|\| c == 'C') {

819 if ((key_len == kContentLengthSize) &&

820 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {

821 BalsaHeadersEnums::ContentLengthStatus content_length_status =

822 BalsaHeadersEnums::NO_CONTENT_LENGTH;

823 size_t length = 0;

824 ProcessContentLengthLine(i, &content_length_status, &length);

825 if (content_length_idx != 0) { // then we've already seen one!

826 if ((headers_->content_length_status_ != content_length_status) \|\|

827 ((headers_->content_length_status_ ==

828 BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&

829 length != headers_->content_length_)) {

830 last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;

831 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

832 visitor_->HandleHeaderError(this);

833 return;

834 }

835 continue;

836 } else {

837 content_length_idx = i + 1;

838 headers_->content_length_status_ = content_length_status;

839 headers_->content_length_ = length;

840 content_length_remaining_ = length;

841 }

842

843 }

844 } else if (c == 't' \|\| c == 'T') {

845 if ((key_len == kTransferEncodingSize) &&

846 0 == strncasecmp(key_begin, kTransferEncoding,

847 kTransferEncodingSize)) {

848 if (transfer_encoding_idx != 0) {

849 last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;

850 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

851 visitor_->HandleHeaderError(this);

852 return;

853 }

854 transfer_encoding_idx = i + 1;

855 }

856 } else if (i == 0 && (key_len == 0 \|\| c == ' ')) {

857 last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;

858 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

859 visitor_->HandleHeaderError(this);

860 return;

861 }

862 }

863 if (headers_->transfer_encoding_is_chunked_) {

864 headers_->content_length_ = 0;

865 headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;

866 content_length_remaining_ = 0;

867 }

868 if (transfer_encoding_idx != 0) {

869 ProcessTransferEncodingLine(transfer_encoding_idx - 1);

870 }

871 }

872 }

873

874 void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {

875 // For responses, can't have a body if the request was a HEAD, or if it is

876 // one of these response-codes. rfc2616 section 4.3

877 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

878 if (is_request_ \|\|

879 !(request_was_head_ \|\|

880 (headers_->parsed_response_code_ >= 100 &&

881 headers_->parsed_response_code_ < 200) \|\|

882 (headers_->parsed_response_code_ == 204) \|\|

883 (headers_->parsed_response_code_ == 304))) {

884 // Then we can have a body.

885 if (headers_->transfer_encoding_is_chunked_) {

886 // Note that

887 // if ( Transfer-Encoding: chunked && Content-length: )

888 // then Transfer-Encoding: chunked trumps.

889 // This is as specified in the spec.

890 // rfc2616 section 4.4.3

891 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;

892 } else {

893 // Errors parsing content-length definitely can cause

894 // protocol errors/warnings

895 switch (headers_->content_length_status_) {

896 // If we have a content-length, and it is parsed

897 // properly, there are two options.

898 // 1) zero content, in which case the message is done, and

899 // 2) nonzero content, in which case we have to

900 // consume the body.

901 case BalsaHeadersEnums::VALID_CONTENT_LENGTH:

902 if (headers_->content_length_ == 0) {

903 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

904 } else {

905 parse_state_ = BalsaFrameEnums::READING_CONTENT;

906 }

907 break;

908 case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:

909 case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:

910 // If there were characters left-over after parsing the

911 // content length, we should flag an error and stop.

912 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

913 last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;

914 visitor_->HandleHeaderError(this);

915 break;

916 // We can have: no transfer-encoding, no content length, and no

917 // connection: close...

918 // Unfortunately, this case doesn't seem to be covered in the spec.

919 // We'll assume that the safest thing to do here is what the google

920 // binaries before 2008 already do, which is to assume that

921 // everything until the connection is closed is body.

922 case BalsaHeadersEnums::NO_CONTENT_LENGTH:

923 if (is_request_) {

924 base::StringPiece method = headers_->request_method();

925 // POSTs and PUTs should have a detectable body length. If they

926 // do not we consider it an error.

927 if ((method.size() == 4 &&

928 strncmp(method.data(), "POST", 4) == 0) \|\|

929 (method.size() == 3 &&

930 strncmp(method.data(), "PUT", 3) == 0)) {

931 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

932 last_error_ =

933 BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;

934 visitor_->HandleHeaderError(this);

935 break;

936 }

937 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

938 } else {

939 parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;

940 last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;

941 visitor_->HandleHeaderWarning(this);

942 }

943 break;

944 // The COV_NF_... statements here provide hints to the apparatus

945 // which computes coverage reports/ratios that this code is never

946 // intended to be executed, and should technically be impossible.

947 // COV_NF_START

948 default:

949 LOG(FATAL) << "Saw a content_length_status: "

950 << headers_->content_length_status_ << " which is unknown.";

951 // COV_NF_END

952 }

953 }

954 }

955 }

956

// Consumes header bytes from [message_start, message_start + message_length).
// Every '\n' found is recorded in lines_ as a (start, end) index pair and the
// scanned bytes are copied into headers_ through WriteFromFramer(). Once the
// empty line that ends the header block is found, the header lines are
// processed, the follow-up parse state is chosen and the visitor is notified.
// Returns the number of input bytes consumed, measured from the value
// message_start had on entry.
957 size_t BalsaFrame::ProcessHeaders(const char* message_start,

958 size_t message_length) {

959 const char* const original_message_start = message_start;

960 const char* const message_end = message_start + message_length;

961 const char* message_current = message_start;

962 const char* checkpoint = message_start;

963

964 if (message_length == 0) {

965 goto bottom;

966 }

967

968 while (message_current < message_end) {

969 size_t base_idx = headers_->GetReadableBytesFromHeaderStream();

970

971 // Yes, we could use strchr (assuming null termination), or

972 // memchr, but as it turns out that is slower than this tight loop

973 // for the input that we see.

// Until a first non-newline byte has been seen, skip leading '\r' and '\n'
// bytes. A first byte that is <= ' ' is reported as
// NO_REQUEST_LINE_IN_REQUEST.
974 if (!saw_non_newline_char_) {

975 do {

976 const char c = *message_current;

977 if (c != '\r' && c != '\n') {

978 if (c <= ' ') {

979 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

980 last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;

981 visitor_->HandleHeaderError(this);

982 goto bottom;

983 } else {

984 saw_non_newline_char_ = true;

// The skipped bytes are dropped: both the copy checkpoint and message_start
// are moved to the first real character.
985 checkpoint = message_start = message_current;

// This jumps into the else-branch below, which holds the line scanner.
986 goto read_real_message;

987 }

988 }

989 ++message_current;

990 } while (message_current < message_end);

991 goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks

992 } else {

993 read_real_message:

994 // Note that SSE2 can be enabled on certain piii platforms.

995 #if __SSE2__

996 {

997 const char* const message_end_m16 = message_end - 16;

998 __m128i newlines = _mm_set1_epi8('\n');

999 while (message_current < message_end_m16) {

1000 // What this does (using compiler intrinsics):

1001 //

1002 // Load 16 '\n's into an xmm register

1003 // Load 16 bytes of current message into an xmm register

1004 // Do byte-wise equals on those two xmm registers

1005 // Take the first bit of each byte, and put that into the first

1006 // 16 bits of a mask

1007 // If the mask is zero, no '\n' found. increment by 16 and try again

1008 // Else scan forward to find the first set bit.

1009 // Increment current by the index of the first set bit

1010 // (ffs returns index of first set bit + 1)

1011 __m128i msg_bytes =

1012 _mm_loadu_si128(const_cast<__m128i *>(

1013 reinterpret_cast<const __m128i *>(message_current)));

1014 __m128i newline_cmp = _mm_cmpeq_epi8(msg_bytes, newlines);

1015 int newline_msk = _mm_movemask_epi8(newline_cmp);

1016 if (newline_msk == 0) {

1017 message_current += 16;

1018 continue;

1019 }

1020 message_current += (ffs(newline_msk) - 1);

// message_current now points at a '\n'. The recorded line end is the index
// one past that '\n', offset by the bytes already in the header stream.
1021 const size_t relative_idx = message_current - message_start;

1022 const size_t message_current_idx = 1 + base_idx + relative_idx;

1023 lines_.push_back(std::make_pair(last_slash_n_idx_,

1024 message_current_idx));

// The first completed line is written out and parsed immediately through
// ProcessFirstLine().
1025 if (lines_.size() == 1) {

1026 headers_->WriteFromFramer(checkpoint,

1027 1 + message_current - checkpoint);

1028 checkpoint = message_current + 1;

1029 const char* begin = headers_->OriginalHeaderStreamBegin();

1030 #if DEBUGFRAMER

1031 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);

1032 LOG(INFO) << "is_request_: " << is_request_;

1033 #endif

1034 ProcessFirstLine(begin, begin + lines_[0].second);

1035 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)

1036 goto process_lines;

1037 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)

1038 goto bottom;

1039 }

1040 const size_t chars_since_last_slash_n = (message_current_idx -

1041 last_slash_n_idx_);

1042 last_slash_n_idx_ = message_current_idx;

1043 if (chars_since_last_slash_n > 2) {

1044 // We have a slash-n, but the last slash n was

1045 // more than 2 characters away from this. Thus, we know

1046 // that this cannot be an end-of-header.

1047 ++message_current;

1048 continue;

1049 }

// The header block ends on "\n\n" (distance 1) or when this '\n' is directly
// preceded by a '\r', which may have been the last byte of the previous call
// (last_char_was_slash_r_).
1050 if ((chars_since_last_slash_n == 1) \|\|

1051 (((message_current > message_start) &&

1052 (*(message_current - 1) == '\r')) \|\|

1053 (last_char_was_slash_r_))) {

1054 goto process_lines;

1055 }

1056 ++message_current;

1057 }

1058 }

1059 #endif // __SSE2__

// Byte-at-a-time scanner with the same per-line handling as the SSE2 loop
// above; when SSE2 is enabled it handles the bytes that loop left unscanned.
1060 while (message_current < message_end) {

1061 if (*message_current != '\n') {

1062 ++message_current;

1063 continue;

1064 }

1065 const size_t relative_idx = message_current - message_start;

1066 const size_t message_current_idx = 1 + base_idx + relative_idx;

1067 lines_.push_back(std::make_pair(last_slash_n_idx_,

1068 message_current_idx));

1069 if (lines_.size() == 1) {

1070 headers_->WriteFromFramer(checkpoint,

1071 1 + message_current - checkpoint);

1072 checkpoint = message_current + 1;

1073 const char* begin = headers_->OriginalHeaderStreamBegin();

1074 #if DEBUGFRAMER

1075 LOG(INFO) << "First line " << std::string(begin, lines_[0].second);

1076 LOG(INFO) << "is_request_: " << is_request_;

1077 #endif

1078 ProcessFirstLine(begin, begin + lines_[0].second);

1079 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)

1080 goto process_lines;

1081 else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)

1082 goto bottom;

1083 }

1084 const size_t chars_since_last_slash_n = (message_current_idx -

1085 last_slash_n_idx_);

1086 last_slash_n_idx_ = message_current_idx;

1087 if (chars_since_last_slash_n > 2) {

1088 // false positive.

1089 ++message_current;

1090 continue;

1091 }

1092 if ((chars_since_last_slash_n == 1) \|\|

1093 (((message_current > message_start) &&

1094 (*(message_current - 1) == '\r')) \|\|

1095 (last_char_was_slash_r_))) {

1096 goto process_lines;

1097 }

1098 ++message_current;

1099 }

1100 }

// Reached only when the scanner ran out of input without finding the end of
// the headers, so the outer loop condition fails and the loop exits.
1101 continue;

1102 process_lines:

// message_current points at the final '\n' of the header block; step past it
// so it is included in what gets written and counted as consumed.
1103 ++message_current;

1104 DCHECK(message_current >= message_start);

1105 if (message_current > message_start) {

1106 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);

1107 }

1108

1109 // Check if we have exceeded maximum headers length

1110 // Although we check for this limit before and after we call this function

1111 // we check it here as well to make sure that in case the visitor changed

1112 // the max_header_length_ (for example after processing the first line)

1113 // we handle it gracefully.

1114 if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {

1115 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1116 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1117 visitor_->HandleHeaderError(this);

1118 goto bottom;

1119 }

1120

1121 // Since we know that we won't be writing any more bytes of the header,

1122 // we tell that to the headers object. The headers object may make

1123 // more efficient allocation decisions when this is signaled.

1124 headers_->DoneWritingFromFramer();

1125 {

1126 const char* readable_ptr = NULL;

1127 size_t readable_size = 0;

1128 headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);

1129 visitor_->ProcessHeaderInput(readable_ptr, readable_size);

1130 }

1131

1132 // Ok, now that we've written everything into our header buffer, it is

1133 // time to process the header lines (extract proper values for headers

1134 // which are important for framing).

1135 ProcessHeaderLines();

1136 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1137 goto bottom;

1138 }

1139 AssignParseStateAfterHeadersHaveBeenParsed();

1140 if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1141 goto bottom;

1142 }

1143 visitor_->ProcessHeaders(*headers_);

1144 visitor_->HeaderDone();

1145 if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {

1146 visitor_->MessageDone();

1147 }

1148 goto bottom;

1149 }

1150 // If we've gotten to here, it means that we've consumed all of the

1151 // available input. We need to record whether or not the last character we

1152 // saw was a '\r' so that a subsequent call to ProcessInput correctly finds

1153 // a header framing that is split across the two calls.

1154 last_char_was_slash_r_ = (*(message_end - 1) == '\r');

1155 DCHECK(message_current >= message_start);

1156 if (message_current > message_start) {

1157 headers_->WriteFromFramer(checkpoint, message_current - checkpoint);

1158 }

1159 bottom:

// The count is relative to the pointer passed in, not to message_start,
// which may have been advanced past leading newlines above.
1160 return message_current - original_message_start;

1161 }

1162

1163

1164 size_t BalsaFrame::BytesSafeToSplice() const {

1165 switch (parse_state_) {

1166 case BalsaFrameEnums::READING_CHUNK_DATA:

1167 return chunk_length_remaining_;

1168 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1169 return std::numeric_limits<size_t>::max();

1170 case BalsaFrameEnums::READING_CONTENT:

1171 return content_length_remaining_;

1172 default:

1173 return 0;

1174 }

1175 }

1176

1177 void BalsaFrame::BytesSpliced(size_t bytes_spliced) {

1178 switch (parse_state_) {

1179 case BalsaFrameEnums::READING_CHUNK_DATA:

1180 if (chunk_length_remaining_ >= bytes_spliced) {

1181 chunk_length_remaining_ -= bytes_spliced;

1182 if (chunk_length_remaining_ == 0) {

1183 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;

1184 }

1185 return;

1186 } else {

1187 last_error_ =

1188 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;

1189 goto error_exit;

1190 }

1191

1192 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1193 return;

1194

1195 case BalsaFrameEnums::READING_CONTENT:

1196 if (content_length_remaining_ >= bytes_spliced) {

1197 content_length_remaining_ -= bytes_spliced;

1198 if (content_length_remaining_ == 0) {

1199 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1200 visitor_->MessageDone();

1201 }

1202 return;

1203 } else {

1204 last_error_ =

1205 BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;

1206 goto error_exit;

1207 }

1208

1209 default:

1210 last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;

1211 goto error_exit;

1212 }

1213

1214 error_exit:

1215 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1216 visitor_->HandleBodyError(this);

1217 };

1218

1219 // You may note that the state-machine contained within this function has both

1220 // switch and goto labels for nearly the same thing. For instance, the

1221 // following two labels refer to the same code block:

1222 // label_reading_chunk_data:

1223 // case BalsaFrameEnums::READING_CHUNK_DATA:

1224 // The 'case' statement is required for the switch statement which occurs when

1225 // ProcessInput is invoked. The goto label is required as the state-machine

1226 // does not use a computed goto in any subsequent operations.

1227 //

1228 // Since several states exit the state machine for various reasons, there is

1229 // also one label at the bottom of the function. When it is appropriate to

1230 // return from the function, that part of the state machine instead issues a

1231 // goto bottom; This results in less code duplication, and makes debugging

1232 // easier (as you can add a statement to a section of code which is guaranteed

1233 // to be invoked when the function is exiting.

1234 size_t BalsaFrame::ProcessInput(const char* input, size_t size) {

1235 const char* current = input;

1236 const char* on_entry = current;

1237 const char* end = current + size;

1238 #if DEBUGFRAMER

1239 LOG(INFO) << "\n=============="

1240 << BalsaFrameEnums::ParseStateToString(parse_state_)

1241 << "===============\n";

1242 #endif // DEBUGFRAMER

1243

1244 DCHECK(headers_ != NULL);

1245 if (headers_ == NULL) return 0;

1246

1247 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {

1248 const size_t header_length = headers_->GetReadableBytesFromHeaderStream();

1249 // Yes, we still have to check this here as the user can change the

1250 // max_header_length amount!

1251 // Also it is possible that we have reached the maximum allowed header size,

1252 // and we have more to consume (remember we are still inside

1253 // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.

1254 if (header_length > max_header_length_ \|\|

1255 (header_length == max_header_length_ && size > 0)) {

1256 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1257 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1258 visitor_->HandleHeaderError(this);

1259 goto bottom;

1260 }

1261 size_t bytes_to_process = max_header_length_ - header_length;

1262 if (bytes_to_process > size) {

1263 bytes_to_process = size;

1264 }

1265 current += ProcessHeaders(input, bytes_to_process);

1266 // If we are still reading headers check if we have crossed the headers

1267 // limit. Note that we check for >= as opposed to >. This is because if

1268 // header_length_after equals max_header_length_ and we are still in the

1269 // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for

1270 // sure that the headers limit will be crossed later on

1271 if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {

1272 // Note that headers_ is valid only if we are still reading headers.

1273 const size_t header_length_after =

1274 headers_->GetReadableBytesFromHeaderStream();

1275 if (header_length_after >= max_header_length_) {

1276 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1277 last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;

1278 visitor_->HandleHeaderError(this);

1279 }

1280 }

1281 goto bottom;

1282 } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ \|\|

1283 parse_state_ == BalsaFrameEnums::PARSE_ERROR) {

1284 // Can do nothing more 'till we're reset.

1285 goto bottom;

1286 }

1287

1288 while (current < end) {

1289 switch (parse_state_) {

1290 label_reading_chunk_length:

1291 case BalsaFrameEnums::READING_CHUNK_LENGTH:

1292 // In this state we read the chunk length.

1293 // Note that once we hit a character which is not in:

1294 // [0-9;A-Fa-f\n], we transition to a different state.

1295 //

1296 {

1297 // If we used strtol, etc, we'd have to buffer this line.

1298 // This is more annoying than simply doing the conversion

1299 // here. This code accounts for overflow.

1300 static const signed char buf[] = {

1301 // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f

1302 -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,

1303 // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f

1304 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1305 // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f

1306 -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1307 // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f

1308 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1,

1309 // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f

1310 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1311 // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f

1312 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1313 // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f

1314 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1315 // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f

1316 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

1317 };

1318 // valid cases:

1319 // "09123\n" // -> 09123

1320 // "09123\r\n" // -> 09123

1321 // "09123 \n" // -> 09123

1322 // "09123 \r\n" // -> 09123

1323 // "09123 12312\n" // -> 09123

1324 // "09123 12312\r\n" // -> 09123

1325 // "09123; foo=bar\n" // -> 09123

1326 // "09123; foo=bar\r\n" // -> 09123

1327 // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF

1328 // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF

1329 // invalid cases:

1330 // "[ \t]+[^\n]*\n"

1331 // "FFFFFFFFFFFFFFFFF\r\n" (would overflow)

1332 // "\r\n"

1333 // "\n"

1334 while (current < end) {

1335 const char c = *current;

1336 ++current;

1337 const signed char addition = buf[static_cast<int>(c)];

1338 if (addition >= 0) {

1339 chunk_length_character_extracted_ = true;

1340 size_t length_x_16 = chunk_length_remaining_ * 16;

1341 const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;

1342 if ((chunk_length_remaining_ > kMaxDiv16) \|\|

1343 ((std::numeric_limits<size_t>::max() - length_x_16) <

1344 static_cast<size_t>(addition))) {

1345 // overflow -- asked for a chunk-length greater than 2^64 - 1!!

1346 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1347 last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;

1348 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1349 visitor_->HandleChunkingError(this);

1350 goto bottom;

1351 }

1352 chunk_length_remaining_ = length_x_16 + addition;

1353 continue;

1354 }

1355

1356 if (!chunk_length_character_extracted_ \|\| addition == -1) {

1357 // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no

1358 // characters were converted, or an unexpected character was

1359 // seen.

1360 parse_state_ = BalsaFrameEnums::PARSE_ERROR;

1361 last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;

1362 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1363 visitor_->HandleChunkingError(this);

1364 goto bottom;

1365 }

1366

1367 --current;

1368 parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;

1369 visitor_->ProcessChunkLength(chunk_length_remaining_);

1370 goto label_reading_chunk_extension;

1371 }

1372 }

1373 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1374 goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH

1375

1376 label_reading_chunk_extension:

1377 case BalsaFrameEnums::READING_CHUNK_EXTENSION:

1378 {

1379 // TODO(phython): Convert this scanning to be 16 bytes at a time if

1380 // there is data to be read.

1381 const char* extensions_start = current;

1382 size_t extensions_length = 0;

1383 while (current < end) {

1384 const char c = *current;

1385 if (c == '\r' \|\| c == '\n') {

1386 extensions_length =

1387 (extensions_start == current) ?

1388 0 :

1389 current - extensions_start - 1;

1390 }

1391

1392 ++current;

1393 if (c == '\n') {

1394 chunk_length_character_extracted_ = false;

1395 visitor_->ProcessChunkExtensions(

1396 extensions_start, extensions_length);

1397 if (chunk_length_remaining_ != 0) {

1398 parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;

1399 goto label_reading_chunk_data;

1400 }

1401 HeaderFramingFound('\n');

1402 parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;

1403 goto label_reading_last_chunk_term;

1404 }

1405 }

1406 visitor_->ProcessChunkExtensions(

1407 extensions_start, extensions_length);

1408 }

1409

1410 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1411 goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION

1412

1413 label_reading_chunk_data:

1414 case BalsaFrameEnums::READING_CHUNK_DATA:

1415 while (current < end) {

1416 if (chunk_length_remaining_ == 0) {

1417 break;

1418 }

1419 // read in the chunk

1420 size_t bytes_remaining = end - current;

1421 size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?

1422 chunk_length_remaining_ : bytes_remaining;

1423 const char* tmp_current = current + consumed_bytes;

1424 visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);

1425 visitor_->ProcessBodyData(current, consumed_bytes);

1426 on_entry = current = tmp_current;

1427 chunk_length_remaining_ -= consumed_bytes;

1428 }

1429 if (chunk_length_remaining_ == 0) {

1430 parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;

1431 goto label_reading_chunk_term;

1432 }

1433 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1434 goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA

1435

1436 label_reading_chunk_term:

1437 case BalsaFrameEnums::READING_CHUNK_TERM:

1438 while (current < end) {

1439 const char c = *current;

1440 ++current;

1441

1442 if (c == '\n') {

1443 parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;

1444 goto label_reading_chunk_length;

1445 }

1446 }

1447 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1448 goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM

1449

1450 label_reading_last_chunk_term:

1451 case BalsaFrameEnums::READING_LAST_CHUNK_TERM:

1452 while (current < end) {

1453 const char c = *current;

1454

1455 if (!HeaderFramingFound(c)) {

1456 // If not, however, since the spec only suggests that the

1457 // client SHOULD indicate the presence of trailers, we get to

1458 // test that they did or didn't.

1459 // If all of the bytes we've seen since:

1460 // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF

1461 // are either '\r', or '\n', then we can assume that we don't yet

1462 // know if we need to parse headers, or if the next byte will make

1463 // the HeaderFramingFound condition (above) true.

1464 if (HeaderFramingMayBeFound()) {

1465 // If true, then we have seen only characters '\r' or '\n'.

1466 ++current;

1467

1468 // Lets try again! There is no state change here.

1469 continue;

1470 } else {

1471 // If (!HeaderFramingMayBeFound()), then we know that we must be

1472 // reading the first non CRLF character of a trailer.

1473 parse_state_ = BalsaFrameEnums::READING_TRAILER;

1474 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1475 on_entry = current;

1476 goto label_reading_trailer;

1477 }

1478 } else {

1479 // If we've found a "\r\n\r\n", then the message

1480 // is done.

1481 ++current;

1482 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1483 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1484 visitor_->MessageDone();

1485 goto bottom;

1486 }

1487 break; // from while loop

1488 }

1489 visitor_->ProcessBodyInput(on_entry, current - on_entry);

1490 goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM

1491

1492 label_reading_trailer:

1493 case BalsaFrameEnums::READING_TRAILER:

1494 while (current < end) {

1495 const char c = *current;

1496 ++current;

1497 // TODO(fenix): If we ever care about trailers as part of framing,

1498 // deal with them here (see below for part of the 'solution')

1499 // if (LineFramingFound(c)) {

1500 // trailer_lines_.push_back(make_pair(start_of_line_,

1501 // trailer_length_ - 1));

1502 // start_of_line_ = trailer_length_;

1503 // }

1504 if (HeaderFramingFound(c)) {

1505 // ProcessTrailers(visitor_, &trailers_);

1506 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1507 visitor_->ProcessTrailerInput(on_entry, current - on_entry);

1508 visitor_->MessageDone();

1509 goto bottom;

1510 }

1511 }

1512 visitor_->ProcessTrailerInput(on_entry, current - on_entry);

1513 break; // case BalsaFrameEnums::READING_TRAILER

1514

1515 // Note that there is no label:

1516 // 'label_reading_until_close'

1517 // here. This is because the state-machine exists immediately after

1518 // reading the headers instead of transitioning here (as it would

1519 // do if it was consuming all the data it could, all the time).

1520 case BalsaFrameEnums::READING_UNTIL_CLOSE:

1521 {

1522 const size_t bytes_remaining = end - current;

1523 if (bytes_remaining > 0) {

1524 visitor_->ProcessBodyInput(current, bytes_remaining);

1525 visitor_->ProcessBodyData(current, bytes_remaining);

1526 current += bytes_remaining;

1527 }

1528 }

1529 goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE

1530

1531 // label_reading_content:

1532 case BalsaFrameEnums::READING_CONTENT:

1533 #if DEBUGFRAMER

1534 LOG(INFO) << "ReadingContent: " << content_length_remaining_;

1535 #endif // DEBUGFRAMER

1536 while (content_length_remaining_ && current < end) {

1537 // read in the content

1538 const size_t bytes_remaining = end - current;

1539 const size_t consumed_bytes =

1540 (content_length_remaining_ < bytes_remaining) ?

1541 content_length_remaining_ : bytes_remaining;

1542 visitor_->ProcessBodyInput(current, consumed_bytes);

1543 visitor_->ProcessBodyData(current, consumed_bytes);

1544 current += consumed_bytes;

1545 content_length_remaining_ -= consumed_bytes;

1546 }

1547 if (content_length_remaining_ == 0) {

1548 parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

1549 visitor_->MessageDone();

1550 }

1551 goto bottom; // case BalsaFrameEnums::READING_CONTENT

1552

1553 default:

1554 // The state-machine should never be in a state that isn't handled

1555 // above. This is a glaring logic error, and we should do something

1556 // drastic to ensure that this gets looked-at and fixed.

1557 LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE

1558 << " memory corruption?!"; // COV_NF_LINE

1559 }

1560 }

1561 bottom:

1562 #if DEBUGFRAMER

1563 LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"

1564 << std::string(input, current)

1565 << "\n$$$$$$$$$$$$$$"

1566 << BalsaFrameEnums::ParseStateToString(parse_state_)

1567 << "$$$$$$$$$$$$$$$"

1568 << " consumed: " << (current - input);

1569 if (Error()) {

1570 LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());

1571 }

1572 #endif // DEBUGFRAMER

1573 return current - input;

1574 }

1575

1576 } // namespace net

OLD	NEW

« no previous file with comments | « net/tools/balsa/balsa_frame.h ('k') | net/tools/balsa/balsa_frame_test.cc » ('j') | no next file with comments »