src/scanner.h - Issue 3181036: Created collector class and used it to collect identifiers during scanning.

Side by Side Diff: src/scanner.h

Issue 3181036: Created collector class and used it to collect identifiers during scanning. (Closed)

Patch Set: Created 10 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 22 matching lines...) Expand all Loading...
33	33

34 namespace v8 {	34 namespace v8 {

35 namespace internal {	35 namespace internal {

36	36

37	37

38 class UTF8Buffer {	38 class UTF8Buffer {

39 public:	39 public:

40 UTF8Buffer();	40 UTF8Buffer();

41 ~UTF8Buffer();	41 ~UTF8Buffer();

42	42

43 void AddChar(uc32 c) {	43 inline void AddChar(uc32 c) {

44 ASSERT_NOT_NULL(data_);	44 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

45 if (cursor_ <= limit_ &&	45 buffer_.Add(static_cast<char>(c));

46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

47 *cursor_++ = static_cast<char>(c);

48 } else {	46 } else {

49 AddCharSlow(c);	47 AddCharSlow(c);

50 }	48 }

51 }	49 }

52	50

53 void Reset() {	51 void StartLiteral() {

54 if (data_ == NULL) {	52 buffer_.StartSequence();

55 data_ = NewArray<char>(kInitialCapacity);

56 limit_ = ComputeLimit(data_, kInitialCapacity);

57 }

58 cursor_ = data_;

59 }	53 }

60	54

61 int pos() const {	55 Vector<const char> EndLiteral() {

62 ASSERT_NOT_NULL(data_);	56 buffer_.Add(kEndMarker);

63 return static_cast<int>(cursor_ - data_);	57 Vector<char> sequence = buffer_.EndSequence();

	58 return Vector<const char>(sequence.start(), sequence.length());

64 }	59 }

65	60

66 char* data() const { return data_; }	61 // The end marker added after a parsed literal.

67	62 // Using zero allows the usage of strlen and similar functions on

	63 // identifiers and numbers (but not strings, since they may contain zero

	64 // bytes).

	65 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside

	66 // a UTF-8 string. This requires changes in all places that use

	67 // str-functions on the literals, but allows a single pointer to represent

	68 // the literal, even if it contains embedded zeros.

	69 static const char kEndMarker = '\x00';

68 private:	70 private:

69 static const int kInitialCapacity = 256;	71 static const int kInitialCapacity = 256;

70 char* data_;	72 SequenceCollector<char> buffer_;

71 char* cursor_;

72 char* limit_;

73

74 int Capacity() const {

75 ASSERT_NOT_NULL(data_);

76 return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;

77 }

78

79 static char* ComputeLimit(char* data, int capacity) {

80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;

81 }

82	73

83 void AddCharSlow(uc32 c);	74 void AddCharSlow(uc32 c);

84 };	75 };

85	76

86	77

87 // Interface through which the scanner reads characters from the input source.	78 // Interface through which the scanner reads characters from the input source.

88 class UTF16Buffer {	79 class UTF16Buffer {

89 public:	80 public:

90 UTF16Buffer();	81 UTF16Buffer();

91 virtual ~UTF16Buffer() {}	82 virtual ~UTF16Buffer() {}

(...skipping 215 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
307 Location location() const { return current_.location; }	298 Location location() const { return current_.location; }

308 Location peek_location() const { return next_.location; }	299 Location peek_location() const { return next_.location; }

309	300

310 // Returns the literal string, if any, for the current token (the	301 // Returns the literal string, if any, for the current token (the

311 // token returned by Next()). The string is 0-terminated and in	302 // token returned by Next()). The string is 0-terminated and in

312 // UTF-8 format; it may contain 0-characters. Literal strings are	303 // UTF-8 format; it may contain 0-characters. Literal strings are

313 // collected for identifiers, strings, and numbers.	304 // collected for identifiers, strings, and numbers.

314 // These functions only give the correct result if the literal	305 // These functions only give the correct result if the literal

315 // was scanned between calls to StartLiteral() and TerminateLiteral().	306 // was scanned between calls to StartLiteral() and TerminateLiteral().

316 const char* literal_string() const {	307 const char* literal_string() const {

317 return current_.literal_buffer->data();	308 return current_.literal_chars.start();

318 }	309 }

	310

319 int literal_length() const {	311 int literal_length() const {

320 // Excluding terminal '\0' added by TerminateLiteral().	312 // Excluding terminal '\x00' added by TerminateLiteral().

321 return current_.literal_buffer->pos() - 1;	313 return current_.literal_chars.length() - 1;

	314 }

	315

	316 Vector<const char> literal() const {

	317 return Vector<const char>(literal_string(), literal_length());

322 }	318 }

323	319

324 // Returns the literal string for the next token (the token that	320 // Returns the literal string for the next token (the token that

325 // would be returned if Next() were called).	321 // would be returned if Next() were called).

326 const char* next_literal_string() const {	322 const char* next_literal_string() const {

327 return next_.literal_buffer->data();	323 return next_.literal_chars.start();

328 }	324 }

	325

	326

329 // Returns the length of the next token (that would be returned if	327 // Returns the length of the next token (that would be returned if

330 // Next() were called).	328 // Next() were called).

331 int next_literal_length() const {	329 int next_literal_length() const {

332 return next_.literal_buffer->pos() - 1;	330 // Excluding terminal '\x00' added by TerminateLiteral().

	331 return next_.literal_chars.length() - 1;

333 }	332 }

334	333

335 Vector<const char> next_literal() const {	334 Vector<const char> next_literal() const {

336 return Vector<const char>(next_literal_string(),	335 return Vector<const char>(next_literal_string(), next_literal_length());

337 next_literal_length());

338 }	336 }

339	337

340 // Scans the input as a regular expression pattern, previous	338 // Scans the input as a regular expression pattern, previous

341 // character(s) must be /(=). Returns true if a pattern is scanned.	339 // character(s) must be /(=). Returns true if a pattern is scanned.

342 bool ScanRegExpPattern(bool seen_equal);	340 bool ScanRegExpPattern(bool seen_equal);

343 // Returns true if regexp flags are scanned (always since flags can	341 // Returns true if regexp flags are scanned (always since flags can

344 // be empty).	342 // be empty).

345 bool ScanRegExpFlags();	343 bool ScanRegExpFlags();

346	344

347 // Seek forward to the given position. This operation does not	345 // Seek forward to the given position. This operation does not

(...skipping 16 matching lines...) Expand all Loading...
364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;	362 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

365	363

366 static const int kCharacterLookaheadBufferSize = 1;	364 static const int kCharacterLookaheadBufferSize = 1;

367 static const int kNoEndPosition = 1;	365 static const int kNoEndPosition = 1;

368	366

369 private:	367 private:

370 // The current and look-ahead token.	368 // The current and look-ahead token.

371 struct TokenDesc {	369 struct TokenDesc {

372 Token::Value token;	370 Token::Value token;

373 Location location;	371 Location location;

374 UTF8Buffer* literal_buffer;	372 Vector<const char> literal_chars;

375 };	373 };

376	374

377 void Init(Handle<String> source,	375 void Init(Handle<String> source,

378 unibrow::CharacterStream* stream,	376 unibrow::CharacterStream* stream,

379 int start_position, int end_position,	377 int start_position, int end_position,

380 ParserLanguage language);	378 ParserLanguage language);

381	379

382 // Literal buffer support	380 // Literal buffer support

383 void StartLiteral();	381 inline void StartLiteral();

384 void AddChar(uc32 ch);	382 inline void AddChar(uc32 ch);

385 void AddCharAdvance();	383 inline void AddCharAdvance();

386 void TerminateLiteral();	384 inline void TerminateLiteral();

387	385

388 // Low-level scanning support.	386 // Low-level scanning support.

389 void Advance() { c0_ = source_->Advance(); }	387 void Advance() { c0_ = source_->Advance(); }

390 void PushBack(uc32 ch) {	388 void PushBack(uc32 ch) {

391 source_->PushBack(ch);	389 source_->PushBack(ch);

392 c0_ = ch;	390 c0_ = ch;

393 }	391 }

394	392

395 bool SkipWhiteSpace() {	393 bool SkipWhiteSpace() {

396 if (is_parsing_json_) {	394 if (is_parsing_json_) {

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;	478 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;

481	479

482 // Source. Will point to one of the buffers declared above.	480 // Source. Will point to one of the buffers declared above.

483 UTF16Buffer* source_;	481 UTF16Buffer* source_;

484	482

485 // Used to convert the source string into a character stream when a stream	483 // Used to convert the source string into a character stream when a stream

486 // is not passed to the scanner.	484 // is not passed to the scanner.

487 SafeStringInputBuffer safe_string_input_buffer_;	485 SafeStringInputBuffer safe_string_input_buffer_;

488	486

489 // Buffer to hold literal values (identifiers, strings, numbers)	487 // Buffer to hold literal values (identifiers, strings, numbers)

490 // using 0-terminated UTF-8 encoding.	488 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.

491 UTF8Buffer literal_buffer_1_;	489 UTF8Buffer literal_buffer_;

492 UTF8Buffer literal_buffer_2_;

493	490

494 bool stack_overflow_;	491 bool stack_overflow_;

495 static StaticResource<Utf8Decoder> utf8_decoder_;	492 static StaticResource<Utf8Decoder> utf8_decoder_;

496	493

497 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	494 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

498 uc32 c0_;	495 uc32 c0_;

499 };	496 };

500	497

501 } } // namespace v8::internal	498 } } // namespace v8::internal

502	499

503 #endif // V8_SCANNER_H_	500 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/runtime.cc ('k') | src/scanner.cc » ('j') | src/utils.h » ('J')