src/scanner.h - Issue 1481613002: Create ast/ and parsing/ subdirectories and move appropriate files

Side by Side Diff: src/scanner.h

Issue 1481613002: Create ast/ and parsing/ subdirectories and move appropriate files (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Rebase Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 // Features shared by parsing and pre-parsing scanners.

6

7 #ifndef V8_SCANNER_H_

8 #define V8_SCANNER_H_

9

10 #include "src/allocation.h"

11 #include "src/base/logging.h"

12 #include "src/char-predicates.h"

13 #include "src/globals.h"

14 #include "src/hashmap.h"

15 #include "src/list.h"

16 #include "src/token.h"

17 #include "src/unicode.h"

18 #include "src/unicode-decoder.h"

19 #include "src/utils.h"

20

21 namespace v8 {

22 namespace internal {

23

24

25 class AstRawString;

26 class AstValueFactory;

27 class ParserRecorder;

28 class UnicodeCache;

29

30

// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
inline int HexValue(uc32 c) {
  // Decimal digits map to 0..9 once '0' is subtracted. The unsigned
  // comparison also rejects anything below '0', which wraps to a huge value.
  c -= '0';
  if (static_cast<unsigned>(c) <= 9) return c;
  // After the subtraction 'A'..'F' are 0x11..0x16 and 'a'..'f' are
  // 0x31..0x36. Setting bit 0x20 folds upper case onto lower case, and
  // rebasing by ('a' - '0') makes 'a'/'A' become 0.
  c = (c | 0x20) - ('a' - '0');
  if (static_cast<unsigned>(c) <= 5) return c + 10;
  return -1;
}

40

41

42 // ---------------------------------------------------------------------

43 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.

44 // A code unit is a 16 bit value representing either a 16 bit code point

45 // or one part of a surrogate pair that make a single 21 bit code point.

46

47 class Utf16CharacterStream {

48 public:

49 Utf16CharacterStream() : pos_(0) { }

50 virtual ~Utf16CharacterStream() { }

51

52 // Returns and advances past the next UTF-16 code unit in the input

53 // stream. If there are no more code units, it returns a negative

54 // value.

55 inline uc32 Advance() {

56 if (buffer_cursor_ < buffer_end_ \|\| ReadBlock()) {

57 pos_++;

58 return static_cast<uc32>(*(buffer_cursor_++));

59 }

60 // Note: currently the following increment is necessary to avoid a

61 // parser problem! The scanner treats the final kEndOfInput as

62 // a code unit with a position, and does math relative to that

63 // position.

64 pos_++;

65

66 return kEndOfInput;

67 }

68

69 // Return the current position in the code unit stream.

70 // Starts at zero.

71 inline size_t pos() const { return pos_; }

72

73 // Skips forward past the next code_unit_count UTF-16 code units

74 // in the input, or until the end of input if that comes sooner.

75 // Returns the number of code units actually skipped. If less

76 // than code_unit_count,

77 inline size_t SeekForward(size_t code_unit_count) {

78 size_t buffered_chars = buffer_end_ - buffer_cursor_;

79 if (code_unit_count <= buffered_chars) {

80 buffer_cursor_ += code_unit_count;

81 pos_ += code_unit_count;

82 return code_unit_count;

83 }

84 return SlowSeekForward(code_unit_count);

85 }

86

87 // Pushes back the most recently read UTF-16 code unit (or negative

88 // value if at end of input), i.e., the value returned by the most recent

89 // call to Advance.

90 // Must not be used right after calling SeekForward.

91 virtual void PushBack(int32_t code_unit) = 0;

92

93 virtual bool SetBookmark();

94 virtual void ResetToBookmark();

95

96 protected:

97 static const uc32 kEndOfInput = -1;

98

99 // Ensures that the buffer_cursor_ points to the code_unit at

100 // position pos_ of the input, if possible. If the position

101 // is at or after the end of the input, return false. If there

102 // are more code_units available, return true.

103 virtual bool ReadBlock() = 0;

104 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;

105

106 const uint16_t* buffer_cursor_;

107 const uint16_t* buffer_end_;

108 size_t pos_;

109 };

110

111

112 // ---------------------------------------------------------------------

113 // DuplicateFinder discovers duplicate symbols.

114

115 class DuplicateFinder {

116 public:

// Keeps the given UnicodeCache pointer, sizes the key backing store with an
// initial argument of 16, and installs Match as the hash map's key comparator.
117 explicit DuplicateFinder(UnicodeCache* constants)

118 : unicode_constants_(constants),

119 backing_store_(16),

120 map_(&Match) { }

121

122 int AddOneByteSymbol(Vector<const uint8_t> key, int value);

123 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);

124 // Adds a number literal by converting it (if necessary)

125 // to the string that ToString(ToNumber(literal)) would generate,

126 // and then adding that string with AddOneByteSymbol.

127 // This string is the actual value used as key in an object literal,

128 // and the one that must be different from the other keys.

129 int AddNumber(Vector<const uint8_t> key, int value);

130

131 private:

132 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);

133 // Backs up the key and its length in the backing store.

134 // The backup is stored with a base 127 encoding of the

135 // length (plus a bit saying whether the string is one byte),

136 // followed by the bytes of the key.

137 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);

138

139 // Compare two encoded keys (both pointing into the backing store)

140 // for having the same base-127 encoded lengths and representation,

141 // and then having the same 'length' bytes following.

142 static bool Match(void* first, void* second);

143 // Creates a hash from a sequence of bytes.

144 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);

145 // Checks whether a string containing a JS number is in its canonical

146 // form.

147 static bool IsNumberCanonical(Vector<const uint8_t> key);

148

149 // Size of buffer. Sufficient for using it to call DoubleToCString

150 // from conversions.h.

151 static const int kBufferSize = 100;

152

153 UnicodeCache* unicode_constants_;

154 // Backing store used to store strings used as hashmap keys.

155 SequenceCollector<unsigned char> backing_store_;

156 HashMap map_;

157 // Buffer used for string->number->canonical string conversions.

158 char number_buffer_[kBufferSize];

159 };

160

161

162 // ----------------------------------------------------------------------------

163 // LiteralBuffer - Collector of chars of literals.

164

165 class LiteralBuffer {

166 public:

167 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }

168

169 ~LiteralBuffer() { backing_store_.Dispose(); }

170

171 INLINE(void AddChar(uint32_t code_unit)) {

172 if (position_ >= backing_store_.length()) ExpandBuffer();

173 if (is_one_byte_) {

174 if (code_unit <= unibrow::Latin1::kMaxChar) {

175 backing_store_[position_] = static_cast<byte>(code_unit);

176 position_ += kOneByteSize;

177 return;

178 }

179 ConvertToTwoByte();

180 }

181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

182 reinterpret_cast<uint16_t>(&backing_store_[position_]) = code_unit;

183 position_ += kUC16Size;

184 } else {

185 reinterpret_cast<uint16_t>(&backing_store_[position_]) =

186 unibrow::Utf16::LeadSurrogate(code_unit);

187 position_ += kUC16Size;

188 if (position_ >= backing_store_.length()) ExpandBuffer();

189 reinterpret_cast<uint16_t>(&backing_store_[position_]) =

190 unibrow::Utf16::TrailSurrogate(code_unit);

191 position_ += kUC16Size;

192 }

193 }

194

195 bool is_one_byte() const { return is_one_byte_; }

196

197 bool is_contextual_keyword(Vector<const char> keyword) const {

198 return is_one_byte() && keyword.length() == position_ &&

199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);

200 }

201

202 Vector<const uint16_t> two_byte_literal() const {

203 DCHECK(!is_one_byte_);

204 DCHECK((position_ & 0x1) == 0);

205 return Vector<const uint16_t>(

206 reinterpret_cast<const uint16_t*>(backing_store_.start()),

207 position_ >> 1);

208 }

209

210 Vector<const uint8_t> one_byte_literal() const {

211 DCHECK(is_one_byte_);

212 return Vector<const uint8_t>(

213 reinterpret_cast<const uint8_t*>(backing_store_.start()),

214 position_);

215 }

216

217 int length() const {

218 return is_one_byte_ ? position_ : (position_ >> 1);

219 }

220

221 void ReduceLength(int delta) {

222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);

223 }

224

225 void Reset() {

226 position_ = 0;

227 is_one_byte_ = true;

228 }

229

230 Handle<String> Internalize(Isolate* isolate) const;

231

232 void CopyFrom(const LiteralBuffer* other) {

233 if (other == nullptr) {

234 Reset();

235 } else {

236 is_one_byte_ = other->is_one_byte_;

237 position_ = other->position_;

238 backing_store_.Dispose();

239 backing_store_ = other->backing_store_.Clone();

240 }

241 }

242

243 private:

244 static const int kInitialCapacity = 16;

245 static const int kGrowthFactory = 4;

246 static const int kMinConversionSlack = 256;

247 static const int kMaxGrowth = 1 * MB;

248 inline int NewCapacity(int min_capacity) {

249 int capacity = Max(min_capacity, backing_store_.length());

250 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);

251 return new_capacity;

252 }

253

254 void ExpandBuffer() {

255 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));

256 MemCopy(new_store.start(), backing_store_.start(), position_);

257 backing_store_.Dispose();

258 backing_store_ = new_store;

259 }

260

261 void ConvertToTwoByte() {

262 DCHECK(is_one_byte_);

263 Vector<byte> new_store;

264 int new_content_size = position_ * kUC16Size;

265 if (new_content_size >= backing_store_.length()) {

266 // Ensure room for all currently read code units as UC16 as well

267 // as the code unit about to be stored.

268 new_store = Vector<byte>::New(NewCapacity(new_content_size));

269 } else {

270 new_store = backing_store_;

271 }

272 uint8_t* src = backing_store_.start();

273 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());

274 for (int i = position_ - 1; i >= 0; i--) {

275 dst[i] = src[i];

276 }

277 if (new_store.start() != backing_store_.start()) {

278 backing_store_.Dispose();

279 backing_store_ = new_store;

280 }

281 position_ = new_content_size;

282 is_one_byte_ = false;

283 }

284

285 bool is_one_byte_;

286 int position_;

287 Vector<byte> backing_store_;

288

289 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);

290 };

291

292

293 // ----------------------------------------------------------------------------

294 // JavaScript Scanner.

295

296 class Scanner {

297 public:

298 // Scoped helper for literal recording. Automatically drops the literal

299 // if aborting the scanning before it's complete.

300 class LiteralScope {

301 public:

302 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {

303 scanner_->StartLiteral();

304 }

305 ~LiteralScope() {

306 if (!complete_) scanner_->DropLiteral();

307 }

308 void Complete() {

309 complete_ = true;

310 }

311

312 private:

313 Scanner* scanner_;

314 bool complete_;

315 };

316

317 // Scoped helper for a re-settable bookmark.

318 class BookmarkScope {

319 public:

320 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {

321 DCHECK_NOT_NULL(scanner_);

322 }

323 ~BookmarkScope() { scanner_->DropBookmark(); }

324

325 bool Set() { return scanner_->SetBookmark(); }

326 void Reset() { scanner_->ResetToBookmark(); }

327 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }

328 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }

329

330 private:

331 Scanner* scanner_;

332

333 DISALLOW_COPY_AND_ASSIGN(BookmarkScope);

334 };

335

336 // Representation of an interval of source positions.

337 struct Location {

338 Location(int b, int e) : beg_pos(b), end_pos(e) { }

339 Location() : beg_pos(0), end_pos(0) { }

340

341 bool IsValid() const {

342 return beg_pos >= 0 && end_pos >= beg_pos;

343 }

344

345 static Location invalid() { return Location(-1, -1); }

346

347 int beg_pos;

348 int end_pos;

349 };

350

351 // -1 is outside of the range of any real source code.

352 static const int kNoOctalLocation = -1;

353

354 explicit Scanner(UnicodeCache* scanner_contants);

355

356 void Initialize(Utf16CharacterStream* source);

357

358 // Returns the next token and advances input.

359 Token::Value Next();

360 // Returns the token following peek()

361 Token::Value PeekAhead();

362 // Returns the current token again.

363 Token::Value current_token() { return current_.token; }

364 // Returns the location information for the current token

365 // (the token last returned by Next()).

366 Location location() const { return current_.location; }

367

368 // Similar functions for the upcoming token.

369

370 // One token look-ahead (past the token returned by Next()).

371 Token::Value peek() const { return next_.token; }

372

373 Location peek_location() const { return next_.location; }

374

375 bool literal_contains_escapes() const {

376 return LiteralContainsEscapes(current_);

377 }

378 bool next_literal_contains_escapes() const {

379 return LiteralContainsEscapes(next_);

380 }

381 bool is_literal_contextual_keyword(Vector<const char> keyword) {

382 DCHECK_NOT_NULL(current_.literal_chars);

383 return current_.literal_chars->is_contextual_keyword(keyword);

384 }

385 bool is_next_contextual_keyword(Vector<const char> keyword) {

386 DCHECK_NOT_NULL(next_.literal_chars);

387 return next_.literal_chars->is_contextual_keyword(keyword);

388 }

389

390 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);

391 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);

392 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);

393

394 double DoubleValue();

395 bool ContainsDot();

396 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {

397 if (is_literal_one_byte() &&

398 literal_length() == length &&

399 (allow_escapes \|\| !literal_contains_escapes())) {

400 const char* token =

401 reinterpret_cast<const char*>(literal_one_byte_string().start());

402 return !strncmp(token, data, length);

403 }

404 return false;

405 }

406 inline bool UnescapedLiteralMatches(const char* data, int length) {

407 return LiteralMatches(data, length, false);

408 }

409

410 void IsGetOrSet(bool* is_get, bool* is_set) {

411 if (is_literal_one_byte() &&

412 literal_length() == 3 &&

413 !literal_contains_escapes()) {

414 const char* token =

415 reinterpret_cast<const char*>(literal_one_byte_string().start());

416 *is_get = strncmp(token, "get", 3) == 0;

417 is_set = !is_get && strncmp(token, "set", 3) == 0;

418 }

419 }

420

421 int FindSymbol(DuplicateFinder* finder, int value);

422

423 UnicodeCache* unicode_cache() { return unicode_cache_; }

424

425 // Returns the location of the last seen octal literal.

426 Location octal_position() const { return octal_pos_; }

427 void clear_octal_position() { octal_pos_ = Location::invalid(); }

428

429 // Returns the value of the last smi that was scanned.

430 int smi_value() const { return current_.smi_value_; }

431

432 // Seek forward to the given position. This operation does not

433 // work in general, for instance when there are pushed back

434 // characters, but works for seeking forward until simple delimiter

435 // tokens, which is what it is used for.

436 void SeekForward(int pos);

437

438 // Returns true if there was a line terminator before the peek'ed token,

439 // possibly inside a multi-line comment.

440 bool HasAnyLineTerminatorBeforeNext() const {

441 return has_line_terminator_before_next_ \|\|

442 has_multiline_comment_before_next_;

443 }

444

445 // Scans the input as a regular expression pattern, previous

446 // character(s) must be /(=). Returns true if a pattern is scanned.

447 bool ScanRegExpPattern(bool seen_equal);

448 // Scans the input as regular expression flags. Returns the flags on success.

449 Maybe<RegExp::Flags> ScanRegExpFlags();

450

451 // Scans the input as a template literal

452 Token::Value ScanTemplateStart();

453 Token::Value ScanTemplateContinuation();

454

455 const LiteralBuffer* source_url() const { return &source_url_; }

456 const LiteralBuffer* source_mapping_url() const {

457 return &source_mapping_url_;

458 }

459

460 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;

461

462 private:

463 // The current and look-ahead token.

464 struct TokenDesc {

465 Token::Value token;

466 Location location;

467 LiteralBuffer* literal_chars;

468 LiteralBuffer* raw_literal_chars;

469 int smi_value_;

470 };

471

472 static const int kCharacterLookaheadBufferSize = 1;

473

474 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

475 template <bool capture_raw>

476 uc32 ScanOctalEscape(uc32 c, int length);

477

478 // Call this after setting source_ to the input.

479 void Init() {

480 // Set c0_ (one character ahead)

481 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);

482 Advance();

483 // Initialize current_ to not refer to a literal.

484 current_.literal_chars = NULL;

485 current_.raw_literal_chars = NULL;

486 next_next_.token = Token::UNINITIALIZED;

487 }

488

489 // Support BookmarkScope functionality.

490 bool SetBookmark();

491 void ResetToBookmark();

492 bool BookmarkHasBeenSet();

493 bool BookmarkHasBeenReset();

494 void DropBookmark();

495 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);

496

497 // Literal buffer support

498 inline void StartLiteral() {

499 LiteralBuffer* free_buffer =

500 (current_.literal_chars == &literal_buffer0_)

501 ? &literal_buffer1_

502 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_

503 : &literal_buffer0_;

504 free_buffer->Reset();

505 next_.literal_chars = free_buffer;

506 }

507

508 inline void StartRawLiteral() {

509 LiteralBuffer* free_buffer =

510 (current_.raw_literal_chars == &raw_literal_buffer0_)

511 ? &raw_literal_buffer1_

512 : (current_.raw_literal_chars == &raw_literal_buffer1_)

513 ? &raw_literal_buffer2_

514 : &raw_literal_buffer0_;

515 free_buffer->Reset();

516 next_.raw_literal_chars = free_buffer;

517 }

518

519 INLINE(void AddLiteralChar(uc32 c)) {

520 DCHECK_NOT_NULL(next_.literal_chars);

521 next_.literal_chars->AddChar(c);

522 }

523

524 INLINE(void AddRawLiteralChar(uc32 c)) {

525 DCHECK_NOT_NULL(next_.raw_literal_chars);

526 next_.raw_literal_chars->AddChar(c);

527 }

528

529 INLINE(void ReduceRawLiteralLength(int delta)) {

530 DCHECK_NOT_NULL(next_.raw_literal_chars);

531 next_.raw_literal_chars->ReduceLength(delta);

532 }

533

534 // Stops scanning of a literal and drop the collected characters,

535 // e.g., due to an encountered error.

536 inline void DropLiteral() {

537 next_.literal_chars = NULL;

538 next_.raw_literal_chars = NULL;

539 }

540

541 inline void AddLiteralCharAdvance() {

542 AddLiteralChar(c0_);

543 Advance();

544 }

545

546 // Low-level scanning support.

547 template <bool capture_raw = false, bool check_surrogate = true>

548 void Advance() {

549 if (capture_raw) {

550 AddRawLiteralChar(c0_);

551 }

552 c0_ = source_->Advance();

553 if (check_surrogate) HandleLeadSurrogate();

554 }

555

556 void HandleLeadSurrogate() {

557 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {

558 uc32 c1 = source_->Advance();

559 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {

560 source_->PushBack(c1);

561 } else {

562 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);

563 }

564 }

565 }

566

567 void PushBack(uc32 ch) {

568 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {

569 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));

570 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));

571 } else {

572 source_->PushBack(c0_);

573 }

574 c0_ = ch;

575 }

576

577 inline Token::Value Select(Token::Value tok) {

578 Advance();

579 return tok;

580 }

581

582 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {

583 Advance();

584 if (c0_ == next) {

585 Advance();

586 return then;

587 } else {

588 return else_;

589 }

590 }

591

592 // Returns the literal string, if any, for the current token (the

593 // token last returned by Next()). The string is 0-terminated.

594 // Literal strings are collected for identifiers, strings, numbers as well

595 // as for template literals. For template literals we also collect the raw

596 // form.

597 // These functions only give the correct result if the literal was scanned

598 // when a LiteralScope object is alive.

599 Vector<const uint8_t> literal_one_byte_string() {

600 DCHECK_NOT_NULL(current_.literal_chars);

601 return current_.literal_chars->one_byte_literal();

602 }

603 Vector<const uint16_t> literal_two_byte_string() {

604 DCHECK_NOT_NULL(current_.literal_chars);

605 return current_.literal_chars->two_byte_literal();

606 }

607 bool is_literal_one_byte() {

608 DCHECK_NOT_NULL(current_.literal_chars);

609 return current_.literal_chars->is_one_byte();

610 }

611 int literal_length() const {

612 DCHECK_NOT_NULL(current_.literal_chars);

613 return current_.literal_chars->length();

614 }

615 // Returns the literal string for the next token (the token that

616 // would be returned if Next() were called).

617 Vector<const uint8_t> next_literal_one_byte_string() {

618 DCHECK_NOT_NULL(next_.literal_chars);

619 return next_.literal_chars->one_byte_literal();

620 }

621 Vector<const uint16_t> next_literal_two_byte_string() {

622 DCHECK_NOT_NULL(next_.literal_chars);

623 return next_.literal_chars->two_byte_literal();

624 }

625 bool is_next_literal_one_byte() {

626 DCHECK_NOT_NULL(next_.literal_chars);

627 return next_.literal_chars->is_one_byte();

628 }

629 Vector<const uint8_t> raw_literal_one_byte_string() {

630 DCHECK_NOT_NULL(current_.raw_literal_chars);

631 return current_.raw_literal_chars->one_byte_literal();

632 }

633 Vector<const uint16_t> raw_literal_two_byte_string() {

634 DCHECK_NOT_NULL(current_.raw_literal_chars);

635 return current_.raw_literal_chars->two_byte_literal();

636 }

637 bool is_raw_literal_one_byte() {

638 DCHECK_NOT_NULL(current_.raw_literal_chars);

639 return current_.raw_literal_chars->is_one_byte();

640 }

641

642 template <bool capture_raw>

643 uc32 ScanHexNumber(int expected_length);

644 // Scan a number of any length but not bigger than max_value. For example, the

645 // number can be 000000001, so it's very long in characters but its value is

646 // small.

647 template <bool capture_raw>

648 uc32 ScanUnlimitedLengthHexNumber(int max_value);

649

650 // Scans a single JavaScript token.

651 void Scan();

652

653 bool SkipWhiteSpace();

654 Token::Value SkipSingleLineComment();

655 Token::Value SkipSourceURLComment();

656 void TryToParseSourceURLComment();

657 Token::Value SkipMultiLineComment();

658 // Scans a possible HTML comment -- begins with '<!'.

659 Token::Value ScanHtmlComment();

660

661 void ScanDecimalDigits();

662 Token::Value ScanNumber(bool seen_period);

663 Token::Value ScanIdentifierOrKeyword();

664 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);

665

666 Token::Value ScanString();

667

668 // Scans an escape-sequence which is part of a string and adds the

669 // decoded character to the current literal. Returns true if a pattern

670 // is scanned.

671 template <bool capture_raw, bool in_template_literal>

672 bool ScanEscape();

673

674 // Decodes a Unicode escape-sequence which is part of an identifier.

675 // If the escape sequence cannot be decoded the result is kBadChar.

676 uc32 ScanIdentifierUnicodeEscape();

677 // Helper for the above functions.

678 template <bool capture_raw>

679 uc32 ScanUnicodeEscape();

680

681 Token::Value ScanTemplateSpan();

682

683 // Return the current source position.

684 int source_pos() {

685 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;

686 }

687

688 static bool LiteralContainsEscapes(const TokenDesc& token) {

689 Location location = token.location;

690 int source_length = (location.end_pos - location.beg_pos);

691 if (token.token == Token::STRING) {

692 // Subtract delimiters.

693 source_length -= 2;

694 }

695 return token.literal_chars->length() != source_length;

696 }

697

698 UnicodeCache* unicode_cache_;

699

700 // Buffers collecting literal strings, numbers, etc.

701 LiteralBuffer literal_buffer0_;

702 LiteralBuffer literal_buffer1_;

703 LiteralBuffer literal_buffer2_;

704

705 // Values parsed from magic comments.

706 LiteralBuffer source_url_;

707 LiteralBuffer source_mapping_url_;

708

709 // Buffer to store raw string values

710 LiteralBuffer raw_literal_buffer0_;

711 LiteralBuffer raw_literal_buffer1_;

712 LiteralBuffer raw_literal_buffer2_;

713

714 TokenDesc current_; // desc for current token (as returned by Next())

715 TokenDesc next_; // desc for next token (one token look-ahead)

716 TokenDesc next_next_; // desc for the token after next (after PeakAhead())

717

718 // Variables for Scanner::BookmarkScope and the *Bookmark implementation.

719 // These variables contain the scanner state when a bookmark is set.

720 //

721 // We will use bookmark_c0_ as a 'control' variable, where:

722 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.

723 // - bookmark_c0_ == -1: No bookmark has been set.

724 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).

725 //

726 // Which state is being bookmarked? The parser state is distributed over

727 // several variables, roughly like this:

728 // ... 1234 + 5678 ..... [character stream]

729 // [current_] [next_] c0_ \| [scanner state]

730 // So when the scanner is logically at the beginning of an expression

731 // like "1234 + 4567", then:

732 // - current_ contains "1234"

733 // - next_ contains "+"

734 // - c0_ contains ' ' (the space between "+" and "5678",

735 // - the source_ character stream points to the beginning of "5678".

736 // To be able to restore this state, we will keep copies of current_, next_,

737 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the

738 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.

739 static const uc32 kNoBookmark = -1;

740 static const uc32 kBookmarkWasApplied = -2;

741 uc32 bookmark_c0_;

742 TokenDesc bookmark_current_;

743 TokenDesc bookmark_next_;

744 LiteralBuffer bookmark_current_literal_;

745 LiteralBuffer bookmark_current_raw_literal_;

746 LiteralBuffer bookmark_next_literal_;

747 LiteralBuffer bookmark_next_raw_literal_;

748

749 // Input stream. Must be initialized to an Utf16CharacterStream.

750 Utf16CharacterStream* source_;

751

752

753 // Start position of the octal literal last scanned.

754 Location octal_pos_;

755

756 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

757 uc32 c0_;

758

759 // Whether there is a line terminator whitespace character after

760 // the current token, and before the next. Does not count newlines

761 // inside multiline comments.

762 bool has_line_terminator_before_next_;

763 // Whether there is a multi-line comment that contains a

764 // line-terminator after the current token, and before the next.

765 bool has_multiline_comment_before_next_;

766 };

767

768 } // namespace internal

769 } // namespace v8

770

771 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/runtime/runtime-scopes.cc ('k') | src/scanner.cc » ('j') | no next file with comments »