Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: src/scanner.h

Issue 1481613002: Create ast/ and parsing/ subdirectories and move appropriate files (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Rebase Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/runtime/runtime-scopes.cc ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Features shared by parsing and pre-parsing scanners.
6
7 #ifndef V8_SCANNER_H_
8 #define V8_SCANNER_H_
9
10 #include "src/allocation.h"
11 #include "src/base/logging.h"
12 #include "src/char-predicates.h"
13 #include "src/globals.h"
14 #include "src/hashmap.h"
15 #include "src/list.h"
16 #include "src/token.h"
17 #include "src/unicode.h"
18 #include "src/unicode-decoder.h"
19 #include "src/utils.h"
20
21 namespace v8 {
22 namespace internal {
23
24
25 class AstRawString;
26 class AstValueFactory;
27 class ParserRecorder;
28 class UnicodeCache;
29
30
31 // Returns the value (0 .. 15) of a hexadecimal character c.
32 // If c is not a legal hexadecimal character, returns a value < 0.
33 inline int HexValue(uc32 c) {
34 c -= '0';
35 if (static_cast<unsigned>(c) <= 9) return c;
36 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
37 if (static_cast<unsigned>(c) <= 5) return c + 10;
38 return -1;
39 }
40
41
42 // ---------------------------------------------------------------------
43 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
44 // A code unit is a 16 bit value representing either a 16 bit code point
45 // or one part of a surrogate pair that make a single 21 bit code point.
46
47 class Utf16CharacterStream {
48 public:
49 Utf16CharacterStream() : pos_(0) { }
50 virtual ~Utf16CharacterStream() { }
51
52 // Returns and advances past the next UTF-16 code unit in the input
53 // stream. If there are no more code units, it returns a negative
54 // value.
55 inline uc32 Advance() {
56 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
57 pos_++;
58 return static_cast<uc32>(*(buffer_cursor_++));
59 }
60 // Note: currently the following increment is necessary to avoid a
61 // parser problem! The scanner treats the final kEndOfInput as
62 // a code unit with a position, and does math relative to that
63 // position.
64 pos_++;
65
66 return kEndOfInput;
67 }
68
69 // Return the current position in the code unit stream.
70 // Starts at zero.
71 inline size_t pos() const { return pos_; }
72
73 // Skips forward past the next code_unit_count UTF-16 code units
74 // in the input, or until the end of input if that comes sooner.
75 // Returns the number of code units actually skipped. If less
76 // than code_unit_count,
77 inline size_t SeekForward(size_t code_unit_count) {
78 size_t buffered_chars = buffer_end_ - buffer_cursor_;
79 if (code_unit_count <= buffered_chars) {
80 buffer_cursor_ += code_unit_count;
81 pos_ += code_unit_count;
82 return code_unit_count;
83 }
84 return SlowSeekForward(code_unit_count);
85 }
86
87 // Pushes back the most recently read UTF-16 code unit (or negative
88 // value if at end of input), i.e., the value returned by the most recent
89 // call to Advance.
90 // Must not be used right after calling SeekForward.
91 virtual void PushBack(int32_t code_unit) = 0;
92
93 virtual bool SetBookmark();
94 virtual void ResetToBookmark();
95
96 protected:
97 static const uc32 kEndOfInput = -1;
98
99 // Ensures that the buffer_cursor_ points to the code_unit at
100 // position pos_ of the input, if possible. If the position
101 // is at or after the end of the input, return false. If there
102 // are more code_units available, return true.
103 virtual bool ReadBlock() = 0;
104 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
105
106 const uint16_t* buffer_cursor_;
107 const uint16_t* buffer_end_;
108 size_t pos_;
109 };
110
111
112 // ---------------------------------------------------------------------
113 // DuplicateFinder discovers duplicate symbols.
114
115 class DuplicateFinder {
116 public:
117 explicit DuplicateFinder(UnicodeCache* constants)
118 : unicode_constants_(constants),
119 backing_store_(16),
120 map_(&Match) { }
121
122 int AddOneByteSymbol(Vector<const uint8_t> key, int value);
123 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
124 // Add a a number literal by converting it (if necessary)
125 // to the string that ToString(ToNumber(literal)) would generate.
126 // and then adding that string with AddOneByteSymbol.
127 // This string is the actual value used as key in an object literal,
128 // and the one that must be different from the other keys.
129 int AddNumber(Vector<const uint8_t> key, int value);
130
131 private:
132 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
133 // Backs up the key and its length in the backing store.
134 // The backup is stored with a base 127 encoding of the
135 // length (plus a bit saying whether the string is one byte),
136 // followed by the bytes of the key.
137 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
138
139 // Compare two encoded keys (both pointing into the backing store)
140 // for having the same base-127 encoded lengths and representation.
141 // and then having the same 'length' bytes following.
142 static bool Match(void* first, void* second);
143 // Creates a hash from a sequence of bytes.
144 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
145 // Checks whether a string containing a JS number is its canonical
146 // form.
147 static bool IsNumberCanonical(Vector<const uint8_t> key);
148
149 // Size of buffer. Sufficient for using it to call DoubleToCString in
150 // from conversions.h.
151 static const int kBufferSize = 100;
152
153 UnicodeCache* unicode_constants_;
154 // Backing store used to store strings used as hashmap keys.
155 SequenceCollector<unsigned char> backing_store_;
156 HashMap map_;
157 // Buffer used for string->number->canonical string conversions.
158 char number_buffer_[kBufferSize];
159 };
160
161
162 // ----------------------------------------------------------------------------
163 // LiteralBuffer - Collector of chars of literals.
164
165 class LiteralBuffer {
166 public:
167 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
168
169 ~LiteralBuffer() { backing_store_.Dispose(); }
170
171 INLINE(void AddChar(uint32_t code_unit)) {
172 if (position_ >= backing_store_.length()) ExpandBuffer();
173 if (is_one_byte_) {
174 if (code_unit <= unibrow::Latin1::kMaxChar) {
175 backing_store_[position_] = static_cast<byte>(code_unit);
176 position_ += kOneByteSize;
177 return;
178 }
179 ConvertToTwoByte();
180 }
181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
182 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
183 position_ += kUC16Size;
184 } else {
185 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
186 unibrow::Utf16::LeadSurrogate(code_unit);
187 position_ += kUC16Size;
188 if (position_ >= backing_store_.length()) ExpandBuffer();
189 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
190 unibrow::Utf16::TrailSurrogate(code_unit);
191 position_ += kUC16Size;
192 }
193 }
194
195 bool is_one_byte() const { return is_one_byte_; }
196
197 bool is_contextual_keyword(Vector<const char> keyword) const {
198 return is_one_byte() && keyword.length() == position_ &&
199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
200 }
201
202 Vector<const uint16_t> two_byte_literal() const {
203 DCHECK(!is_one_byte_);
204 DCHECK((position_ & 0x1) == 0);
205 return Vector<const uint16_t>(
206 reinterpret_cast<const uint16_t*>(backing_store_.start()),
207 position_ >> 1);
208 }
209
210 Vector<const uint8_t> one_byte_literal() const {
211 DCHECK(is_one_byte_);
212 return Vector<const uint8_t>(
213 reinterpret_cast<const uint8_t*>(backing_store_.start()),
214 position_);
215 }
216
217 int length() const {
218 return is_one_byte_ ? position_ : (position_ >> 1);
219 }
220
221 void ReduceLength(int delta) {
222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
223 }
224
225 void Reset() {
226 position_ = 0;
227 is_one_byte_ = true;
228 }
229
230 Handle<String> Internalize(Isolate* isolate) const;
231
232 void CopyFrom(const LiteralBuffer* other) {
233 if (other == nullptr) {
234 Reset();
235 } else {
236 is_one_byte_ = other->is_one_byte_;
237 position_ = other->position_;
238 backing_store_.Dispose();
239 backing_store_ = other->backing_store_.Clone();
240 }
241 }
242
243 private:
244 static const int kInitialCapacity = 16;
245 static const int kGrowthFactory = 4;
246 static const int kMinConversionSlack = 256;
247 static const int kMaxGrowth = 1 * MB;
248 inline int NewCapacity(int min_capacity) {
249 int capacity = Max(min_capacity, backing_store_.length());
250 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
251 return new_capacity;
252 }
253
254 void ExpandBuffer() {
255 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
256 MemCopy(new_store.start(), backing_store_.start(), position_);
257 backing_store_.Dispose();
258 backing_store_ = new_store;
259 }
260
261 void ConvertToTwoByte() {
262 DCHECK(is_one_byte_);
263 Vector<byte> new_store;
264 int new_content_size = position_ * kUC16Size;
265 if (new_content_size >= backing_store_.length()) {
266 // Ensure room for all currently read code units as UC16 as well
267 // as the code unit about to be stored.
268 new_store = Vector<byte>::New(NewCapacity(new_content_size));
269 } else {
270 new_store = backing_store_;
271 }
272 uint8_t* src = backing_store_.start();
273 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
274 for (int i = position_ - 1; i >= 0; i--) {
275 dst[i] = src[i];
276 }
277 if (new_store.start() != backing_store_.start()) {
278 backing_store_.Dispose();
279 backing_store_ = new_store;
280 }
281 position_ = new_content_size;
282 is_one_byte_ = false;
283 }
284
285 bool is_one_byte_;
286 int position_;
287 Vector<byte> backing_store_;
288
289 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
290 };
291
292
293 // ----------------------------------------------------------------------------
294 // JavaScript Scanner.
295
296 class Scanner {
297 public:
298 // Scoped helper for literal recording. Automatically drops the literal
299 // if aborting the scanning before it's complete.
300 class LiteralScope {
301 public:
302 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
303 scanner_->StartLiteral();
304 }
305 ~LiteralScope() {
306 if (!complete_) scanner_->DropLiteral();
307 }
308 void Complete() {
309 complete_ = true;
310 }
311
312 private:
313 Scanner* scanner_;
314 bool complete_;
315 };
316
317 // Scoped helper for a re-settable bookmark.
318 class BookmarkScope {
319 public:
320 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
321 DCHECK_NOT_NULL(scanner_);
322 }
323 ~BookmarkScope() { scanner_->DropBookmark(); }
324
325 bool Set() { return scanner_->SetBookmark(); }
326 void Reset() { scanner_->ResetToBookmark(); }
327 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
328 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
329
330 private:
331 Scanner* scanner_;
332
333 DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
334 };
335
336 // Representation of an interval of source positions.
337 struct Location {
338 Location(int b, int e) : beg_pos(b), end_pos(e) { }
339 Location() : beg_pos(0), end_pos(0) { }
340
341 bool IsValid() const {
342 return beg_pos >= 0 && end_pos >= beg_pos;
343 }
344
345 static Location invalid() { return Location(-1, -1); }
346
347 int beg_pos;
348 int end_pos;
349 };
350
351 // -1 is outside of the range of any real source code.
352 static const int kNoOctalLocation = -1;
353
354 explicit Scanner(UnicodeCache* scanner_contants);
355
356 void Initialize(Utf16CharacterStream* source);
357
358 // Returns the next token and advances input.
359 Token::Value Next();
360 // Returns the token following peek()
361 Token::Value PeekAhead();
362 // Returns the current token again.
363 Token::Value current_token() { return current_.token; }
364 // Returns the location information for the current token
365 // (the token last returned by Next()).
366 Location location() const { return current_.location; }
367
368 // Similar functions for the upcoming token.
369
370 // One token look-ahead (past the token returned by Next()).
371 Token::Value peek() const { return next_.token; }
372
373 Location peek_location() const { return next_.location; }
374
375 bool literal_contains_escapes() const {
376 return LiteralContainsEscapes(current_);
377 }
378 bool next_literal_contains_escapes() const {
379 return LiteralContainsEscapes(next_);
380 }
381 bool is_literal_contextual_keyword(Vector<const char> keyword) {
382 DCHECK_NOT_NULL(current_.literal_chars);
383 return current_.literal_chars->is_contextual_keyword(keyword);
384 }
385 bool is_next_contextual_keyword(Vector<const char> keyword) {
386 DCHECK_NOT_NULL(next_.literal_chars);
387 return next_.literal_chars->is_contextual_keyword(keyword);
388 }
389
390 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
391 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
392 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
393
394 double DoubleValue();
395 bool ContainsDot();
396 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
397 if (is_literal_one_byte() &&
398 literal_length() == length &&
399 (allow_escapes || !literal_contains_escapes())) {
400 const char* token =
401 reinterpret_cast<const char*>(literal_one_byte_string().start());
402 return !strncmp(token, data, length);
403 }
404 return false;
405 }
406 inline bool UnescapedLiteralMatches(const char* data, int length) {
407 return LiteralMatches(data, length, false);
408 }
409
410 void IsGetOrSet(bool* is_get, bool* is_set) {
411 if (is_literal_one_byte() &&
412 literal_length() == 3 &&
413 !literal_contains_escapes()) {
414 const char* token =
415 reinterpret_cast<const char*>(literal_one_byte_string().start());
416 *is_get = strncmp(token, "get", 3) == 0;
417 *is_set = !*is_get && strncmp(token, "set", 3) == 0;
418 }
419 }
420
421 int FindSymbol(DuplicateFinder* finder, int value);
422
423 UnicodeCache* unicode_cache() { return unicode_cache_; }
424
425 // Returns the location of the last seen octal literal.
426 Location octal_position() const { return octal_pos_; }
427 void clear_octal_position() { octal_pos_ = Location::invalid(); }
428
429 // Returns the value of the last smi that was scanned.
430 int smi_value() const { return current_.smi_value_; }
431
432 // Seek forward to the given position. This operation does not
433 // work in general, for instance when there are pushed back
434 // characters, but works for seeking forward until simple delimiter
435 // tokens, which is what it is used for.
436 void SeekForward(int pos);
437
438 // Returns true if there was a line terminator before the peek'ed token,
439 // possibly inside a multi-line comment.
440 bool HasAnyLineTerminatorBeforeNext() const {
441 return has_line_terminator_before_next_ ||
442 has_multiline_comment_before_next_;
443 }
444
445 // Scans the input as a regular expression pattern, previous
446 // character(s) must be /(=). Returns true if a pattern is scanned.
447 bool ScanRegExpPattern(bool seen_equal);
448 // Scans the input as regular expression flags. Returns the flags on success.
449 Maybe<RegExp::Flags> ScanRegExpFlags();
450
451 // Scans the input as a template literal
452 Token::Value ScanTemplateStart();
453 Token::Value ScanTemplateContinuation();
454
455 const LiteralBuffer* source_url() const { return &source_url_; }
456 const LiteralBuffer* source_mapping_url() const {
457 return &source_mapping_url_;
458 }
459
460 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
461
462 private:
463 // The current and look-ahead token.
464 struct TokenDesc {
465 Token::Value token;
466 Location location;
467 LiteralBuffer* literal_chars;
468 LiteralBuffer* raw_literal_chars;
469 int smi_value_;
470 };
471
472 static const int kCharacterLookaheadBufferSize = 1;
473
474 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
475 template <bool capture_raw>
476 uc32 ScanOctalEscape(uc32 c, int length);
477
478 // Call this after setting source_ to the input.
479 void Init() {
480 // Set c0_ (one character ahead)
481 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
482 Advance();
483 // Initialize current_ to not refer to a literal.
484 current_.literal_chars = NULL;
485 current_.raw_literal_chars = NULL;
486 next_next_.token = Token::UNINITIALIZED;
487 }
488
489 // Support BookmarkScope functionality.
490 bool SetBookmark();
491 void ResetToBookmark();
492 bool BookmarkHasBeenSet();
493 bool BookmarkHasBeenReset();
494 void DropBookmark();
495 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
496
497 // Literal buffer support
498 inline void StartLiteral() {
499 LiteralBuffer* free_buffer =
500 (current_.literal_chars == &literal_buffer0_)
501 ? &literal_buffer1_
502 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
503 : &literal_buffer0_;
504 free_buffer->Reset();
505 next_.literal_chars = free_buffer;
506 }
507
508 inline void StartRawLiteral() {
509 LiteralBuffer* free_buffer =
510 (current_.raw_literal_chars == &raw_literal_buffer0_)
511 ? &raw_literal_buffer1_
512 : (current_.raw_literal_chars == &raw_literal_buffer1_)
513 ? &raw_literal_buffer2_
514 : &raw_literal_buffer0_;
515 free_buffer->Reset();
516 next_.raw_literal_chars = free_buffer;
517 }
518
519 INLINE(void AddLiteralChar(uc32 c)) {
520 DCHECK_NOT_NULL(next_.literal_chars);
521 next_.literal_chars->AddChar(c);
522 }
523
524 INLINE(void AddRawLiteralChar(uc32 c)) {
525 DCHECK_NOT_NULL(next_.raw_literal_chars);
526 next_.raw_literal_chars->AddChar(c);
527 }
528
529 INLINE(void ReduceRawLiteralLength(int delta)) {
530 DCHECK_NOT_NULL(next_.raw_literal_chars);
531 next_.raw_literal_chars->ReduceLength(delta);
532 }
533
534 // Stops scanning of a literal and drop the collected characters,
535 // e.g., due to an encountered error.
536 inline void DropLiteral() {
537 next_.literal_chars = NULL;
538 next_.raw_literal_chars = NULL;
539 }
540
541 inline void AddLiteralCharAdvance() {
542 AddLiteralChar(c0_);
543 Advance();
544 }
545
546 // Low-level scanning support.
547 template <bool capture_raw = false, bool check_surrogate = true>
548 void Advance() {
549 if (capture_raw) {
550 AddRawLiteralChar(c0_);
551 }
552 c0_ = source_->Advance();
553 if (check_surrogate) HandleLeadSurrogate();
554 }
555
556 void HandleLeadSurrogate() {
557 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
558 uc32 c1 = source_->Advance();
559 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
560 source_->PushBack(c1);
561 } else {
562 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
563 }
564 }
565 }
566
567 void PushBack(uc32 ch) {
568 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
569 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
570 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
571 } else {
572 source_->PushBack(c0_);
573 }
574 c0_ = ch;
575 }
576
577 inline Token::Value Select(Token::Value tok) {
578 Advance();
579 return tok;
580 }
581
582 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
583 Advance();
584 if (c0_ == next) {
585 Advance();
586 return then;
587 } else {
588 return else_;
589 }
590 }
591
592 // Returns the literal string, if any, for the current token (the
593 // token last returned by Next()). The string is 0-terminated.
594 // Literal strings are collected for identifiers, strings, numbers as well
595 // as for template literals. For template literals we also collect the raw
596 // form.
597 // These functions only give the correct result if the literal was scanned
598 // when a LiteralScope object is alive.
599 Vector<const uint8_t> literal_one_byte_string() {
600 DCHECK_NOT_NULL(current_.literal_chars);
601 return current_.literal_chars->one_byte_literal();
602 }
603 Vector<const uint16_t> literal_two_byte_string() {
604 DCHECK_NOT_NULL(current_.literal_chars);
605 return current_.literal_chars->two_byte_literal();
606 }
607 bool is_literal_one_byte() {
608 DCHECK_NOT_NULL(current_.literal_chars);
609 return current_.literal_chars->is_one_byte();
610 }
611 int literal_length() const {
612 DCHECK_NOT_NULL(current_.literal_chars);
613 return current_.literal_chars->length();
614 }
615 // Returns the literal string for the next token (the token that
616 // would be returned if Next() were called).
617 Vector<const uint8_t> next_literal_one_byte_string() {
618 DCHECK_NOT_NULL(next_.literal_chars);
619 return next_.literal_chars->one_byte_literal();
620 }
621 Vector<const uint16_t> next_literal_two_byte_string() {
622 DCHECK_NOT_NULL(next_.literal_chars);
623 return next_.literal_chars->two_byte_literal();
624 }
625 bool is_next_literal_one_byte() {
626 DCHECK_NOT_NULL(next_.literal_chars);
627 return next_.literal_chars->is_one_byte();
628 }
629 Vector<const uint8_t> raw_literal_one_byte_string() {
630 DCHECK_NOT_NULL(current_.raw_literal_chars);
631 return current_.raw_literal_chars->one_byte_literal();
632 }
633 Vector<const uint16_t> raw_literal_two_byte_string() {
634 DCHECK_NOT_NULL(current_.raw_literal_chars);
635 return current_.raw_literal_chars->two_byte_literal();
636 }
637 bool is_raw_literal_one_byte() {
638 DCHECK_NOT_NULL(current_.raw_literal_chars);
639 return current_.raw_literal_chars->is_one_byte();
640 }
641
642 template <bool capture_raw>
643 uc32 ScanHexNumber(int expected_length);
644 // Scan a number of any length but not bigger than max_value. For example, the
645 // number can be 000000001, so it's very long in characters but its value is
646 // small.
647 template <bool capture_raw>
648 uc32 ScanUnlimitedLengthHexNumber(int max_value);
649
650 // Scans a single JavaScript token.
651 void Scan();
652
653 bool SkipWhiteSpace();
654 Token::Value SkipSingleLineComment();
655 Token::Value SkipSourceURLComment();
656 void TryToParseSourceURLComment();
657 Token::Value SkipMultiLineComment();
658 // Scans a possible HTML comment -- begins with '<!'.
659 Token::Value ScanHtmlComment();
660
661 void ScanDecimalDigits();
662 Token::Value ScanNumber(bool seen_period);
663 Token::Value ScanIdentifierOrKeyword();
664 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
665
666 Token::Value ScanString();
667
668 // Scans an escape-sequence which is part of a string and adds the
669 // decoded character to the current literal. Returns true if a pattern
670 // is scanned.
671 template <bool capture_raw, bool in_template_literal>
672 bool ScanEscape();
673
674 // Decodes a Unicode escape-sequence which is part of an identifier.
675 // If the escape sequence cannot be decoded the result is kBadChar.
676 uc32 ScanIdentifierUnicodeEscape();
677 // Helper for the above functions.
678 template <bool capture_raw>
679 uc32 ScanUnicodeEscape();
680
681 Token::Value ScanTemplateSpan();
682
683 // Return the current source position.
684 int source_pos() {
685 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
686 }
687
688 static bool LiteralContainsEscapes(const TokenDesc& token) {
689 Location location = token.location;
690 int source_length = (location.end_pos - location.beg_pos);
691 if (token.token == Token::STRING) {
692 // Subtract delimiters.
693 source_length -= 2;
694 }
695 return token.literal_chars->length() != source_length;
696 }
697
698 UnicodeCache* unicode_cache_;
699
700 // Buffers collecting literal strings, numbers, etc.
701 LiteralBuffer literal_buffer0_;
702 LiteralBuffer literal_buffer1_;
703 LiteralBuffer literal_buffer2_;
704
705 // Values parsed from magic comments.
706 LiteralBuffer source_url_;
707 LiteralBuffer source_mapping_url_;
708
709 // Buffer to store raw string values
710 LiteralBuffer raw_literal_buffer0_;
711 LiteralBuffer raw_literal_buffer1_;
712 LiteralBuffer raw_literal_buffer2_;
713
714 TokenDesc current_; // desc for current token (as returned by Next())
715 TokenDesc next_; // desc for next token (one token look-ahead)
716 TokenDesc next_next_; // desc for the token after next (after PeakAhead())
717
718 // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
719 // These variables contain the scanner state when a bookmark is set.
720 //
721 // We will use bookmark_c0_ as a 'control' variable, where:
722 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
723 // - bookmark_c0_ == -1: No bookmark has been set.
724 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
725 //
726 // Which state is being bookmarked? The parser state is distributed over
727 // several variables, roughly like this:
728 // ... 1234 + 5678 ..... [character stream]
729 // [current_] [next_] c0_ | [scanner state]
730 // So when the scanner is logically at the beginning of an expression
731 // like "1234 + 4567", then:
732 // - current_ contains "1234"
733 // - next_ contains "+"
734 // - c0_ contains ' ' (the space between "+" and "5678",
735 // - the source_ character stream points to the beginning of "5678".
736 // To be able to restore this state, we will keep copies of current_, next_,
737 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
738 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
739 static const uc32 kNoBookmark = -1;
740 static const uc32 kBookmarkWasApplied = -2;
741 uc32 bookmark_c0_;
742 TokenDesc bookmark_current_;
743 TokenDesc bookmark_next_;
744 LiteralBuffer bookmark_current_literal_;
745 LiteralBuffer bookmark_current_raw_literal_;
746 LiteralBuffer bookmark_next_literal_;
747 LiteralBuffer bookmark_next_raw_literal_;
748
749 // Input stream. Must be initialized to an Utf16CharacterStream.
750 Utf16CharacterStream* source_;
751
752
753 // Start position of the octal literal last scanned.
754 Location octal_pos_;
755
756 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
757 uc32 c0_;
758
759 // Whether there is a line terminator whitespace character after
760 // the current token, and before the next. Does not count newlines
761 // inside multiline comments.
762 bool has_line_terminator_before_next_;
763 // Whether there is a multi-line comment that contains a
764 // line-terminator after the current token, and before the next.
765 bool has_multiline_comment_before_next_;
766 };
767
768 } // namespace internal
769 } // namespace v8
770
771 #endif // V8_SCANNER_H_
OLDNEW
« no previous file with comments | « src/runtime/runtime-scopes.cc ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698