OLD | NEW |
| (Empty) |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Features shared by parsing and pre-parsing scanners. | |
6 | |
7 #ifndef V8_SCANNER_H_ | |
8 #define V8_SCANNER_H_ | |
9 | |
10 #include "src/allocation.h" | |
11 #include "src/base/logging.h" | |
12 #include "src/char-predicates.h" | |
13 #include "src/globals.h" | |
14 #include "src/hashmap.h" | |
15 #include "src/list.h" | |
16 #include "src/token.h" | |
17 #include "src/unicode.h" | |
18 #include "src/unicode-decoder.h" | |
19 #include "src/utils.h" | |
20 | |
21 namespace v8 { | |
22 namespace internal { | |
23 | |
24 | |
25 class AstRawString; | |
26 class AstValueFactory; | |
27 class ParserRecorder; | |
28 class UnicodeCache; | |
29 | |
30 | |
// Converts a single hexadecimal digit character to its numeric value.
// Yields 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other
// input (negative values included).
inline int HexValue(uc32 c) {
  if (c >= '0' && c <= '9') return c - '0';
  // Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'; no other value lands in
  // that range after the fold.
  const uc32 folded = c | 0x20;
  if (folded >= 'a' && folded <= 'f') return folded - 'a' + 10;
  return -1;
}
40 | |
41 | |
42 // --------------------------------------------------------------------- | |
43 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. | |
44 // A code unit is a 16 bit value representing either a 16 bit code point | |
45 // or one part of a surrogate pair that make a single 21 bit code point. | |
46 | |
47 class Utf16CharacterStream { | |
48 public: | |
49 Utf16CharacterStream() : pos_(0) { } | |
50 virtual ~Utf16CharacterStream() { } | |
51 | |
52 // Returns and advances past the next UTF-16 code unit in the input | |
53 // stream. If there are no more code units, it returns a negative | |
54 // value. | |
55 inline uc32 Advance() { | |
56 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { | |
57 pos_++; | |
58 return static_cast<uc32>(*(buffer_cursor_++)); | |
59 } | |
60 // Note: currently the following increment is necessary to avoid a | |
61 // parser problem! The scanner treats the final kEndOfInput as | |
62 // a code unit with a position, and does math relative to that | |
63 // position. | |
64 pos_++; | |
65 | |
66 return kEndOfInput; | |
67 } | |
68 | |
69 // Return the current position in the code unit stream. | |
70 // Starts at zero. | |
71 inline size_t pos() const { return pos_; } | |
72 | |
73 // Skips forward past the next code_unit_count UTF-16 code units | |
74 // in the input, or until the end of input if that comes sooner. | |
75 // Returns the number of code units actually skipped. If less | |
76 // than code_unit_count, | |
77 inline size_t SeekForward(size_t code_unit_count) { | |
78 size_t buffered_chars = buffer_end_ - buffer_cursor_; | |
79 if (code_unit_count <= buffered_chars) { | |
80 buffer_cursor_ += code_unit_count; | |
81 pos_ += code_unit_count; | |
82 return code_unit_count; | |
83 } | |
84 return SlowSeekForward(code_unit_count); | |
85 } | |
86 | |
87 // Pushes back the most recently read UTF-16 code unit (or negative | |
88 // value if at end of input), i.e., the value returned by the most recent | |
89 // call to Advance. | |
90 // Must not be used right after calling SeekForward. | |
91 virtual void PushBack(int32_t code_unit) = 0; | |
92 | |
93 virtual bool SetBookmark(); | |
94 virtual void ResetToBookmark(); | |
95 | |
96 protected: | |
97 static const uc32 kEndOfInput = -1; | |
98 | |
99 // Ensures that the buffer_cursor_ points to the code_unit at | |
100 // position pos_ of the input, if possible. If the position | |
101 // is at or after the end of the input, return false. If there | |
102 // are more code_units available, return true. | |
103 virtual bool ReadBlock() = 0; | |
104 virtual size_t SlowSeekForward(size_t code_unit_count) = 0; | |
105 | |
106 const uint16_t* buffer_cursor_; | |
107 const uint16_t* buffer_end_; | |
108 size_t pos_; | |
109 }; | |
110 | |
111 | |
112 // --------------------------------------------------------------------- | |
113 // DuplicateFinder discovers duplicate symbols. | |
114 | |
115 class DuplicateFinder { | |
116 public: | |
117 explicit DuplicateFinder(UnicodeCache* constants) | |
118 : unicode_constants_(constants), | |
119 backing_store_(16), | |
120 map_(&Match) { } | |
121 | |
122 int AddOneByteSymbol(Vector<const uint8_t> key, int value); | |
123 int AddTwoByteSymbol(Vector<const uint16_t> key, int value); | |
124 // Add a a number literal by converting it (if necessary) | |
125 // to the string that ToString(ToNumber(literal)) would generate. | |
126 // and then adding that string with AddOneByteSymbol. | |
127 // This string is the actual value used as key in an object literal, | |
128 // and the one that must be different from the other keys. | |
129 int AddNumber(Vector<const uint8_t> key, int value); | |
130 | |
131 private: | |
132 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value); | |
133 // Backs up the key and its length in the backing store. | |
134 // The backup is stored with a base 127 encoding of the | |
135 // length (plus a bit saying whether the string is one byte), | |
136 // followed by the bytes of the key. | |
137 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte); | |
138 | |
139 // Compare two encoded keys (both pointing into the backing store) | |
140 // for having the same base-127 encoded lengths and representation. | |
141 // and then having the same 'length' bytes following. | |
142 static bool Match(void* first, void* second); | |
143 // Creates a hash from a sequence of bytes. | |
144 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte); | |
145 // Checks whether a string containing a JS number is its canonical | |
146 // form. | |
147 static bool IsNumberCanonical(Vector<const uint8_t> key); | |
148 | |
149 // Size of buffer. Sufficient for using it to call DoubleToCString in | |
150 // from conversions.h. | |
151 static const int kBufferSize = 100; | |
152 | |
153 UnicodeCache* unicode_constants_; | |
154 // Backing store used to store strings used as hashmap keys. | |
155 SequenceCollector<unsigned char> backing_store_; | |
156 HashMap map_; | |
157 // Buffer used for string->number->canonical string conversions. | |
158 char number_buffer_[kBufferSize]; | |
159 }; | |
160 | |
161 | |
162 // ---------------------------------------------------------------------------- | |
163 // LiteralBuffer - Collector of chars of literals. | |
164 | |
165 class LiteralBuffer { | |
166 public: | |
167 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } | |
168 | |
169 ~LiteralBuffer() { backing_store_.Dispose(); } | |
170 | |
171 INLINE(void AddChar(uint32_t code_unit)) { | |
172 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
173 if (is_one_byte_) { | |
174 if (code_unit <= unibrow::Latin1::kMaxChar) { | |
175 backing_store_[position_] = static_cast<byte>(code_unit); | |
176 position_ += kOneByteSize; | |
177 return; | |
178 } | |
179 ConvertToTwoByte(); | |
180 } | |
181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
182 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; | |
183 position_ += kUC16Size; | |
184 } else { | |
185 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
186 unibrow::Utf16::LeadSurrogate(code_unit); | |
187 position_ += kUC16Size; | |
188 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
189 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
190 unibrow::Utf16::TrailSurrogate(code_unit); | |
191 position_ += kUC16Size; | |
192 } | |
193 } | |
194 | |
195 bool is_one_byte() const { return is_one_byte_; } | |
196 | |
197 bool is_contextual_keyword(Vector<const char> keyword) const { | |
198 return is_one_byte() && keyword.length() == position_ && | |
199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | |
200 } | |
201 | |
202 Vector<const uint16_t> two_byte_literal() const { | |
203 DCHECK(!is_one_byte_); | |
204 DCHECK((position_ & 0x1) == 0); | |
205 return Vector<const uint16_t>( | |
206 reinterpret_cast<const uint16_t*>(backing_store_.start()), | |
207 position_ >> 1); | |
208 } | |
209 | |
210 Vector<const uint8_t> one_byte_literal() const { | |
211 DCHECK(is_one_byte_); | |
212 return Vector<const uint8_t>( | |
213 reinterpret_cast<const uint8_t*>(backing_store_.start()), | |
214 position_); | |
215 } | |
216 | |
217 int length() const { | |
218 return is_one_byte_ ? position_ : (position_ >> 1); | |
219 } | |
220 | |
221 void ReduceLength(int delta) { | |
222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
223 } | |
224 | |
225 void Reset() { | |
226 position_ = 0; | |
227 is_one_byte_ = true; | |
228 } | |
229 | |
230 Handle<String> Internalize(Isolate* isolate) const; | |
231 | |
232 void CopyFrom(const LiteralBuffer* other) { | |
233 if (other == nullptr) { | |
234 Reset(); | |
235 } else { | |
236 is_one_byte_ = other->is_one_byte_; | |
237 position_ = other->position_; | |
238 backing_store_.Dispose(); | |
239 backing_store_ = other->backing_store_.Clone(); | |
240 } | |
241 } | |
242 | |
243 private: | |
244 static const int kInitialCapacity = 16; | |
245 static const int kGrowthFactory = 4; | |
246 static const int kMinConversionSlack = 256; | |
247 static const int kMaxGrowth = 1 * MB; | |
248 inline int NewCapacity(int min_capacity) { | |
249 int capacity = Max(min_capacity, backing_store_.length()); | |
250 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | |
251 return new_capacity; | |
252 } | |
253 | |
254 void ExpandBuffer() { | |
255 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | |
256 MemCopy(new_store.start(), backing_store_.start(), position_); | |
257 backing_store_.Dispose(); | |
258 backing_store_ = new_store; | |
259 } | |
260 | |
261 void ConvertToTwoByte() { | |
262 DCHECK(is_one_byte_); | |
263 Vector<byte> new_store; | |
264 int new_content_size = position_ * kUC16Size; | |
265 if (new_content_size >= backing_store_.length()) { | |
266 // Ensure room for all currently read code units as UC16 as well | |
267 // as the code unit about to be stored. | |
268 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | |
269 } else { | |
270 new_store = backing_store_; | |
271 } | |
272 uint8_t* src = backing_store_.start(); | |
273 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); | |
274 for (int i = position_ - 1; i >= 0; i--) { | |
275 dst[i] = src[i]; | |
276 } | |
277 if (new_store.start() != backing_store_.start()) { | |
278 backing_store_.Dispose(); | |
279 backing_store_ = new_store; | |
280 } | |
281 position_ = new_content_size; | |
282 is_one_byte_ = false; | |
283 } | |
284 | |
285 bool is_one_byte_; | |
286 int position_; | |
287 Vector<byte> backing_store_; | |
288 | |
289 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | |
290 }; | |
291 | |
292 | |
293 // ---------------------------------------------------------------------------- | |
294 // JavaScript Scanner. | |
295 | |
296 class Scanner { | |
297 public: | |
298 // Scoped helper for literal recording. Automatically drops the literal | |
299 // if aborting the scanning before it's complete. | |
300 class LiteralScope { | |
301 public: | |
302 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | |
303 scanner_->StartLiteral(); | |
304 } | |
305 ~LiteralScope() { | |
306 if (!complete_) scanner_->DropLiteral(); | |
307 } | |
308 void Complete() { | |
309 complete_ = true; | |
310 } | |
311 | |
312 private: | |
313 Scanner* scanner_; | |
314 bool complete_; | |
315 }; | |
316 | |
317 // Scoped helper for a re-settable bookmark. | |
318 class BookmarkScope { | |
319 public: | |
320 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { | |
321 DCHECK_NOT_NULL(scanner_); | |
322 } | |
323 ~BookmarkScope() { scanner_->DropBookmark(); } | |
324 | |
325 bool Set() { return scanner_->SetBookmark(); } | |
326 void Reset() { scanner_->ResetToBookmark(); } | |
327 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); } | |
328 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); } | |
329 | |
330 private: | |
331 Scanner* scanner_; | |
332 | |
333 DISALLOW_COPY_AND_ASSIGN(BookmarkScope); | |
334 }; | |
335 | |
336 // Representation of an interval of source positions. | |
337 struct Location { | |
338 Location(int b, int e) : beg_pos(b), end_pos(e) { } | |
339 Location() : beg_pos(0), end_pos(0) { } | |
340 | |
341 bool IsValid() const { | |
342 return beg_pos >= 0 && end_pos >= beg_pos; | |
343 } | |
344 | |
345 static Location invalid() { return Location(-1, -1); } | |
346 | |
347 int beg_pos; | |
348 int end_pos; | |
349 }; | |
350 | |
351 // -1 is outside of the range of any real source code. | |
352 static const int kNoOctalLocation = -1; | |
353 | |
354 explicit Scanner(UnicodeCache* scanner_contants); | |
355 | |
356 void Initialize(Utf16CharacterStream* source); | |
357 | |
358 // Returns the next token and advances input. | |
359 Token::Value Next(); | |
360 // Returns the token following peek() | |
361 Token::Value PeekAhead(); | |
362 // Returns the current token again. | |
363 Token::Value current_token() { return current_.token; } | |
364 // Returns the location information for the current token | |
365 // (the token last returned by Next()). | |
366 Location location() const { return current_.location; } | |
367 | |
368 // Similar functions for the upcoming token. | |
369 | |
370 // One token look-ahead (past the token returned by Next()). | |
371 Token::Value peek() const { return next_.token; } | |
372 | |
373 Location peek_location() const { return next_.location; } | |
374 | |
375 bool literal_contains_escapes() const { | |
376 return LiteralContainsEscapes(current_); | |
377 } | |
378 bool next_literal_contains_escapes() const { | |
379 return LiteralContainsEscapes(next_); | |
380 } | |
381 bool is_literal_contextual_keyword(Vector<const char> keyword) { | |
382 DCHECK_NOT_NULL(current_.literal_chars); | |
383 return current_.literal_chars->is_contextual_keyword(keyword); | |
384 } | |
385 bool is_next_contextual_keyword(Vector<const char> keyword) { | |
386 DCHECK_NOT_NULL(next_.literal_chars); | |
387 return next_.literal_chars->is_contextual_keyword(keyword); | |
388 } | |
389 | |
390 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | |
391 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | |
392 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); | |
393 | |
394 double DoubleValue(); | |
395 bool ContainsDot(); | |
396 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | |
397 if (is_literal_one_byte() && | |
398 literal_length() == length && | |
399 (allow_escapes || !literal_contains_escapes())) { | |
400 const char* token = | |
401 reinterpret_cast<const char*>(literal_one_byte_string().start()); | |
402 return !strncmp(token, data, length); | |
403 } | |
404 return false; | |
405 } | |
406 inline bool UnescapedLiteralMatches(const char* data, int length) { | |
407 return LiteralMatches(data, length, false); | |
408 } | |
409 | |
410 void IsGetOrSet(bool* is_get, bool* is_set) { | |
411 if (is_literal_one_byte() && | |
412 literal_length() == 3 && | |
413 !literal_contains_escapes()) { | |
414 const char* token = | |
415 reinterpret_cast<const char*>(literal_one_byte_string().start()); | |
416 *is_get = strncmp(token, "get", 3) == 0; | |
417 *is_set = !*is_get && strncmp(token, "set", 3) == 0; | |
418 } | |
419 } | |
420 | |
421 int FindSymbol(DuplicateFinder* finder, int value); | |
422 | |
423 UnicodeCache* unicode_cache() { return unicode_cache_; } | |
424 | |
425 // Returns the location of the last seen octal literal. | |
426 Location octal_position() const { return octal_pos_; } | |
427 void clear_octal_position() { octal_pos_ = Location::invalid(); } | |
428 | |
429 // Returns the value of the last smi that was scanned. | |
430 int smi_value() const { return current_.smi_value_; } | |
431 | |
432 // Seek forward to the given position. This operation does not | |
433 // work in general, for instance when there are pushed back | |
434 // characters, but works for seeking forward until simple delimiter | |
435 // tokens, which is what it is used for. | |
436 void SeekForward(int pos); | |
437 | |
438 // Returns true if there was a line terminator before the peek'ed token, | |
439 // possibly inside a multi-line comment. | |
440 bool HasAnyLineTerminatorBeforeNext() const { | |
441 return has_line_terminator_before_next_ || | |
442 has_multiline_comment_before_next_; | |
443 } | |
444 | |
445 // Scans the input as a regular expression pattern, previous | |
446 // character(s) must be /(=). Returns true if a pattern is scanned. | |
447 bool ScanRegExpPattern(bool seen_equal); | |
448 // Scans the input as regular expression flags. Returns the flags on success. | |
449 Maybe<RegExp::Flags> ScanRegExpFlags(); | |
450 | |
451 // Scans the input as a template literal | |
452 Token::Value ScanTemplateStart(); | |
453 Token::Value ScanTemplateContinuation(); | |
454 | |
455 const LiteralBuffer* source_url() const { return &source_url_; } | |
456 const LiteralBuffer* source_mapping_url() const { | |
457 return &source_mapping_url_; | |
458 } | |
459 | |
460 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | |
461 | |
462 private: | |
463 // The current and look-ahead token. | |
464 struct TokenDesc { | |
465 Token::Value token; | |
466 Location location; | |
467 LiteralBuffer* literal_chars; | |
468 LiteralBuffer* raw_literal_chars; | |
469 int smi_value_; | |
470 }; | |
471 | |
472 static const int kCharacterLookaheadBufferSize = 1; | |
473 | |
474 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | |
475 template <bool capture_raw> | |
476 uc32 ScanOctalEscape(uc32 c, int length); | |
477 | |
478 // Call this after setting source_ to the input. | |
479 void Init() { | |
480 // Set c0_ (one character ahead) | |
481 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | |
482 Advance(); | |
483 // Initialize current_ to not refer to a literal. | |
484 current_.literal_chars = NULL; | |
485 current_.raw_literal_chars = NULL; | |
486 next_next_.token = Token::UNINITIALIZED; | |
487 } | |
488 | |
489 // Support BookmarkScope functionality. | |
490 bool SetBookmark(); | |
491 void ResetToBookmark(); | |
492 bool BookmarkHasBeenSet(); | |
493 bool BookmarkHasBeenReset(); | |
494 void DropBookmark(); | |
495 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from); | |
496 | |
497 // Literal buffer support | |
498 inline void StartLiteral() { | |
499 LiteralBuffer* free_buffer = | |
500 (current_.literal_chars == &literal_buffer0_) | |
501 ? &literal_buffer1_ | |
502 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_ | |
503 : &literal_buffer0_; | |
504 free_buffer->Reset(); | |
505 next_.literal_chars = free_buffer; | |
506 } | |
507 | |
508 inline void StartRawLiteral() { | |
509 LiteralBuffer* free_buffer = | |
510 (current_.raw_literal_chars == &raw_literal_buffer0_) | |
511 ? &raw_literal_buffer1_ | |
512 : (current_.raw_literal_chars == &raw_literal_buffer1_) | |
513 ? &raw_literal_buffer2_ | |
514 : &raw_literal_buffer0_; | |
515 free_buffer->Reset(); | |
516 next_.raw_literal_chars = free_buffer; | |
517 } | |
518 | |
519 INLINE(void AddLiteralChar(uc32 c)) { | |
520 DCHECK_NOT_NULL(next_.literal_chars); | |
521 next_.literal_chars->AddChar(c); | |
522 } | |
523 | |
524 INLINE(void AddRawLiteralChar(uc32 c)) { | |
525 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
526 next_.raw_literal_chars->AddChar(c); | |
527 } | |
528 | |
529 INLINE(void ReduceRawLiteralLength(int delta)) { | |
530 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
531 next_.raw_literal_chars->ReduceLength(delta); | |
532 } | |
533 | |
534 // Stops scanning of a literal and drop the collected characters, | |
535 // e.g., due to an encountered error. | |
536 inline void DropLiteral() { | |
537 next_.literal_chars = NULL; | |
538 next_.raw_literal_chars = NULL; | |
539 } | |
540 | |
541 inline void AddLiteralCharAdvance() { | |
542 AddLiteralChar(c0_); | |
543 Advance(); | |
544 } | |
545 | |
546 // Low-level scanning support. | |
547 template <bool capture_raw = false, bool check_surrogate = true> | |
548 void Advance() { | |
549 if (capture_raw) { | |
550 AddRawLiteralChar(c0_); | |
551 } | |
552 c0_ = source_->Advance(); | |
553 if (check_surrogate) HandleLeadSurrogate(); | |
554 } | |
555 | |
556 void HandleLeadSurrogate() { | |
557 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | |
558 uc32 c1 = source_->Advance(); | |
559 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | |
560 source_->PushBack(c1); | |
561 } else { | |
562 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | |
563 } | |
564 } | |
565 } | |
566 | |
567 void PushBack(uc32 ch) { | |
568 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | |
569 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | |
570 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | |
571 } else { | |
572 source_->PushBack(c0_); | |
573 } | |
574 c0_ = ch; | |
575 } | |
576 | |
577 inline Token::Value Select(Token::Value tok) { | |
578 Advance(); | |
579 return tok; | |
580 } | |
581 | |
582 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | |
583 Advance(); | |
584 if (c0_ == next) { | |
585 Advance(); | |
586 return then; | |
587 } else { | |
588 return else_; | |
589 } | |
590 } | |
591 | |
592 // Returns the literal string, if any, for the current token (the | |
593 // token last returned by Next()). The string is 0-terminated. | |
594 // Literal strings are collected for identifiers, strings, numbers as well | |
595 // as for template literals. For template literals we also collect the raw | |
596 // form. | |
597 // These functions only give the correct result if the literal was scanned | |
598 // when a LiteralScope object is alive. | |
599 Vector<const uint8_t> literal_one_byte_string() { | |
600 DCHECK_NOT_NULL(current_.literal_chars); | |
601 return current_.literal_chars->one_byte_literal(); | |
602 } | |
603 Vector<const uint16_t> literal_two_byte_string() { | |
604 DCHECK_NOT_NULL(current_.literal_chars); | |
605 return current_.literal_chars->two_byte_literal(); | |
606 } | |
607 bool is_literal_one_byte() { | |
608 DCHECK_NOT_NULL(current_.literal_chars); | |
609 return current_.literal_chars->is_one_byte(); | |
610 } | |
611 int literal_length() const { | |
612 DCHECK_NOT_NULL(current_.literal_chars); | |
613 return current_.literal_chars->length(); | |
614 } | |
615 // Returns the literal string for the next token (the token that | |
616 // would be returned if Next() were called). | |
617 Vector<const uint8_t> next_literal_one_byte_string() { | |
618 DCHECK_NOT_NULL(next_.literal_chars); | |
619 return next_.literal_chars->one_byte_literal(); | |
620 } | |
621 Vector<const uint16_t> next_literal_two_byte_string() { | |
622 DCHECK_NOT_NULL(next_.literal_chars); | |
623 return next_.literal_chars->two_byte_literal(); | |
624 } | |
625 bool is_next_literal_one_byte() { | |
626 DCHECK_NOT_NULL(next_.literal_chars); | |
627 return next_.literal_chars->is_one_byte(); | |
628 } | |
629 Vector<const uint8_t> raw_literal_one_byte_string() { | |
630 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
631 return current_.raw_literal_chars->one_byte_literal(); | |
632 } | |
633 Vector<const uint16_t> raw_literal_two_byte_string() { | |
634 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
635 return current_.raw_literal_chars->two_byte_literal(); | |
636 } | |
637 bool is_raw_literal_one_byte() { | |
638 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
639 return current_.raw_literal_chars->is_one_byte(); | |
640 } | |
641 | |
642 template <bool capture_raw> | |
643 uc32 ScanHexNumber(int expected_length); | |
644 // Scan a number of any length but not bigger than max_value. For example, the | |
645 // number can be 000000001, so it's very long in characters but its value is | |
646 // small. | |
647 template <bool capture_raw> | |
648 uc32 ScanUnlimitedLengthHexNumber(int max_value); | |
649 | |
650 // Scans a single JavaScript token. | |
651 void Scan(); | |
652 | |
653 bool SkipWhiteSpace(); | |
654 Token::Value SkipSingleLineComment(); | |
655 Token::Value SkipSourceURLComment(); | |
656 void TryToParseSourceURLComment(); | |
657 Token::Value SkipMultiLineComment(); | |
658 // Scans a possible HTML comment -- begins with '<!'. | |
659 Token::Value ScanHtmlComment(); | |
660 | |
661 void ScanDecimalDigits(); | |
662 Token::Value ScanNumber(bool seen_period); | |
663 Token::Value ScanIdentifierOrKeyword(); | |
664 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped); | |
665 | |
666 Token::Value ScanString(); | |
667 | |
668 // Scans an escape-sequence which is part of a string and adds the | |
669 // decoded character to the current literal. Returns true if a pattern | |
670 // is scanned. | |
671 template <bool capture_raw, bool in_template_literal> | |
672 bool ScanEscape(); | |
673 | |
674 // Decodes a Unicode escape-sequence which is part of an identifier. | |
675 // If the escape sequence cannot be decoded the result is kBadChar. | |
676 uc32 ScanIdentifierUnicodeEscape(); | |
677 // Helper for the above functions. | |
678 template <bool capture_raw> | |
679 uc32 ScanUnicodeEscape(); | |
680 | |
681 Token::Value ScanTemplateSpan(); | |
682 | |
683 // Return the current source position. | |
684 int source_pos() { | |
685 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; | |
686 } | |
687 | |
688 static bool LiteralContainsEscapes(const TokenDesc& token) { | |
689 Location location = token.location; | |
690 int source_length = (location.end_pos - location.beg_pos); | |
691 if (token.token == Token::STRING) { | |
692 // Subtract delimiters. | |
693 source_length -= 2; | |
694 } | |
695 return token.literal_chars->length() != source_length; | |
696 } | |
697 | |
698 UnicodeCache* unicode_cache_; | |
699 | |
700 // Buffers collecting literal strings, numbers, etc. | |
701 LiteralBuffer literal_buffer0_; | |
702 LiteralBuffer literal_buffer1_; | |
703 LiteralBuffer literal_buffer2_; | |
704 | |
705 // Values parsed from magic comments. | |
706 LiteralBuffer source_url_; | |
707 LiteralBuffer source_mapping_url_; | |
708 | |
709 // Buffer to store raw string values | |
710 LiteralBuffer raw_literal_buffer0_; | |
711 LiteralBuffer raw_literal_buffer1_; | |
712 LiteralBuffer raw_literal_buffer2_; | |
713 | |
714 TokenDesc current_; // desc for current token (as returned by Next()) | |
715 TokenDesc next_; // desc for next token (one token look-ahead) | |
716 TokenDesc next_next_; // desc for the token after next (after PeakAhead()) | |
717 | |
718 // Variables for Scanner::BookmarkScope and the *Bookmark implementation. | |
719 // These variables contain the scanner state when a bookmark is set. | |
720 // | |
721 // We will use bookmark_c0_ as a 'control' variable, where: | |
722 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_. | |
723 // - bookmark_c0_ == -1: No bookmark has been set. | |
724 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark). | |
725 // | |
726 // Which state is being bookmarked? The parser state is distributed over | |
727 // several variables, roughly like this: | |
728 // ... 1234 + 5678 ..... [character stream] | |
729 // [current_] [next_] c0_ | [scanner state] | |
730 // So when the scanner is logically at the beginning of an expression | |
731 // like "1234 + 4567", then: | |
732 // - current_ contains "1234" | |
733 // - next_ contains "+" | |
734 // - c0_ contains ' ' (the space between "+" and "5678", | |
735 // - the source_ character stream points to the beginning of "5678". | |
736 // To be able to restore this state, we will keep copies of current_, next_, | |
737 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the | |
738 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_. | |
739 static const uc32 kNoBookmark = -1; | |
740 static const uc32 kBookmarkWasApplied = -2; | |
741 uc32 bookmark_c0_; | |
742 TokenDesc bookmark_current_; | |
743 TokenDesc bookmark_next_; | |
744 LiteralBuffer bookmark_current_literal_; | |
745 LiteralBuffer bookmark_current_raw_literal_; | |
746 LiteralBuffer bookmark_next_literal_; | |
747 LiteralBuffer bookmark_next_raw_literal_; | |
748 | |
749 // Input stream. Must be initialized to an Utf16CharacterStream. | |
750 Utf16CharacterStream* source_; | |
751 | |
752 | |
753 // Start position of the octal literal last scanned. | |
754 Location octal_pos_; | |
755 | |
756 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | |
757 uc32 c0_; | |
758 | |
759 // Whether there is a line terminator whitespace character after | |
760 // the current token, and before the next. Does not count newlines | |
761 // inside multiline comments. | |
762 bool has_line_terminator_before_next_; | |
763 // Whether there is a multi-line comment that contains a | |
764 // line-terminator after the current token, and before the next. | |
765 bool has_multiline_comment_before_next_; | |
766 }; | |
767 | |
768 } // namespace internal | |
769 } // namespace v8 | |
770 | |
771 #endif // V8_SCANNER_H_ | |
OLD | NEW |