Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Side by Side Diff: src/prescanner.h

Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base. (Closed)
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parser.cc ('k') | src/scanner.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 22 matching lines...) Expand all
33 #include "utils.h" 33 #include "utils.h"
34 #include "scanner-base.h" 34 #include "scanner-base.h"
35 35
36 namespace v8 { 36 namespace v8 {
37 namespace preparser { 37 namespace preparser {
38 38
39 namespace i = v8::internal; 39 namespace i = v8::internal;
40 40
41 typedef int uc32; 41 typedef int uc32;
42 42
43 int HexValue(uc32 c) {
44 int res = c | 0x20; // Fold uppercase letters to lowercase (digits are unaffected).
45 int is_digit = (c & 0x10) >> 4; // 0 if non-digit, 1 if digit.
46 // What to add to digits to make them consecutive with 'a'-'f' letters.
47 int kDelta = 'a' - '9' - 1;
48 // What to subtract from digits and letters to get them back to the range 0..15.
49 int kStart = '0' + kDelta;
50 res -= kStart;
51 res += kDelta * is_digit;
52 return res;
53 }
54
55
56 class PreScannerStackGuard { 43 class PreScannerStackGuard {
57 public: 44 public:
58 explicit PreScannerStackGuard(int max_size) 45 explicit PreScannerStackGuard(int max_size)
59 : limit_(StackPoint().at() - max_size) { } 46 : limit_(StackPoint().at() - max_size) { }
60 bool has_overflowed() { 47 bool has_overflowed() {
61 return StackPoint().at() < limit_; 48 return StackPoint().at() < limit_;
62 } 49 }
63 private: 50 private:
64 class StackPoint { 51 class StackPoint {
65 public: 52 public:
66 char* at() { return reinterpret_cast<char*>(this); } 53 char* at() { return reinterpret_cast<char*>(this); }
67 }; 54 };
68 char* limit_; 55 char* limit_;
69 }; 56 };
70 57
71 58
72 // Scanner for preparsing. 59 // Scanner for preparsing.
73 // InputStream is a source of UC16 characters with limited push-back. 60 // InputStream is a source of UC16 characters with limited push-back.
74 // LiteralsBuffer is a collector of (UTF-8) characters used to capture literals. 61 // LiteralsBuffer is a collector of (UTF-8) characters used to capture literals.
75 template <typename InputStream, typename LiteralsBuffer>
76 class Scanner { 62 class Scanner {
77 public: 63 public:
78 enum LiteralType { 64 enum LiteralType {
79 kLiteralNumber, 65 kLiteralNumber,
80 kLiteralIdentifier, 66 kLiteralIdentifier,
81 kLiteralString, 67 kLiteralString,
82 kLiteralRegExp, 68 kLiteralRegExp,
83 kLiteralRegExpFlags 69 kLiteralRegExpFlags
84 }; 70 };
85 71
86 class LiteralScope { 72 class LiteralScope {
87 public: 73 public:
88 explicit LiteralScope(Scanner* self, LiteralType type); 74 explicit LiteralScope(Scanner* self, LiteralType type);
89 ~LiteralScope(); 75 ~LiteralScope();
90 void Complete(); 76 void Complete();
91 77
92 private: 78 private:
93 Scanner* scanner_; 79 Scanner* scanner_;
94 bool complete_; 80 bool complete_;
95 }; 81 };
96 82
97 Scanner(); 83 Scanner();
98 84
99 void Initialize(InputStream* stream); 85 void Initialize(i::UTF16Buffer* stream);
100 86
101 // Returns the next token. 87 // Returns the next token.
102 i::Token::Value Next(); 88 i::Token::Value Next();
103 89
104 // Returns the current token again. 90 // Returns the current token again.
105 i::Token::Value current_token() { return current_.token; } 91 i::Token::Value current_token() { return current_.token; }
106 92
107 // One token look-ahead (past the token returned by Next()). 93 // One token look-ahead (past the token returned by Next()).
108 i::Token::Value peek() const { return next_.token; } 94 i::Token::Value peek() const { return next_.token; }
109 95
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
144 i::Vector<const char> literal() const { 130 i::Vector<const char> literal() const {
145 return i::Vector<const char>(literal_string(), literal_length()); 131 return i::Vector<const char>(literal_string(), literal_length());
146 } 132 }
147 133
148 // Returns the literal string for the next token (the token that 134 // Returns the literal string for the next token (the token that
149 // would be returned if Next() were called). 135 // would be returned if Next() were called).
150 const char* next_literal_string() const { 136 const char* next_literal_string() const {
151 return next_.literal_chars; 137 return next_.literal_chars;
152 } 138 }
153 139
154
155 // Returns the length of the next token (that would be returned if 140 // Returns the length of the next token (that would be returned if
156 // Next() were called). 141 // Next() were called).
157 int next_literal_length() const { 142 int next_literal_length() const {
158 // Excluding terminal '\x00' added by TerminateLiteral(). 143 // Excluding terminal '\x00' added by TerminateLiteral().
159 return next_.literal_length - 1; 144 return next_.literal_length - 1;
160 } 145 }
161 146
162 i::Vector<const char> next_literal() const { 147 i::Vector<const char> next_literal() const {
163 return i::Vector<const char>(next_literal_string(), next_literal_length()); 148 return i::Vector<const char>(next_literal_string(), next_literal_length());
164 } 149 }
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
243 // If the escape sequence cannot be decoded the result is kBadChar. 228 // If the escape sequence cannot be decoded the result is kBadChar.
244 uc32 ScanIdentifierUnicodeEscape(); 229 uc32 ScanIdentifierUnicodeEscape();
245 230
246 PreScannerStackGuard stack_guard_; 231 PreScannerStackGuard stack_guard_;
247 232
248 TokenDesc current_; // desc for current token (as returned by Next()) 233 TokenDesc current_; // desc for current token (as returned by Next())
249 TokenDesc next_; // desc for next token (one token look-ahead) 234 TokenDesc next_; // desc for next token (one token look-ahead)
250 bool has_line_terminator_before_next_; 235 bool has_line_terminator_before_next_;
251 236
252 // Source. 237 // Source.
253 InputStream* source_; 238 i::UTF16Buffer* source_;
254 239
255 // Buffer to hold literal values (identifiers, strings, numerals, regexps and 240 // Buffer to hold literal values (identifiers, strings, numerals, regexps and
256 // regexp flags) using '\x00'-terminated UTF-8 encoding. 241 // regexp flags) using '\x00'-terminated UTF-8 encoding.
257 // Handles allocation internally. 242 // Handles allocation internally.
258 // Notice that the '\x00' termination is meaningless for strings and regexps 243 // Notice that the '\x00' termination is meaningless for strings and regexps
259 // which may contain the zero-character, but can be used as terminator for 244 // which may contain the zero-character, but can be used as terminator for
260 // identifiers, numerals and regexp flags. 245 // identifiers, numerals and regexp flags.
261 LiteralsBuffer literal_buffer_; 246 i::LiteralCollector literal_buffer_;
262 247
263 bool stack_overflow_; 248 bool stack_overflow_;
264 249
265 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 250 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
266 uc32 c0_; 251 uc32 c0_;
267 }; 252 };
268 253
269 254
270 // ---------------------------------------------------------------------------- 255 // ----------------------------------------------------------------------------
271 // Scanner::LiteralScope 256 // Scanner::LiteralScope
272 257
273 template <typename InputStream, typename LiteralsBuffer> 258 Scanner::LiteralScope::LiteralScope(
274 Scanner<InputStream, LiteralsBuffer>::LiteralScope::LiteralScope(
275 Scanner* self, LiteralType type) 259 Scanner* self, LiteralType type)
276 : scanner_(self), complete_(false) { 260 : scanner_(self), complete_(false) {
277 self->StartLiteral(type); 261 self->StartLiteral(type);
278 } 262 }
279 263
280 264
281 template <typename InputStream, typename LiteralsBuffer> 265 Scanner::LiteralScope::~LiteralScope() {
282 Scanner<InputStream, LiteralsBuffer>::LiteralScope::~LiteralScope() {
283 if (!complete_) scanner_->DropLiteral(); 266 if (!complete_) scanner_->DropLiteral();
284 } 267 }
285 268
286 template <typename InputStream, typename LiteralsBuffer> 269 void Scanner::LiteralScope::Complete() {
287 void Scanner<InputStream, LiteralsBuffer>::LiteralScope::Complete() {
288 scanner_->TerminateLiteral(); 270 scanner_->TerminateLiteral();
289 complete_ = true; 271 complete_ = true;
290 } 272 }
291 273
292 274
293 // ---------------------------------------------------------------------------- 275 // ----------------------------------------------------------------------------
294 // Scanner. 276 // Scanner.
295 template <typename InputStream, typename LiteralsBuffer> 277 Scanner::Scanner()
296 Scanner<InputStream, LiteralsBuffer>::Scanner()
297 : stack_guard_(kMaxStackSize), 278 : stack_guard_(kMaxStackSize),
298 has_line_terminator_before_next_(false), 279 has_line_terminator_before_next_(false),
299 source_(NULL), 280 source_(NULL),
300 stack_overflow_(false) {} 281 stack_overflow_(false) {}
301 282
302 283
303 template <typename InputStream, typename LiteralsBuffer> 284 void Scanner::Initialize(i::UTF16Buffer* stream) {
304 void Scanner<InputStream, LiteralsBuffer>::Initialize(InputStream* stream) {
305 source_ = stream; 285 source_ = stream;
306 286
307 // Initialize current_ to not refer to a literal. 287 // Initialize current_ to not refer to a literal.
308 current_.literal_length = 0; 288 current_.literal_length = 0;
309 // Reset literal buffer. 289 // Reset literal buffer.
310 literal_buffer_.Reset(); 290 literal_buffer_.Reset();
311 291
312 // Set c0_ (one character ahead) 292 // Set c0_ (one character ahead)
313 ASSERT(kCharacterLookaheadBufferSize == 1); 293 ASSERT(kCharacterLookaheadBufferSize == 1);
314 Advance(); 294 Advance();
315 295
316 // Skip initial whitespace allowing HTML comment ends just like 296 // Skip initial whitespace allowing HTML comment ends just like
317 // after a newline and scan first token. 297 // after a newline and scan first token.
318 has_line_terminator_before_next_ = true; 298 has_line_terminator_before_next_ = true;
319 SkipWhiteSpace(); 299 SkipWhiteSpace();
320 Scan(); 300 Scan();
321 } 301 }
322 302
323 303
324 template <typename InputStream, typename LiteralsBuffer> 304 i::Token::Value Scanner::Next() {
325 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Next() {
326 // BUG 1215673: Find a thread safe way to set a stack limit in 305 // BUG 1215673: Find a thread safe way to set a stack limit in
327 // pre-parse mode. Otherwise, we cannot safely pre-parse from other 306 // pre-parse mode. Otherwise, we cannot safely pre-parse from other
328 // threads. 307 // threads.
329 current_ = next_; 308 current_ = next_;
330 // Check for stack-overflow before returning any tokens. 309 // Check for stack-overflow before returning any tokens.
331 if (stack_guard_.has_overflowed()) { 310 if (stack_guard_.has_overflowed()) {
332 stack_overflow_ = true; 311 stack_overflow_ = true;
333 next_.token = i::Token::ILLEGAL; 312 next_.token = i::Token::ILLEGAL;
334 } else { 313 } else {
335 has_line_terminator_before_next_ = false; 314 has_line_terminator_before_next_ = false;
336 Scan(); 315 Scan();
337 } 316 }
338 return current_.token; 317 return current_.token;
339 } 318 }
340 319
341 320
342 template <typename InputStream, typename LiteralsBuffer> 321 void Scanner::StartLiteral(LiteralType type) {
343 void Scanner<InputStream, LiteralsBuffer>::StartLiteral(LiteralType type) {
344 // Only record string literals and identifiers when preparsing. 322 // Only record string literals and identifiers when preparsing.
345 // Those are the ones that are recorded as symbols. Numbers and 323 // Those are the ones that are recorded as symbols. Numbers and
346 // regexps are not recorded. 324 // regexps are not recorded.
347 if (type == kLiteralString || type == kLiteralIdentifier) { 325 if (type == kLiteralString || type == kLiteralIdentifier) {
348 literal_buffer_.StartLiteral(); 326 literal_buffer_.StartLiteral();
349 } 327 }
350 } 328 }
351 329
352 330
353 template <typename InputStream, typename LiteralsBuffer> 331 void Scanner::AddLiteralChar(uc32 c) {
354 void Scanner<InputStream, LiteralsBuffer>::AddLiteralChar(uc32 c) {
355 literal_buffer_.AddChar(c); 332 literal_buffer_.AddChar(c);
356 } 333 }
357 334
358 335
359 template <typename InputStream, typename LiteralsBuffer> 336 void Scanner::TerminateLiteral() {
360 void Scanner<InputStream, LiteralsBuffer>::TerminateLiteral() {
361 i::Vector<const char> chars = literal_buffer_.EndLiteral(); 337 i::Vector<const char> chars = literal_buffer_.EndLiteral();
362 next_.literal_chars = chars.start(); 338 next_.literal_chars = chars.start();
363 next_.literal_length = chars.length(); 339 next_.literal_length = chars.length();
364 } 340 }
365 341
366 342
367 template <typename InputStream, typename LiteralsBuffer> 343 void Scanner::DropLiteral() {
368 void Scanner<InputStream, LiteralsBuffer>::DropLiteral() {
369 literal_buffer_.DropLiteral(); 344 literal_buffer_.DropLiteral();
370 } 345 }
371 346
372 347
373 template <typename InputStream, typename LiteralsBuffer> 348 void Scanner::AddLiteralCharAdvance() {
374 void Scanner<InputStream, LiteralsBuffer>::AddLiteralCharAdvance() {
375 AddLiteralChar(c0_); 349 AddLiteralChar(c0_);
376 Advance(); 350 Advance();
377 } 351 }
378 352
379 353
380 static inline bool IsByteOrderMark(uc32 c) { 354 static inline bool IsByteOrderMark(uc32 c) {
381 // The Unicode value U+FFFE is guaranteed never to be assigned as a 355 // The Unicode value U+FFFE is guaranteed never to be assigned as a
382 // Unicode character; this implies that in a Unicode context the 356 // Unicode character; this implies that in a Unicode context the
383 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 357 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
384 // character expressed in little-endian byte order (since it could 358 // character expressed in little-endian byte order (since it could
385 // not be a U+FFFE character expressed in big-endian byte 359 // not be a U+FFFE character expressed in big-endian byte
386 // order). Nevertheless, we check for it to be compatible with 360 // order). Nevertheless, we check for it to be compatible with
387 // Spidermonkey. 361 // Spidermonkey.
388 return c == 0xFEFF || c == 0xFFFE; 362 return c == 0xFEFF || c == 0xFFFE;
389 } 363 }
390 364
391 365
392 template <typename InputStream, typename LiteralsBuffer> 366 bool Scanner::SkipWhiteSpace() {
393 bool Scanner<InputStream, LiteralsBuffer>::SkipWhiteSpace() {
394 int start_position = source_pos(); 367 int start_position = source_pos();
395 368
396 while (true) { 369 while (true) {
397 // We treat byte-order marks (BOMs) as whitespace for better 370 // We treat byte-order marks (BOMs) as whitespace for better
398 // compatibility with Spidermonkey and other JavaScript engines. 371 // compatibility with Spidermonkey and other JavaScript engines.
399 while (i::ScannerConstants::kIsWhiteSpace.get(c0_) 372 while (i::ScannerConstants::kIsWhiteSpace.get(c0_)
400 || IsByteOrderMark(c0_)) { 373 || IsByteOrderMark(c0_)) {
401 // IsWhiteSpace() includes line terminators! 374 // IsWhiteSpace() includes line terminators!
402 if (i::ScannerConstants::kIsLineTerminator.get(c0_)) { 375 if (i::ScannerConstants::kIsLineTerminator.get(c0_)) {
403 // Ignore line terminators, but remember them. This is necessary 376 // Ignore line terminators, but remember them. This is necessary
(...skipping 20 matching lines...) Expand all
424 PushBack('-'); // undo Advance() 397 PushBack('-'); // undo Advance()
425 } 398 }
426 PushBack('-'); // undo Advance() 399 PushBack('-'); // undo Advance()
427 } 400 }
428 // Return whether or not we skipped any characters. 401 // Return whether or not we skipped any characters.
429 return source_pos() != start_position; 402 return source_pos() != start_position;
430 } 403 }
431 } 404 }
432 405
433 406
434 template <typename InputStream, typename LiteralsBuffer> 407 i::Token::Value Scanner::SkipSingleLineComment() {
435 i::Token::Value Scanner<InputStream, LiteralsBuffer>::SkipSingleLineComment() {
436 Advance(); 408 Advance();
437 409
438 // The line terminator at the end of the line is not considered 410 // The line terminator at the end of the line is not considered
439 // to be part of the single-line comment; it is recognized 411 // to be part of the single-line comment; it is recognized
440 // separately by the lexical grammar and becomes part of the 412 // separately by the lexical grammar and becomes part of the
441 // stream of input elements for the syntactic grammar (see 413 // stream of input elements for the syntactic grammar (see
442 // ECMA-262, section 7.4, page 12). 414 // ECMA-262, section 7.4, page 12).
443 while (c0_ >= 0 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) { 415 while (c0_ >= 0 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {
444 Advance(); 416 Advance();
445 } 417 }
446 418
447 return i::Token::WHITESPACE; 419 return i::Token::WHITESPACE;
448 } 420 }
449 421
450 422
451 template <typename InputStream, typename LiteralsBuffer> 423 i::Token::Value Scanner::SkipMultiLineComment() {
452 i::Token::Value Scanner<InputStream, LiteralsBuffer>::SkipMultiLineComment() {
453 ASSERT(c0_ == '*'); 424 ASSERT(c0_ == '*');
454 Advance(); 425 Advance();
455 426
456 while (c0_ >= 0) { 427 while (c0_ >= 0) {
457 char ch = c0_; 428 char ch = c0_;
458 Advance(); 429 Advance();
459 // If we have reached the end of the multi-line comment, we 430 // If we have reached the end of the multi-line comment, we
460 // consume the '/' and insert a whitespace. This way all 431 // consume the '/' and insert a whitespace. This way all
461 // multi-line comments are treated as whitespace - even the ones 432 // multi-line comments are treated as whitespace - even the ones
462 // containing line terminators. This contradicts ECMA-262, section 433 // containing line terminators. This contradicts ECMA-262, section
463 // 7.4, page 12, that says that multi-line comments containing 434 // 7.4, page 12, that says that multi-line comments containing
464 // line terminators should be treated as a line terminator, but it 435 // line terminators should be treated as a line terminator, but it
465 // matches the behaviour of SpiderMonkey and KJS. 436 // matches the behaviour of SpiderMonkey and KJS.
466 if (ch == '*' && c0_ == '/') { 437 if (ch == '*' && c0_ == '/') {
467 c0_ = ' '; 438 c0_ = ' ';
468 return i::Token::WHITESPACE; 439 return i::Token::WHITESPACE;
469 } 440 }
470 } 441 }
471 442
472 // Unterminated multi-line comment. 443 // Unterminated multi-line comment.
473 return i::Token::ILLEGAL; 444 return i::Token::ILLEGAL;
474 } 445 }
475 446
476 447
477 template <typename InputStream, typename LiteralsBuffer> 448 i::Token::Value Scanner::ScanHtmlComment() {
478 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanHtmlComment() {
479 // Check for <!-- comments. 449 // Check for <!-- comments.
480 ASSERT(c0_ == '!'); 450 ASSERT(c0_ == '!');
481 Advance(); 451 Advance();
482 if (c0_ == '-') { 452 if (c0_ == '-') {
483 Advance(); 453 Advance();
484 if (c0_ == '-') return SkipSingleLineComment(); 454 if (c0_ == '-') return SkipSingleLineComment();
485 PushBack('-'); // undo Advance() 455 PushBack('-'); // undo Advance()
486 } 456 }
487 PushBack('!'); // undo Advance() 457 PushBack('!'); // undo Advance()
488 ASSERT(c0_ == '!'); 458 ASSERT(c0_ == '!');
489 return i::Token::LT; 459 return i::Token::LT;
490 } 460 }
491 461
492 462
493 template <typename InputStream, typename LiteralsBuffer> 463 void Scanner::Scan() {
494 void Scanner<InputStream, LiteralsBuffer>::Scan() {
495 next_.literal_length = 0; 464 next_.literal_length = 0;
496 i::Token::Value token; 465 i::Token::Value token;
497 do { 466 do {
498 // Remember the position of the next token 467 // Remember the position of the next token
499 next_.location.beg_pos = source_pos(); 468 next_.location.beg_pos = source_pos();
500 469
501 switch (c0_) { 470 switch (c0_) {
502 case ' ': 471 case ' ':
503 case '\t': 472 case '\t':
504 Advance(); 473 Advance();
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
724 693
725 // Continue scanning for tokens as long as we're just skipping 694 // Continue scanning for tokens as long as we're just skipping
726 // whitespace. 695 // whitespace.
727 } while (token == i::Token::WHITESPACE); 696 } while (token == i::Token::WHITESPACE);
728 697
729 next_.location.end_pos = source_pos(); 698 next_.location.end_pos = source_pos();
730 next_.token = token; 699 next_.token = token;
731 } 700 }
732 701
733 702
734 template <typename InputStream, typename LiteralsBuffer> 703 void Scanner::SeekForward(int pos) {
735 void Scanner<InputStream, LiteralsBuffer>::SeekForward(int pos) {
736 source_->SeekForward(pos - 1); 704 source_->SeekForward(pos - 1);
737 Advance(); 705 Advance();
738 // This function is only called to seek to the location 706 // This function is only called to seek to the location
739 // of the end of a function (at the "}" token). It doesn't matter 707 // of the end of a function (at the "}" token). It doesn't matter
740 // whether there was a line terminator in the part we skip. 708 // whether there was a line terminator in the part we skip.
741 has_line_terminator_before_next_ = false; 709 has_line_terminator_before_next_ = false;
742 Scan(); 710 Scan();
743 } 711 }
744 712
745 713
746 template <typename InputStream, typename LiteralsBuffer> 714 uc32 Scanner::ScanHexEscape(uc32 c, int length) {
747 uc32 Scanner<InputStream, LiteralsBuffer>::ScanHexEscape(uc32 c, int length) {
748 ASSERT(length <= 4); // prevent overflow 715 ASSERT(length <= 4); // prevent overflow
749 716
750 uc32 digits[4]; 717 uc32 digits[4];
751 uc32 x = 0; 718 uc32 x = 0;
752 for (int i = 0; i < length; i++) { 719 for (int i = 0; i < length; i++) {
753 digits[i] = c0_; 720 digits[i] = c0_;
754 int d = HexValue(c0_); 721 int d = i::HexValue(c0_);
755 if (d < 0) { 722 if (d < 0) {
756 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes 723 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
757 // should be illegal, but other JS VMs just return the 724 // should be illegal, but other JS VMs just return the
758 // non-escaped version of the original character. 725 // non-escaped version of the original character.
759 726
760 // Push back digits read, except the last one (in c0_). 727 // Push back digits read, except the last one (in c0_).
761 for (int j = i-1; j >= 0; j--) { 728 for (int j = i-1; j >= 0; j--) {
762 PushBack(digits[j]); 729 PushBack(digits[j]);
763 } 730 }
764 // Notice: No handling of error - treat it as "\u"->"u". 731 // Notice: No handling of error - treat it as "\u"->"u".
765 return c; 732 return c;
766 } 733 }
767 x = x * 16 + d; 734 x = x * 16 + d;
768 Advance(); 735 Advance();
769 } 736 }
770 737
771 return x; 738 return x;
772 } 739 }
773 740
774 741
775 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 742 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
776 // ECMA-262. Other JS VMs support them. 743 // ECMA-262. Other JS VMs support them.
777 template <typename InputStream, typename LiteralsBuffer> 744 uc32 Scanner::ScanOctalEscape(
778 uc32 Scanner<InputStream, LiteralsBuffer>::ScanOctalEscape(
779 uc32 c, int length) { 745 uc32 c, int length) {
780 uc32 x = c - '0'; 746 uc32 x = c - '0';
781 for (int i = 0; i < length; i++) { 747 for (int i = 0; i < length; i++) {
782 int d = c0_ - '0'; 748 int d = c0_ - '0';
783 if (d < 0 || d > 7) break; 749 if (d < 0 || d > 7) break;
784 int nx = x * 8 + d; 750 int nx = x * 8 + d;
785 if (nx >= 256) break; 751 if (nx >= 256) break;
786 x = nx; 752 x = nx;
787 Advance(); 753 Advance();
788 } 754 }
789 return x; 755 return x;
790 } 756 }
791 757
792 758
793 template <typename InputStream, typename LiteralsBuffer> 759 void Scanner::ScanEscape() {
794 void Scanner<InputStream, LiteralsBuffer>::ScanEscape() {
795 uc32 c = c0_; 760 uc32 c = c0_;
796 Advance(); 761 Advance();
797 762
798 // Skip escaped newlines. 763 // Skip escaped newlines.
799 if (i::ScannerConstants::kIsLineTerminator.get(c)) { 764 if (i::ScannerConstants::kIsLineTerminator.get(c)) {
800 // Allow CR+LF newlines in multiline string literals. 765 // Allow CR+LF newlines in multiline string literals.
801 if (i::IsCarriageReturn(c) && i::IsLineFeed(c0_)) Advance(); 766 if (i::IsCarriageReturn(c) && i::IsLineFeed(c0_)) Advance();
802 // Allow LF+CR newlines in multiline string literals. 767 // Allow LF+CR newlines in multiline string literals.
803 if (i::IsLineFeed(c) && i::IsCarriageReturn(c0_)) Advance(); 768 if (i::IsLineFeed(c) && i::IsCarriageReturn(c0_)) Advance();
804 return; 769 return;
(...skipping 21 matching lines...) Expand all
826 case '7' : c = ScanOctalEscape(c, 2); break; 791 case '7' : c = ScanOctalEscape(c, 2); break;
827 } 792 }
828 793
829 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these 794 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
830 // should be illegal, but they are commonly handled 795 // should be illegal, but they are commonly handled
831 // as non-escaped characters by JS VMs. 796 // as non-escaped characters by JS VMs.
832 AddLiteralChar(c); 797 AddLiteralChar(c);
833 } 798 }
834 799
835 800
836 template <typename InputStream, typename LiteralsBuffer> 801 i::Token::Value Scanner::ScanString() {
837 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanString() {
838 uc32 quote = c0_; 802 uc32 quote = c0_;
839 Advance(); // consume quote 803 Advance(); // consume quote
840 804
841 LiteralScope literal(this, kLiteralString); 805 LiteralScope literal(this, kLiteralString);
842 while (c0_ != quote && c0_ >= 0 806 while (c0_ != quote && c0_ >= 0
843 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) { 807 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {
844 uc32 c = c0_; 808 uc32 c = c0_;
845 Advance(); 809 Advance();
846 if (c == '\\') { 810 if (c == '\\') {
847 if (c0_ < 0) return i::Token::ILLEGAL; 811 if (c0_ < 0) return i::Token::ILLEGAL;
848 ScanEscape(); 812 ScanEscape();
849 } else { 813 } else {
850 AddLiteralChar(c); 814 AddLiteralChar(c);
851 } 815 }
852 } 816 }
853 if (c0_ != quote) return i::Token::ILLEGAL; 817 if (c0_ != quote) return i::Token::ILLEGAL;
854 literal.Complete(); 818 literal.Complete();
855 819
856 Advance(); // consume quote 820 Advance(); // consume quote
857 return i::Token::STRING; 821 return i::Token::STRING;
858 } 822 }
859 823
860 824
861 template <typename InputStream, typename LiteralsBuffer> 825 i::Token::Value Scanner::Select(
862 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Select(
863 i::Token::Value tok) { 826 i::Token::Value tok) {
864 Advance(); 827 Advance();
865 return tok; 828 return tok;
866 } 829 }
867 830
868 831
869 template <typename InputStream, typename LiteralsBuffer> 832 i::Token::Value Scanner::Select(
870 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Select(
871 uc32 next, 833 uc32 next,
872 i::Token::Value then, 834 i::Token::Value then,
873 i::Token::Value else_) { 835 i::Token::Value else_) {
874 Advance(); 836 Advance();
875 if (c0_ == next) { 837 if (c0_ == next) {
876 Advance(); 838 Advance();
877 return then; 839 return then;
878 } else { 840 } else {
879 return else_; 841 return else_;
880 } 842 }
881 } 843 }
882 844
883 845
884 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal. 846 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal.
885 template <typename InputStream, typename LiteralsBuffer> 847 void Scanner::ScanDecimalDigits() {
886 void Scanner<InputStream, LiteralsBuffer>::ScanDecimalDigits() {
887 while (i::IsDecimalDigit(c0_)) 848 while (i::IsDecimalDigit(c0_))
888 AddLiteralCharAdvance(); 849 AddLiteralCharAdvance();
889 } 850 }
890 851
891 852
892 template <typename InputStream, typename LiteralsBuffer> 853 i::Token::Value Scanner::ScanNumber(
893 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanNumber(
894 bool seen_period) { 854 bool seen_period) {
895 // c0_ is the first digit of the number or the fraction. 855 // c0_ is the first digit of the number or the fraction.
896 ASSERT(i::IsDecimalDigit(c0_)); 856 ASSERT(i::IsDecimalDigit(c0_));
897 857
898 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; 858 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
899 859
900 LiteralScope literal(this, kLiteralNumber); 860 LiteralScope literal(this, kLiteralNumber);
901 if (seen_period) { 861 if (seen_period) {
902 // we have already seen a decimal point of the float 862 // we have already seen a decimal point of the float
903 AddLiteralChar('.'); 863 AddLiteralChar('.');
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
966 if (i::IsDecimalDigit(c0_) 926 if (i::IsDecimalDigit(c0_)
967 || i::ScannerConstants::kIsIdentifierStart.get(c0_)) 927 || i::ScannerConstants::kIsIdentifierStart.get(c0_))
968 return i::Token::ILLEGAL; 928 return i::Token::ILLEGAL;
969 929
970 literal.Complete(); 930 literal.Complete();
971 931
972 return i::Token::NUMBER; 932 return i::Token::NUMBER;
973 } 933 }
974 934
975 935
976 template <typename InputStream, typename LiteralsBuffer> 936 uc32 Scanner::ScanIdentifierUnicodeEscape() {
977 uc32 Scanner<InputStream, LiteralsBuffer>::ScanIdentifierUnicodeEscape() {
978 Advance(); 937 Advance();
979 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 938 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
980 Advance(); 939 Advance();
981 uc32 c = ScanHexEscape('u', 4); 940 uc32 c = ScanHexEscape('u', 4);
982 // We do not allow a unicode escape sequence to start another 941 // We do not allow a unicode escape sequence to start another
983 // unicode escape sequence. 942 // unicode escape sequence.
984 if (c == '\\') return unibrow::Utf8::kBadChar; 943 if (c == '\\') return unibrow::Utf8::kBadChar;
985 return c; 944 return c;
986 } 945 }
987 946
988 947
989 template <typename InputStream, typename LiteralsBuffer> 948 i::Token::Value Scanner::ScanIdentifier() {
990 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanIdentifier() {
991 ASSERT(i::ScannerConstants::kIsIdentifierStart.get(c0_)); 949 ASSERT(i::ScannerConstants::kIsIdentifierStart.get(c0_));
992 950
993 LiteralScope literal(this, kLiteralIdentifier); 951 LiteralScope literal(this, kLiteralIdentifier);
994 i::KeywordMatcher keyword_match; 952 i::KeywordMatcher keyword_match;
995 953
996 // Scan identifier start character. 954 // Scan identifier start character.
997 if (c0_ == '\\') { 955 if (c0_ == '\\') {
998 uc32 c = ScanIdentifierUnicodeEscape(); 956 uc32 c = ScanIdentifierUnicodeEscape();
999 // Only allow legal identifier start characters. 957 // Only allow legal identifier start characters.
1000 if (!i::ScannerConstants::kIsIdentifierStart.get(c)) { 958 if (!i::ScannerConstants::kIsIdentifierStart.get(c)) {
(...skipping 22 matching lines...) Expand all
1023 keyword_match.AddChar(c0_); 981 keyword_match.AddChar(c0_);
1024 Advance(); 982 Advance();
1025 } 983 }
1026 } 984 }
1027 literal.Complete(); 985 literal.Complete();
1028 986
1029 return keyword_match.token(); 987 return keyword_match.token();
1030 } 988 }
1031 989
1032 990
1033 template <typename InputStream, typename LiteralsBuffer> 991 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1034 bool Scanner<InputStream, LiteralsBuffer>::ScanRegExpPattern(bool seen_equal) {
1035 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 992 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1036 bool in_character_class = false; 993 bool in_character_class = false;
1037 994
1038 // Previous token is either '/' or '/='; in the second case, the 995 // Previous token is either '/' or '/='; in the second case, the
1039 // pattern starts at '='. 996 // pattern starts at '='.
1040 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 997 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1041 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 998 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1042 999
1043 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1000 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1044 // the scanner should pass uninterpreted bodies to the RegExp 1001 // the scanner should pass uninterpreted bodies to the RegExp
(...skipping 18 matching lines...) Expand all
1063 AddLiteralCharAdvance(); 1020 AddLiteralCharAdvance();
1064 } 1021 }
1065 } 1022 }
1066 Advance(); // consume '/' 1023 Advance(); // consume '/'
1067 1024
1068 literal.Complete(); 1025 literal.Complete();
1069 1026
1070 return true; 1027 return true;
1071 } 1028 }
1072 1029
1073 template <typename InputStream, typename LiteralsBuffer> 1030 bool Scanner::ScanRegExpFlags() {
1074 bool Scanner<InputStream, LiteralsBuffer>::ScanRegExpFlags() {
1075 // Scan regular expression flags. 1031 // Scan regular expression flags.
1076 LiteralScope literal(this, kLiteralRegExpFlags); 1032 LiteralScope literal(this, kLiteralRegExpFlags);
1077 while (i::ScannerConstants::kIsIdentifierPart.get(c0_)) { 1033 while (i::ScannerConstants::kIsIdentifierPart.get(c0_)) {
1078 if (c0_ == '\\') { 1034 if (c0_ == '\\') {
1079 uc32 c = ScanIdentifierUnicodeEscape(); 1035 uc32 c = ScanIdentifierUnicodeEscape();
1080 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 1036 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1081 // We allow any escaped character, unlike the restriction on 1037 // We allow any escaped character, unlike the restriction on
1082 // IdentifierPart when it is used to build an IdentifierName. 1038 // IdentifierPart when it is used to build an IdentifierName.
1083 AddLiteralChar(c); 1039 AddLiteralChar(c);
1084 continue; 1040 continue;
1085 } 1041 }
1086 } 1042 }
1087 AddLiteralCharAdvance(); 1043 AddLiteralCharAdvance();
1088 } 1044 }
1089 literal.Complete(); 1045 literal.Complete();
1090 1046
1091 next_.location.end_pos = source_pos() - 1; 1047 next_.location.end_pos = source_pos() - 1;
1092 return true; 1048 return true;
1093 } 1049 }
1094 1050
1095 1051
1096 } } // namespace v8::preparser 1052 } } // namespace v8::preparser
1097 1053
1098 #endif // V8_PRESCANNER_H_ 1054 #endif // V8_PRESCANNER_H_
OLDNEW
« no previous file with comments | « src/parser.cc ('k') | src/scanner.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698