OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 11 matching lines...) Expand all Loading... |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
29 | 29 |
30 #include "../include/v8stdint.h" | 30 #include "../include/v8stdint.h" |
31 #include "scanner-base.h" | 31 #include "scanner-base.h" |
| 32 #include "char-predicates-inl.h" |
32 | 33 |
33 namespace v8 { | 34 namespace v8 { |
34 namespace internal { | 35 namespace internal { |
35 | 36 |
36 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
| 38 // UTF16Buffer |
| 39 |
| 40 UTF16Buffer::UTF16Buffer() |
| 41 : pos_(0), end_(kNoEndPosition) { } |
| 42 |
| 43 // ---------------------------------------------------------------------------- |
| 44 // LiteralCollector |
| 45 |
| 46 LiteralCollector::LiteralCollector() |
| 47 : buffer_(kInitialCapacity), recording_(false) { } |
| 48 |
| 49 |
| 50 LiteralCollector::~LiteralCollector() {} |
| 51 |
| 52 |
| 53 void LiteralCollector::AddCharSlow(uc32 c) { |
| 54 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); |
| 55 int length = unibrow::Utf8::Length(c); |
| 56 Vector<char> block = buffer_.AddBlock(length, '\0'); |
| 57 #ifdef DEBUG |
| 58 int written_length = unibrow::Utf8::Encode(block.start(), c); |
| 59 CHECK_EQ(length, written_length); |
| 60 #else |
| 61 unibrow::Utf8::Encode(block.start(), c); |
| 62 #endif |
| 63 } |
| 64 |
| 65 // ---------------------------------------------------------------------------- |
37 // Character predicates | 66 // Character predicates |
38 | 67 |
39 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; | 68 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; |
40 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; | 69 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; |
41 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; | 70 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; |
42 unibrow::Predicate<unibrow::LineTerminator, 128> | 71 unibrow::Predicate<unibrow::LineTerminator, 128> |
43 ScannerConstants::kIsLineTerminator; | 72 ScannerConstants::kIsLineTerminator; |
44 | 73 |
45 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; | 74 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; |
46 | 75 |
47 // Compound predicates. | 76 // Compound predicates. |
48 | 77 |
49 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { | 78 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { |
50 // Checks whether the buffer contains an identifier (no escape). | 79 // Checks whether the buffer contains an identifier (no escape). |
51 if (!buffer->has_more()) return false; | 80 if (!buffer->has_more()) return false; |
52 if (!kIsIdentifierStart.get(buffer->GetNext())) { | 81 if (!kIsIdentifierStart.get(buffer->GetNext())) { |
53 return false; | 82 return false; |
54 } | 83 } |
55 while (buffer->has_more()) { | 84 while (buffer->has_more()) { |
56 if (!kIsIdentifierPart.get(buffer->GetNext())) { | 85 if (!kIsIdentifierPart.get(buffer->GetNext())) { |
57 return false; | 86 return false; |
58 } | 87 } |
59 } | 88 } |
60 return true; | 89 return true; |
61 } | 90 } |
62 | 91 |
63 // ---------------------------------------------------------------------------- | 92 // ---------------------------------------------------------------------------- |
| 93 // Scanner |
| 94 |
| 95 Scanner::Scanner() : source_(NULL), stack_overflow_(false) {} |
| 96 |
| 97 |
| 98 uc32 Scanner::ScanHexEscape(uc32 c, int length) { |
| 99 ASSERT(length <= 4); // prevent overflow |
| 100 |
| 101 uc32 digits[4]; |
| 102 uc32 x = 0; |
| 103 for (int i = 0; i < length; i++) { |
| 104 digits[i] = c0_; |
| 105 int d = HexValue(c0_); |
| 106 if (d < 0) { |
| 107 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 108 // should be illegal, but other JS VMs just return the |
| 109 // non-escaped version of the original character. |
| 110 |
| 111 // Push back digits read, except the last one (in c0_). |
| 112 for (int j = i-1; j >= 0; j--) { |
| 113 PushBack(digits[j]); |
| 114 } |
| 115 // Notice: No handling of error - treat it as "\u"->"u". |
| 116 return c; |
| 117 } |
| 118 x = x * 16 + d; |
| 119 Advance(); |
| 120 } |
| 121 |
| 122 return x; |
| 123 } |
| 124 |
| 125 |
| 126 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 127 // ECMA-262. Other JS VMs support them. |
| 128 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
| 129 uc32 x = c - '0'; |
| 130 for (int i = 0; i < length; i++) { |
| 131 int d = c0_ - '0'; |
| 132 if (d < 0 || d > 7) break; |
| 133 int nx = x * 8 + d; |
| 134 if (nx >= 256) break; |
| 135 x = nx; |
| 136 Advance(); |
| 137 } |
| 138 return x; |
| 139 } |
| 140 |
| 141 |
| 142 // ---------------------------------------------------------------------------- |
| 143 // JavaScriptScanner |
| 144 |
| 145 JavaScriptScanner::JavaScriptScanner() |
| 146 : has_line_terminator_before_next_(false) {} |
| 147 |
| 148 |
| 149 Token::Value JavaScriptScanner::Next() { |
| 150 current_ = next_; |
| 151 has_line_terminator_before_next_ = false; |
| 152 Scan(); |
| 153 return current_.token; |
| 154 } |
| 155 |
| 156 |
| 157 static inline bool IsByteOrderMark(uc32 c) { |
| 158 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 159 // Unicode character; this implies that in a Unicode context the |
| 160 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 161 // character expressed in little-endian byte order (since it could |
| 162 // not be a U+FFFE character expressed in big-endian byte |
| 163 // order). Nevertheless, we check for it to be compatible with |
| 164 // Spidermonkey. |
| 165 return c == 0xFEFF || c == 0xFFFE; |
| 166 } |
| 167 |
| 168 |
| 169 bool JavaScriptScanner::SkipWhiteSpace() { |
| 170 int start_position = source_pos(); |
| 171 |
| 172 while (true) { |
| 173 // We treat byte-order marks (BOMs) as whitespace for better |
| 174 // compatibility with Spidermonkey and other JavaScript engines. |
| 175 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
| 176 // IsWhiteSpace() includes line terminators! |
| 177 if (ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 178 // Ignore line terminators, but remember them. This is necessary |
| 179 // for automatic semicolon insertion. |
| 180 has_line_terminator_before_next_ = true; |
| 181 } |
| 182 Advance(); |
| 183 } |
| 184 |
| 185 // If there is an HTML comment end '-->' at the beginning of a |
| 186 // line (with only whitespace in front of it), we treat the rest |
| 187 // of the line as a comment. This is in line with the way |
| 188 // SpiderMonkey handles it. |
| 189 if (c0_ == '-' && has_line_terminator_before_next_) { |
| 190 Advance(); |
| 191 if (c0_ == '-') { |
| 192 Advance(); |
| 193 if (c0_ == '>') { |
| 194 // Treat the rest of the line as a comment. |
| 195 SkipSingleLineComment(); |
| 196 // Continue skipping white space after the comment. |
| 197 continue; |
| 198 } |
| 199 PushBack('-'); // undo Advance() |
| 200 } |
| 201 PushBack('-'); // undo Advance() |
| 202 } |
| 203 // Return whether or not we skipped any characters. |
| 204 return source_pos() != start_position; |
| 205 } |
| 206 } |
| 207 |
| 208 |
| 209 Token::Value JavaScriptScanner::SkipSingleLineComment() { |
| 210 Advance(); |
| 211 |
| 212 // The line terminator at the end of the line is not considered |
| 213 // to be part of the single-line comment; it is recognized |
| 214 // separately by the lexical grammar and becomes part of the |
| 215 // stream of input elements for the syntactic grammar (see |
| 216 // ECMA-262, section 7.4, page 12). |
| 217 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 218 Advance(); |
| 219 } |
| 220 |
| 221 return Token::WHITESPACE; |
| 222 } |
| 223 |
| 224 |
| 225 Token::Value JavaScriptScanner::SkipMultiLineComment() { |
| 226 ASSERT(c0_ == '*'); |
| 227 Advance(); |
| 228 |
| 229 while (c0_ >= 0) { |
| 230 char ch = c0_; |
| 231 Advance(); |
| 232 // If we have reached the end of the multi-line comment, we |
| 233 // consume the '/' and insert a whitespace. This way all |
| 234 // multi-line comments are treated as whitespace - even the ones |
| 235 // containing line terminators. This contradicts ECMA-262, section |
| 236 // 7.4, page 12, that says that multi-line comments containing |
| 237 // line terminators should be treated as a line terminator, but it |
| 238 // matches the behaviour of SpiderMonkey and KJS. |
| 239 if (ch == '*' && c0_ == '/') { |
| 240 c0_ = ' '; |
| 241 return Token::WHITESPACE; |
| 242 } |
| 243 } |
| 244 |
| 245 // Unterminated multi-line comment. |
| 246 return Token::ILLEGAL; |
| 247 } |
| 248 |
| 249 |
| 250 Token::Value JavaScriptScanner::ScanHtmlComment() { |
| 251 // Check for <!-- comments. |
| 252 ASSERT(c0_ == '!'); |
| 253 Advance(); |
| 254 if (c0_ == '-') { |
| 255 Advance(); |
| 256 if (c0_ == '-') return SkipSingleLineComment(); |
| 257 PushBack('-'); // undo Advance() |
| 258 } |
| 259 PushBack('!'); // undo Advance() |
| 260 ASSERT(c0_ == '!'); |
| 261 return Token::LT; |
| 262 } |
| 263 |
| 264 |
| 265 void JavaScriptScanner::Scan() { |
| 266 next_.literal_chars = Vector<const char>(); |
| 267 Token::Value token; |
| 268 do { |
| 269 // Remember the position of the next token |
| 270 next_.location.beg_pos = source_pos(); |
| 271 |
| 272 switch (c0_) { |
| 273 case ' ': |
| 274 case '\t': |
| 275 Advance(); |
| 276 token = Token::WHITESPACE; |
| 277 break; |
| 278 |
| 279 case '\n': |
| 280 Advance(); |
| 281 has_line_terminator_before_next_ = true; |
| 282 token = Token::WHITESPACE; |
| 283 break; |
| 284 |
| 285 case '"': case '\'': |
| 286 token = ScanString(); |
| 287 break; |
| 288 |
| 289 case '<': |
| 290 // < <= << <<= <!-- |
| 291 Advance(); |
| 292 if (c0_ == '=') { |
| 293 token = Select(Token::LTE); |
| 294 } else if (c0_ == '<') { |
| 295 token = Select('=', Token::ASSIGN_SHL, Token::SHL); |
| 296 } else if (c0_ == '!') { |
| 297 token = ScanHtmlComment(); |
| 298 } else { |
| 299 token = Token::LT; |
| 300 } |
| 301 break; |
| 302 |
| 303 case '>': |
| 304 // > >= >> >>= >>> >>>= |
| 305 Advance(); |
| 306 if (c0_ == '=') { |
| 307 token = Select(Token::GTE); |
| 308 } else if (c0_ == '>') { |
| 309 // >> >>= >>> >>>= |
| 310 Advance(); |
| 311 if (c0_ == '=') { |
| 312 token = Select(Token::ASSIGN_SAR); |
| 313 } else if (c0_ == '>') { |
| 314 token = Select('=', Token::ASSIGN_SHR, Token::SHR); |
| 315 } else { |
| 316 token = Token::SAR; |
| 317 } |
| 318 } else { |
| 319 token = Token::GT; |
| 320 } |
| 321 break; |
| 322 |
| 323 case '=': |
| 324 // = == === |
| 325 Advance(); |
| 326 if (c0_ == '=') { |
| 327 token = Select('=', Token::EQ_STRICT, Token::EQ); |
| 328 } else { |
| 329 token = Token::ASSIGN; |
| 330 } |
| 331 break; |
| 332 |
| 333 case '!': |
| 334 // ! != !== |
| 335 Advance(); |
| 336 if (c0_ == '=') { |
| 337 token = Select('=', Token::NE_STRICT, Token::NE); |
| 338 } else { |
| 339 token = Token::NOT; |
| 340 } |
| 341 break; |
| 342 |
| 343 case '+': |
| 344 // + ++ += |
| 345 Advance(); |
| 346 if (c0_ == '+') { |
| 347 token = Select(Token::INC); |
| 348 } else if (c0_ == '=') { |
| 349 token = Select(Token::ASSIGN_ADD); |
| 350 } else { |
| 351 token = Token::ADD; |
| 352 } |
| 353 break; |
| 354 |
| 355 case '-': |
| 356 // - -- --> -= |
| 357 Advance(); |
| 358 if (c0_ == '-') { |
| 359 Advance(); |
| 360 if (c0_ == '>' && has_line_terminator_before_next_) { |
| 361 // For compatibility with SpiderMonkey, we skip lines that |
| 362 // start with an HTML comment end '-->'. |
| 363 token = SkipSingleLineComment(); |
| 364 } else { |
| 365 token = Token::DEC; |
| 366 } |
| 367 } else if (c0_ == '=') { |
| 368 token = Select(Token::ASSIGN_SUB); |
| 369 } else { |
| 370 token = Token::SUB; |
| 371 } |
| 372 break; |
| 373 |
| 374 case '*': |
| 375 // * *= |
| 376 token = Select('=', Token::ASSIGN_MUL, Token::MUL); |
| 377 break; |
| 378 |
| 379 case '%': |
| 380 // % %= |
| 381 token = Select('=', Token::ASSIGN_MOD, Token::MOD); |
| 382 break; |
| 383 |
| 384 case '/': |
| 385 // / // /* /= |
| 386 Advance(); |
| 387 if (c0_ == '/') { |
| 388 token = SkipSingleLineComment(); |
| 389 } else if (c0_ == '*') { |
| 390 token = SkipMultiLineComment(); |
| 391 } else if (c0_ == '=') { |
| 392 token = Select(Token::ASSIGN_DIV); |
| 393 } else { |
| 394 token = Token::DIV; |
| 395 } |
| 396 break; |
| 397 |
| 398 case '&': |
| 399 // & && &= |
| 400 Advance(); |
| 401 if (c0_ == '&') { |
| 402 token = Select(Token::AND); |
| 403 } else if (c0_ == '=') { |
| 404 token = Select(Token::ASSIGN_BIT_AND); |
| 405 } else { |
| 406 token = Token::BIT_AND; |
| 407 } |
| 408 break; |
| 409 |
| 410 case '|': |
| 411 // | || |= |
| 412 Advance(); |
| 413 if (c0_ == '|') { |
| 414 token = Select(Token::OR); |
| 415 } else if (c0_ == '=') { |
| 416 token = Select(Token::ASSIGN_BIT_OR); |
| 417 } else { |
| 418 token = Token::BIT_OR; |
| 419 } |
| 420 break; |
| 421 |
| 422 case '^': |
| 423 // ^ ^= |
| 424 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); |
| 425 break; |
| 426 |
| 427 case '.': |
| 428 // . Number |
| 429 Advance(); |
| 430 if (IsDecimalDigit(c0_)) { |
| 431 token = ScanNumber(true); |
| 432 } else { |
| 433 token = Token::PERIOD; |
| 434 } |
| 435 break; |
| 436 |
| 437 case ':': |
| 438 token = Select(Token::COLON); |
| 439 break; |
| 440 |
| 441 case ';': |
| 442 token = Select(Token::SEMICOLON); |
| 443 break; |
| 444 |
| 445 case ',': |
| 446 token = Select(Token::COMMA); |
| 447 break; |
| 448 |
| 449 case '(': |
| 450 token = Select(Token::LPAREN); |
| 451 break; |
| 452 |
| 453 case ')': |
| 454 token = Select(Token::RPAREN); |
| 455 break; |
| 456 |
| 457 case '[': |
| 458 token = Select(Token::LBRACK); |
| 459 break; |
| 460 |
| 461 case ']': |
| 462 token = Select(Token::RBRACK); |
| 463 break; |
| 464 |
| 465 case '{': |
| 466 token = Select(Token::LBRACE); |
| 467 break; |
| 468 |
| 469 case '}': |
| 470 token = Select(Token::RBRACE); |
| 471 break; |
| 472 |
| 473 case '?': |
| 474 token = Select(Token::CONDITIONAL); |
| 475 break; |
| 476 |
| 477 case '~': |
| 478 token = Select(Token::BIT_NOT); |
| 479 break; |
| 480 |
| 481 default: |
| 482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { |
| 483 token = ScanIdentifier(); |
| 484 } else if (IsDecimalDigit(c0_)) { |
| 485 token = ScanNumber(false); |
| 486 } else if (SkipWhiteSpace()) { |
| 487 token = Token::WHITESPACE; |
| 488 } else if (c0_ < 0) { |
| 489 token = Token::EOS; |
| 490 } else { |
| 491 token = Select(Token::ILLEGAL); |
| 492 } |
| 493 break; |
| 494 } |
| 495 |
| 496 // Continue scanning for tokens as long as we're just skipping |
| 497 // whitespace. |
| 498 } while (token == Token::WHITESPACE); |
| 499 |
| 500 next_.location.end_pos = source_pos(); |
| 501 next_.token = token; |
| 502 } |
| 503 |
| 504 |
| 505 void JavaScriptScanner::SeekForward(int pos) { |
| 506 source_->SeekForward(pos - 1); |
| 507 Advance(); |
| 508 // This function is only called to seek to the location |
| 509 // of the end of a function (at the "}" token). It doesn't matter |
| 510 // whether there was a line terminator in the part we skip. |
| 511 has_line_terminator_before_next_ = false; |
| 512 Scan(); |
| 513 } |
| 514 |
| 515 |
| 516 void JavaScriptScanner::ScanEscape() { |
| 517 uc32 c = c0_; |
| 518 Advance(); |
| 519 |
| 520 // Skip escaped newlines. |
| 521 if (ScannerConstants::kIsLineTerminator.get(c)) { |
| 522 // Allow CR+LF newlines in multiline string literals. |
| 523 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 524 // Allow LF+CR newlines in multiline string literals. |
| 525 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 526 return; |
| 527 } |
| 528 |
| 529 switch (c) { |
| 530 case '\'': // fall through |
| 531 case '"' : // fall through |
| 532 case '\\': break; |
| 533 case 'b' : c = '\b'; break; |
| 534 case 'f' : c = '\f'; break; |
| 535 case 'n' : c = '\n'; break; |
| 536 case 'r' : c = '\r'; break; |
| 537 case 't' : c = '\t'; break; |
| 538 case 'u' : c = ScanHexEscape(c, 4); break; |
| 539 case 'v' : c = '\v'; break; |
| 540 case 'x' : c = ScanHexEscape(c, 2); break; |
| 541 case '0' : // fall through |
| 542 case '1' : // fall through |
| 543 case '2' : // fall through |
| 544 case '3' : // fall through |
| 545 case '4' : // fall through |
| 546 case '5' : // fall through |
| 547 case '6' : // fall through |
| 548 case '7' : c = ScanOctalEscape(c, 2); break; |
| 549 } |
| 550 |
| 551 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
| 552 // should be illegal, but they are commonly handled |
| 553 // as non-escaped characters by JS VMs. |
| 554 AddLiteralChar(c); |
| 555 } |
| 556 |
| 557 |
| 558 Token::Value JavaScriptScanner::ScanString() { |
| 559 uc32 quote = c0_; |
| 560 Advance(); // consume quote |
| 561 |
| 562 LiteralScope literal(this); |
| 563 while (c0_ != quote && c0_ >= 0 |
| 564 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 565 uc32 c = c0_; |
| 566 Advance(); |
| 567 if (c == '\\') { |
| 568 if (c0_ < 0) return Token::ILLEGAL; |
| 569 ScanEscape(); |
| 570 } else { |
| 571 AddLiteralChar(c); |
| 572 } |
| 573 } |
| 574 if (c0_ != quote) return Token::ILLEGAL; |
| 575 literal.Complete(); |
| 576 |
| 577 Advance(); // consume quote |
| 578 return Token::STRING; |
| 579 } |
| 580 |
| 581 |
| 582 void JavaScriptScanner::ScanDecimalDigits() { |
| 583 while (IsDecimalDigit(c0_)) |
| 584 AddLiteralCharAdvance(); |
| 585 } |
| 586 |
| 587 |
| 588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { |
| 589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 590 |
| 591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 592 |
| 593 LiteralScope literal(this); |
| 594 if (seen_period) { |
| 595 // we have already seen a decimal point of the float |
| 596 AddLiteralChar('.'); |
| 597 ScanDecimalDigits(); // we know we have at least one digit |
| 598 |
| 599 } else { |
| 600 // if the first character is '0' we must check for octals and hex |
| 601 if (c0_ == '0') { |
| 602 AddLiteralCharAdvance(); |
| 603 |
| 604 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
| 605 if (c0_ == 'x' || c0_ == 'X') { |
| 606 // hex number |
| 607 kind = HEX; |
| 608 AddLiteralCharAdvance(); |
| 609 if (!IsHexDigit(c0_)) { |
| 610 // we must have at least one hex digit after 'x'/'X' |
| 611 return Token::ILLEGAL; |
| 612 } |
| 613 while (IsHexDigit(c0_)) { |
| 614 AddLiteralCharAdvance(); |
| 615 } |
| 616 } else if ('0' <= c0_ && c0_ <= '7') { |
| 617 // (possible) octal number |
| 618 kind = OCTAL; |
| 619 while (true) { |
| 620 if (c0_ == '8' || c0_ == '9') { |
| 621 kind = DECIMAL; |
| 622 break; |
| 623 } |
| 624 if (c0_ < '0' || '7' < c0_) break; |
| 625 AddLiteralCharAdvance(); |
| 626 } |
| 627 } |
| 628 } |
| 629 |
| 630 // Parse decimal digits and allow trailing fractional part. |
| 631 if (kind == DECIMAL) { |
| 632 ScanDecimalDigits(); // optional |
| 633 if (c0_ == '.') { |
| 634 AddLiteralCharAdvance(); |
| 635 ScanDecimalDigits(); // optional |
| 636 } |
| 637 } |
| 638 } |
| 639 |
| 640 // scan exponent, if any |
| 641 if (c0_ == 'e' || c0_ == 'E') { |
| 642 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
| 643 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
| 644 // scan exponent |
| 645 AddLiteralCharAdvance(); |
| 646 if (c0_ == '+' || c0_ == '-') |
| 647 AddLiteralCharAdvance(); |
| 648 if (!IsDecimalDigit(c0_)) { |
| 649 // we must have at least one decimal digit after 'e'/'E' |
| 650 return Token::ILLEGAL; |
| 651 } |
| 652 ScanDecimalDigits(); |
| 653 } |
| 654 |
| 655 // The source character immediately following a numeric literal must |
| 656 // not be an identifier start or a decimal digit; see ECMA-262 |
| 657 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 658 // if the value is 0). |
| 659 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) |
| 660 return Token::ILLEGAL; |
| 661 |
| 662 literal.Complete(); |
| 663 |
| 664 return Token::NUMBER; |
| 665 } |
| 666 |
| 667 |
| 668 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { |
| 669 Advance(); |
| 670 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
| 671 Advance(); |
| 672 uc32 c = ScanHexEscape('u', 4); |
| 673 // We do not allow a unicode escape sequence to start another |
| 674 // unicode escape sequence. |
| 675 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 676 return c; |
| 677 } |
| 678 |
| 679 |
| 680 Token::Value JavaScriptScanner::ScanIdentifier() { |
| 681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
| 682 |
| 683 LiteralScope literal(this); |
| 684 KeywordMatcher keyword_match; |
| 685 |
| 686 // Scan identifier start character. |
| 687 if (c0_ == '\\') { |
| 688 uc32 c = ScanIdentifierUnicodeEscape(); |
| 689 // Only allow legal identifier start characters. |
| 690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
| 691 AddLiteralChar(c); |
| 692 keyword_match.Fail(); |
| 693 } else { |
| 694 AddLiteralChar(c0_); |
| 695 keyword_match.AddChar(c0_); |
| 696 Advance(); |
| 697 } |
| 698 |
| 699 // Scan the rest of the identifier characters. |
| 700 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
| 701 if (c0_ == '\\') { |
| 702 uc32 c = ScanIdentifierUnicodeEscape(); |
| 703 // Only allow legal identifier part characters. |
| 704 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
| 705 AddLiteralChar(c); |
| 706 keyword_match.Fail(); |
| 707 } else { |
| 708 AddLiteralChar(c0_); |
| 709 keyword_match.AddChar(c0_); |
| 710 Advance(); |
| 711 } |
| 712 } |
| 713 literal.Complete(); |
| 714 |
| 715 return keyword_match.token(); |
| 716 } |
| 717 |
| 718 |
| 719 |
| 720 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { |
| 721 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 722 bool in_character_class = false; |
| 723 |
| 724 // Previous token is either '/' or '/=', in the second case, the |
| 725 // pattern starts at =. |
| 726 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 727 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 728 |
| 729 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 730 // the scanner should pass uninterpreted bodies to the RegExp |
| 731 // constructor. |
| 732 LiteralScope literal(this); |
| 733 if (seen_equal) |
| 734 AddLiteralChar('='); |
| 735 |
| 736 while (c0_ != '/' || in_character_class) { |
| 737 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 738 if (c0_ == '\\') { // escaped character |
| 739 AddLiteralCharAdvance(); |
| 740 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 741 AddLiteralCharAdvance(); |
| 742 } else { // unescaped character |
| 743 if (c0_ == '[') in_character_class = true; |
| 744 if (c0_ == ']') in_character_class = false; |
| 745 AddLiteralCharAdvance(); |
| 746 } |
| 747 } |
| 748 Advance(); // consume '/' |
| 749 |
| 750 literal.Complete(); |
| 751 |
| 752 return true; |
| 753 } |
| 754 |
| 755 bool JavaScriptScanner::ScanRegExpFlags() { |
| 756 // Scan regular expression flags. |
| 757 LiteralScope literal(this); |
| 758 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
| 759 if (c0_ == '\\') { |
| 760 uc32 c = ScanIdentifierUnicodeEscape(); |
| 761 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 762 // We allow any escaped character, unlike the restriction on |
| 763 // IdentifierPart when it is used to build an IdentifierName. |
| 764 AddLiteralChar(c); |
| 765 continue; |
| 766 } |
| 767 } |
| 768 AddLiteralCharAdvance(); |
| 769 } |
| 770 literal.Complete(); |
| 771 |
| 772 next_.location.end_pos = source_pos() - 1; |
| 773 return true; |
| 774 } |
| 775 |
| 776 // ---------------------------------------------------------------------------- |
64 // Keyword Matcher | 777 // Keyword Matcher |
65 | 778 |
66 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { | 779 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { |
67 { "break", KEYWORD_PREFIX, Token::BREAK }, | 780 { "break", KEYWORD_PREFIX, Token::BREAK }, |
68 { NULL, C, Token::ILLEGAL }, | 781 { NULL, C, Token::ILLEGAL }, |
69 { NULL, D, Token::ILLEGAL }, | 782 { NULL, D, Token::ILLEGAL }, |
70 { "else", KEYWORD_PREFIX, Token::ELSE }, | 783 { "else", KEYWORD_PREFIX, Token::ELSE }, |
71 { NULL, F, Token::ILLEGAL }, | 784 { NULL, F, Token::ILLEGAL }, |
72 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 785 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
73 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 786 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
186 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
187 break; | 900 break; |
188 case UNMATCHABLE: | 901 case UNMATCHABLE: |
189 break; | 902 break; |
190 } | 903 } |
191 // On fallthrough, it's a failure. | 904 // On fallthrough, it's a failure. |
192 state_ = UNMATCHABLE; | 905 state_ = UNMATCHABLE; |
193 } | 906 } |
194 | 907 |
195 } } // namespace v8::internal | 908 } } // namespace v8::internal |
OLD | NEW |