| OLD | NEW | 
|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 //     * Redistributions of source code must retain the above copyright | 6 //     * Redistributions of source code must retain the above copyright | 
| 7 //       notice, this list of conditions and the following disclaimer. | 7 //       notice, this list of conditions and the following disclaimer. | 
| 8 //     * Redistributions in binary form must reproduce the above | 8 //     * Redistributions in binary form must reproduce the above | 
| 9 //       copyright notice, this list of conditions and the following | 9 //       copyright notice, this list of conditions and the following | 
| 10 //       disclaimer in the documentation and/or other materials provided | 10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 30 matching lines...) Expand all  Loading... | 
| 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 
| 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 
| 43 | 43 | 
| 44 | 44 | 
| 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 
| 46 | 46 | 
| 47 | 47 | 
| 48 // ---------------------------------------------------------------------------- | 48 // ---------------------------------------------------------------------------- | 
| 49 // UTF8Buffer | 49 // UTF8Buffer | 
| 50 | 50 | 
| 51 UTF8Buffer::UTF8Buffer() : data_(NULL) { | 51 UTF8Buffer::UTF8Buffer() { | 
| 52   Initialize(NULL, 0); | 52   static const int kInitialCapacity = 1 * KB; | 
|  | 53   data_ = NewArray<char>(kInitialCapacity); | 
|  | 54   limit_ = ComputeLimit(data_, kInitialCapacity); | 
|  | 55   Reset(); | 
|  | 56   ASSERT(Capacity() == kInitialCapacity && pos() == 0); | 
| 53 } | 57 } | 
| 54 | 58 | 
| 55 | 59 | 
| 56 UTF8Buffer::~UTF8Buffer() { | 60 UTF8Buffer::~UTF8Buffer() { | 
| 57   DeleteArray(data_); | 61   DeleteArray(data_); | 
| 58 } | 62 } | 
| 59 | 63 | 
| 60 | 64 | 
| 61 void UTF8Buffer::Initialize(char* src, int length) { | 65 void UTF8Buffer::AddCharSlow(uc32 c) { | 
| 62   DeleteArray(data_); | 66   static const int kCapacityGrowthLimit = 1 * MB; | 
| 63   data_ = src; | 67   if (cursor_ > limit_) { | 
| 64   size_ = length; | 68     int old_capacity = Capacity(); | 
| 65   Reset(); | 69     int old_position = pos(); | 
|  | 70     int new_capacity = | 
|  | 71         Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); | 
|  | 72     char* new_data = NewArray<char>(new_capacity); | 
|  | 73     memcpy(new_data, data_, old_position); | 
|  | 74     DeleteArray(data_); | 
|  | 75     data_ = new_data; | 
|  | 76     cursor_ = new_data + old_position; | 
|  | 77     limit_ = ComputeLimit(new_data, new_capacity); | 
|  | 78     ASSERT(Capacity() == new_capacity && pos() == old_position); | 
|  | 79   } | 
|  | 80   if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 
|  | 81     *cursor_++ = c;  // Common case: 7-bit ASCII. | 
|  | 82   } else { | 
|  | 83     cursor_ += unibrow::Utf8::Encode(cursor_, c); | 
|  | 84   } | 
|  | 85   ASSERT(pos() <= Capacity()); | 
| 66 } | 86 } | 
| 67 | 87 | 
| 68 | 88 | 
| 69 void UTF8Buffer::AddChar(uc32 c) { |  | 
| 70   const int min_size = 1024; |  | 
| 71   if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) { |  | 
| 72     int new_size = size_ * 2; |  | 
| 73     if (new_size < min_size) { |  | 
| 74       new_size = min_size; |  | 
| 75     } |  | 
| 76     char* new_data = NewArray<char>(new_size); |  | 
| 77     memcpy(new_data, data_, pos_); |  | 
| 78     DeleteArray(data_); |  | 
| 79     data_ = new_data; |  | 
| 80     size_ = new_size; |  | 
| 81   } |  | 
| 82   if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) { |  | 
| 83     data_[pos_++] = c;  // common case: 7bit ASCII |  | 
| 84   } else { |  | 
| 85     pos_ += unibrow::Utf8::Encode(&data_[pos_], c); |  | 
| 86   } |  | 
| 87   ASSERT(pos_ <= size_); |  | 
| 88 } |  | 
| 89 |  | 
| 90 |  | 
| 91 // ---------------------------------------------------------------------------- | 89 // ---------------------------------------------------------------------------- | 
| 92 // UTF16Buffer | 90 // UTF16Buffer | 
| 93 | 91 | 
| 94 | 92 | 
| 95 UTF16Buffer::UTF16Buffer() | 93 UTF16Buffer::UTF16Buffer() | 
| 96   : pos_(0), | 94   : pos_(0), | 
| 97     pushback_buffer_(0), | 95     pushback_buffer_(0), | 
| 98     last_(0), | 96     last_(0), | 
| 99     stream_(NULL) { } | 97     stream_(NULL) { } | 
| 100 | 98 | 
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 165   source_.Initialize(source, stream); | 163   source_.Initialize(source, stream); | 
| 166   position_ = position; | 164   position_ = position; | 
| 167 | 165 | 
| 168   // Reset literals buffer | 166   // Reset literals buffer | 
| 169   literals_.Reset(); | 167   literals_.Reset(); | 
| 170 | 168 | 
| 171   // Set c0_ (one character ahead) | 169   // Set c0_ (one character ahead) | 
| 172   ASSERT(kCharacterLookaheadBufferSize == 1); | 170   ASSERT(kCharacterLookaheadBufferSize == 1); | 
| 173   Advance(); | 171   Advance(); | 
| 174 | 172 | 
| 175   // Skip initial whitespace (allowing HTML comment ends) and scan | 173   // Skip initial whitespace allowing HTML comment ends just like | 
| 176   // first token. | 174   // after a newline and scan first token. | 
| 177   SkipWhiteSpace(true); | 175   has_line_terminator_before_next_ = true; | 
|  | 176   SkipWhiteSpace(); | 
| 178   Scan(); | 177   Scan(); | 
| 179 } | 178 } | 
| 180 | 179 | 
| 181 | 180 | 
| 182 Handle<String> Scanner::SubString(int start, int end) { | 181 Handle<String> Scanner::SubString(int start, int end) { | 
| 183   return source_.SubString(start - position_, end - position_); | 182   return source_.SubString(start - position_, end - position_); | 
| 184 } | 183 } | 
| 185 | 184 | 
| 186 | 185 | 
| 187 Token::Value Scanner::Next() { | 186 Token::Value Scanner::Next() { | 
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 239   // Unicode character; this implies that in a Unicode context the | 238   // Unicode character; this implies that in a Unicode context the | 
| 240   // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 239   // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 
| 241   // character expressed in little-endian byte order (since it could | 240   // character expressed in little-endian byte order (since it could | 
| 242   // not be a U+FFFE character expressed in big-endian byte | 241   // not be a U+FFFE character expressed in big-endian byte | 
| 243   // order). Nevertheless, we check for it to be compatible with | 242   // order). Nevertheless, we check for it to be compatible with | 
| 244   // Spidermonkey. | 243   // Spidermonkey. | 
| 245   return c == 0xFEFF || c == 0xFFFE; | 244   return c == 0xFEFF || c == 0xFFFE; | 
| 246 } | 245 } | 
| 247 | 246 | 
| 248 | 247 | 
| 249 void Scanner::SkipWhiteSpace(bool initial) { | 248 bool Scanner::SkipWhiteSpace() { | 
| 250   has_line_terminator_before_next_ = initial; | 249   int start_position = source_pos(); | 
| 251 | 250 | 
| 252   while (true) { | 251   while (true) { | 
| 253     // We treat byte-order marks (BOMs) as whitespace for better | 252     // We treat byte-order marks (BOMs) as whitespace for better | 
| 254     // compatibility with Spidermonkey and other JavaScript engines. | 253     // compatibility with Spidermonkey and other JavaScript engines. | 
| 255     while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 254     while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 
| 256       // IsWhiteSpace() includes line terminators! | 255       // IsWhiteSpace() includes line terminators! | 
| 257       if (kIsLineTerminator.get(c0_)) | 256       if (kIsLineTerminator.get(c0_)) { | 
| 258         // Ignore line terminators, but remember them. This is necessary | 257         // Ignore line terminators, but remember them. This is necessary | 
| 259         // for automatic semicolon insertion. | 258         // for automatic semicolon insertion. | 
| 260         has_line_terminator_before_next_ = true; | 259         has_line_terminator_before_next_ = true; | 
|  | 260       } | 
| 261       Advance(); | 261       Advance(); | 
| 262     } | 262     } | 
| 263 | 263 | 
| 264     // If there is an HTML comment end '-->' at the beginning of a | 264     // If there is an HTML comment end '-->' at the beginning of a | 
| 265     // line (with only whitespace in front of it), we treat the rest | 265     // line (with only whitespace in front of it), we treat the rest | 
| 266     // of the line as a comment. This is in line with the way | 266     // of the line as a comment. This is in line with the way | 
| 267     // SpiderMonkey handles it. | 267     // SpiderMonkey handles it. | 
| 268     if (c0_ == '-' && has_line_terminator_before_next_) { | 268     if (c0_ == '-' && has_line_terminator_before_next_) { | 
| 269       Advance(); | 269       Advance(); | 
| 270       if (c0_ == '-') { | 270       if (c0_ == '-') { | 
| 271         Advance(); | 271         Advance(); | 
| 272         if (c0_ == '>') { | 272         if (c0_ == '>') { | 
| 273           // Treat the rest of the line as a comment. | 273           // Treat the rest of the line as a comment. | 
| 274           SkipSingleLineComment(); | 274           SkipSingleLineComment(); | 
| 275           // Continue skipping white space after the comment. | 275           // Continue skipping white space after the comment. | 
| 276           continue; | 276           continue; | 
| 277         } | 277         } | 
| 278         PushBack('-');  // undo Advance() | 278         PushBack('-');  // undo Advance() | 
| 279       } | 279       } | 
| 280       PushBack('-');  // undo Advance() | 280       PushBack('-');  // undo Advance() | 
| 281     } | 281     } | 
| 282     return; | 282     // Return whether or not we skipped any characters. | 
|  | 283     return source_pos() != start_position; | 
| 283   } | 284   } | 
| 284 } | 285 } | 
| 285 | 286 | 
| 286 | 287 | 
| 287 Token::Value Scanner::SkipSingleLineComment() { | 288 Token::Value Scanner::SkipSingleLineComment() { | 
| 288   Advance(); | 289   Advance(); | 
| 289 | 290 | 
| 290   // The line terminator at the end of the line is not considered | 291   // The line terminator at the end of the line is not considered | 
| 291   // to be part of the single-line comment; it is recognized | 292   // to be part of the single-line comment; it is recognized | 
| 292   // separately by the lexical grammar and becomes part of the | 293   // separately by the lexical grammar and becomes part of the | 
| 293   // stream of input elements for the syntactic grammar (see | 294   // stream of input elements for the syntactic grammar (see | 
| 294   // ECMA-262, section 7.4, page 12). | 295   // ECMA-262, section 7.4, page 12). | 
| 295   while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 296   while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 
| 296     Advance(); | 297     Advance(); | 
| 297   } | 298   } | 
| 298 | 299 | 
| 299   return Token::COMMENT; | 300   return Token::WHITESPACE; | 
| 300 } | 301 } | 
| 301 | 302 | 
| 302 | 303 | 
| 303 Token::Value Scanner::SkipMultiLineComment() { | 304 Token::Value Scanner::SkipMultiLineComment() { | 
| 304   ASSERT(c0_ == '*'); | 305   ASSERT(c0_ == '*'); | 
| 305   Advance(); | 306   Advance(); | 
| 306 | 307 | 
| 307   while (c0_ >= 0) { | 308   while (c0_ >= 0) { | 
| 308     char ch = c0_; | 309     char ch = c0_; | 
| 309     Advance(); | 310     Advance(); | 
| 310     // If we have reached the end of the multi-line comment, we | 311     // If we have reached the end of the multi-line comment, we | 
| 311     // consume the '/' and insert a whitespace. This way all | 312     // consume the '/' and insert a whitespace. This way all | 
| 312     // multi-line comments are treated as whitespace - even the ones | 313     // multi-line comments are treated as whitespace - even the ones | 
| 313     // containing line terminators. This contradicts ECMA-262, section | 314     // containing line terminators. This contradicts ECMA-262, section | 
| 314     // 7.4, page 12, that says that multi-line comments containing | 315     // 7.4, page 12, that says that multi-line comments containing | 
| 315     // line terminators should be treated as a line terminator, but it | 316     // line terminators should be treated as a line terminator, but it | 
| 316     // matches the behaviour of SpiderMonkey and KJS. | 317     // matches the behaviour of SpiderMonkey and KJS. | 
| 317     if (ch == '*' && c0_ == '/') { | 318     if (ch == '*' && c0_ == '/') { | 
| 318       c0_ = ' '; | 319       c0_ = ' '; | 
| 319       return Token::COMMENT; | 320       return Token::WHITESPACE; | 
| 320     } | 321     } | 
| 321   } | 322   } | 
| 322 | 323 | 
| 323   // Unterminated multi-line comment. | 324   // Unterminated multi-line comment. | 
| 324   return Token::ILLEGAL; | 325   return Token::ILLEGAL; | 
| 325 } | 326 } | 
| 326 | 327 | 
| 327 | 328 | 
| 328 Token::Value Scanner::ScanHtmlComment() { | 329 Token::Value Scanner::ScanHtmlComment() { | 
| 329   // Check for <!-- comments. | 330   // Check for <!-- comments. | 
| 330   ASSERT(c0_ == '!'); | 331   ASSERT(c0_ == '!'); | 
| 331   Advance(); | 332   Advance(); | 
| 332   if (c0_ == '-') { | 333   if (c0_ == '-') { | 
| 333     Advance(); | 334     Advance(); | 
| 334     if (c0_ == '-') return SkipSingleLineComment(); | 335     if (c0_ == '-') return SkipSingleLineComment(); | 
| 335     PushBack('-');  // undo Advance() | 336     PushBack('-');  // undo Advance() | 
| 336   } | 337   } | 
| 337   PushBack('!');  // undo Advance() | 338   PushBack('!');  // undo Advance() | 
| 338   ASSERT(c0_ == '!'); | 339   ASSERT(c0_ == '!'); | 
| 339   return Token::LT; | 340   return Token::LT; | 
| 340 } | 341 } | 
| 341 | 342 | 
| 342 | 343 | 
| 343 void Scanner::Scan() { | 344 void Scanner::Scan() { | 
| 344   Token::Value token; | 345   Token::Value token; | 
| 345   bool has_line_terminator = false; | 346   has_line_terminator_before_next_ = false; | 
| 346   do { | 347   do { | 
| 347     SkipWhiteSpace(has_line_terminator); |  | 
| 348 |  | 
| 349     // Remember the line terminator in previous loop |  | 
| 350     has_line_terminator = has_line_terminator_before_next(); |  | 
| 351 |  | 
| 352     // Remember the position of the next token | 348     // Remember the position of the next token | 
| 353     next_.location.beg_pos = source_pos(); | 349     next_.location.beg_pos = source_pos(); | 
| 354 | 350 | 
| 355     token = ScanToken(); | 351     switch (c0_) { | 
| 356   } while (token == Token::COMMENT); | 352       case ' ': | 
|  | 353       case '\t': | 
|  | 354         Advance(); | 
|  | 355         token = Token::WHITESPACE; | 
|  | 356         break; | 
|  | 357 | 
|  | 358       case '\n': | 
|  | 359         Advance(); | 
|  | 360         has_line_terminator_before_next_ = true; | 
|  | 361         token = Token::WHITESPACE; | 
|  | 362         break; | 
|  | 363 | 
|  | 364       case '"': case '\'': | 
|  | 365         token = ScanString(); | 
|  | 366         break; | 
|  | 367 | 
|  | 368       case '<': | 
|  | 369         // < <= << <<= <!-- | 
|  | 370         Advance(); | 
|  | 371         if (c0_ == '=') { | 
|  | 372           token = Select(Token::LTE); | 
|  | 373         } else if (c0_ == '<') { | 
|  | 374           token = Select('=', Token::ASSIGN_SHL, Token::SHL); | 
|  | 375         } else if (c0_ == '!') { | 
|  | 376           token = ScanHtmlComment(); | 
|  | 377         } else { | 
|  | 378           token = Token::LT; | 
|  | 379         } | 
|  | 380         break; | 
|  | 381 | 
|  | 382       case '>': | 
|  | 383         // > >= >> >>= >>> >>>= | 
|  | 384         Advance(); | 
|  | 385         if (c0_ == '=') { | 
|  | 386           token = Select(Token::GTE); | 
|  | 387         } else if (c0_ == '>') { | 
|  | 388           // >> >>= >>> >>>= | 
|  | 389           Advance(); | 
|  | 390           if (c0_ == '=') { | 
|  | 391             token = Select(Token::ASSIGN_SAR); | 
|  | 392           } else if (c0_ == '>') { | 
|  | 393             token = Select('=', Token::ASSIGN_SHR, Token::SHR); | 
|  | 394           } else { | 
|  | 395             token = Token::SAR; | 
|  | 396           } | 
|  | 397         } else { | 
|  | 398           token = Token::GT; | 
|  | 399         } | 
|  | 400         break; | 
|  | 401 | 
|  | 402       case '=': | 
|  | 403         // = == === | 
|  | 404         Advance(); | 
|  | 405         if (c0_ == '=') { | 
|  | 406           token = Select('=', Token::EQ_STRICT, Token::EQ); | 
|  | 407         } else { | 
|  | 408           token = Token::ASSIGN; | 
|  | 409         } | 
|  | 410         break; | 
|  | 411 | 
|  | 412       case '!': | 
|  | 413         // ! != !== | 
|  | 414         Advance(); | 
|  | 415         if (c0_ == '=') { | 
|  | 416           token = Select('=', Token::NE_STRICT, Token::NE); | 
|  | 417         } else { | 
|  | 418           token = Token::NOT; | 
|  | 419         } | 
|  | 420         break; | 
|  | 421 | 
|  | 422       case '+': | 
|  | 423         // + ++ += | 
|  | 424         Advance(); | 
|  | 425         if (c0_ == '+') { | 
|  | 426           token = Select(Token::INC); | 
|  | 427         } else if (c0_ == '=') { | 
|  | 428           token = Select(Token::ASSIGN_ADD); | 
|  | 429         } else { | 
|  | 430           token = Token::ADD; | 
|  | 431         } | 
|  | 432         break; | 
|  | 433 | 
|  | 434       case '-': | 
|  | 435         // - -- --> -= | 
|  | 436         Advance(); | 
|  | 437         if (c0_ == '-') { | 
|  | 438           Advance(); | 
|  | 439           if (c0_ == '>' && has_line_terminator_before_next_) { | 
|  | 440             // For compatibility with SpiderMonkey, we skip lines that | 
|  | 441             // start with an HTML comment end '-->'. | 
|  | 442             token = SkipSingleLineComment(); | 
|  | 443           } else { | 
|  | 444             token = Token::DEC; | 
|  | 445           } | 
|  | 446         } else if (c0_ == '=') { | 
|  | 447           token = Select(Token::ASSIGN_SUB); | 
|  | 448         } else { | 
|  | 449           token = Token::SUB; | 
|  | 450         } | 
|  | 451         break; | 
|  | 452 | 
|  | 453       case '*': | 
|  | 454         // * *= | 
|  | 455         token = Select('=', Token::ASSIGN_MUL, Token::MUL); | 
|  | 456         break; | 
|  | 457 | 
|  | 458       case '%': | 
|  | 459         // % %= | 
|  | 460         token = Select('=', Token::ASSIGN_MOD, Token::MOD); | 
|  | 461         break; | 
|  | 462 | 
|  | 463       case '/': | 
|  | 464         // /  // /* /= | 
|  | 465         Advance(); | 
|  | 466         if (c0_ == '/') { | 
|  | 467           token = SkipSingleLineComment(); | 
|  | 468         } else if (c0_ == '*') { | 
|  | 469           token = SkipMultiLineComment(); | 
|  | 470         } else if (c0_ == '=') { | 
|  | 471           token = Select(Token::ASSIGN_DIV); | 
|  | 472         } else { | 
|  | 473           token = Token::DIV; | 
|  | 474         } | 
|  | 475         break; | 
|  | 476 | 
|  | 477       case '&': | 
|  | 478         // & && &= | 
|  | 479         Advance(); | 
|  | 480         if (c0_ == '&') { | 
|  | 481           token = Select(Token::AND); | 
|  | 482         } else if (c0_ == '=') { | 
|  | 483           token = Select(Token::ASSIGN_BIT_AND); | 
|  | 484         } else { | 
|  | 485           token = Token::BIT_AND; | 
|  | 486         } | 
|  | 487         break; | 
|  | 488 | 
|  | 489       case '|': | 
|  | 490         // | || |= | 
|  | 491         Advance(); | 
|  | 492         if (c0_ == '|') { | 
|  | 493           token = Select(Token::OR); | 
|  | 494         } else if (c0_ == '=') { | 
|  | 495           token = Select(Token::ASSIGN_BIT_OR); | 
|  | 496         } else { | 
|  | 497           token = Token::BIT_OR; | 
|  | 498         } | 
|  | 499         break; | 
|  | 500 | 
|  | 501       case '^': | 
|  | 502         // ^ ^= | 
|  | 503         token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); | 
|  | 504         break; | 
|  | 505 | 
|  | 506       case '.': | 
|  | 507         // . Number | 
|  | 508         Advance(); | 
|  | 509         if (IsDecimalDigit(c0_)) { | 
|  | 510           token = ScanNumber(true); | 
|  | 511         } else { | 
|  | 512           token = Token::PERIOD; | 
|  | 513         } | 
|  | 514         break; | 
|  | 515 | 
|  | 516       case ':': | 
|  | 517         token = Select(Token::COLON); | 
|  | 518         break; | 
|  | 519 | 
|  | 520       case ';': | 
|  | 521         token = Select(Token::SEMICOLON); | 
|  | 522         break; | 
|  | 523 | 
|  | 524       case ',': | 
|  | 525         token = Select(Token::COMMA); | 
|  | 526         break; | 
|  | 527 | 
|  | 528       case '(': | 
|  | 529         token = Select(Token::LPAREN); | 
|  | 530         break; | 
|  | 531 | 
|  | 532       case ')': | 
|  | 533         token = Select(Token::RPAREN); | 
|  | 534         break; | 
|  | 535 | 
|  | 536       case '[': | 
|  | 537         token = Select(Token::LBRACK); | 
|  | 538         break; | 
|  | 539 | 
|  | 540       case ']': | 
|  | 541         token = Select(Token::RBRACK); | 
|  | 542         break; | 
|  | 543 | 
|  | 544       case '{': | 
|  | 545         token = Select(Token::LBRACE); | 
|  | 546         break; | 
|  | 547 | 
|  | 548       case '}': | 
|  | 549         token = Select(Token::RBRACE); | 
|  | 550         break; | 
|  | 551 | 
|  | 552       case '?': | 
|  | 553         token = Select(Token::CONDITIONAL); | 
|  | 554         break; | 
|  | 555 | 
|  | 556       case '~': | 
|  | 557         token = Select(Token::BIT_NOT); | 
|  | 558         break; | 
|  | 559 | 
|  | 560       default: | 
|  | 561         if (kIsIdentifierStart.get(c0_)) { | 
|  | 562           token = ScanIdentifier(); | 
|  | 563         } else if (IsDecimalDigit(c0_)) { | 
|  | 564           token = ScanNumber(false); | 
|  | 565         } else if (SkipWhiteSpace()) { | 
|  | 566           token = Token::WHITESPACE; | 
|  | 567         } else if (c0_ < 0) { | 
|  | 568           token = Token::EOS; | 
|  | 569         } else { | 
|  | 570           token = Select(Token::ILLEGAL); | 
|  | 571         } | 
|  | 572         break; | 
|  | 573     } | 
|  | 574 | 
|  | 575     // Continue scanning for tokens as long as we're just skipping | 
|  | 576     // whitespace. | 
|  | 577   } while (token == Token::WHITESPACE); | 
| 357 | 578 | 
| 358   next_.location.end_pos = source_pos(); | 579   next_.location.end_pos = source_pos(); | 
| 359   next_.token = token; | 580   next_.token = token; | 
| 360 } | 581 } | 
| 361 | 582 | 
| 362 | 583 | 
| 363 void Scanner::SeekForward(int pos) { | 584 void Scanner::SeekForward(int pos) { | 
| 364   source_.SeekForward(pos - 1); | 585   source_.SeekForward(pos - 1); | 
| 365   Advance(); | 586   Advance(); | 
| 366   Scan(); | 587   Scan(); | 
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 488   Advance(); | 709   Advance(); | 
| 489   if (c0_ == next) { | 710   if (c0_ == next) { | 
| 490     Advance(); | 711     Advance(); | 
| 491     return then; | 712     return then; | 
| 492   } else { | 713   } else { | 
| 493     return else_; | 714     return else_; | 
| 494   } | 715   } | 
| 495 } | 716 } | 
| 496 | 717 | 
| 497 | 718 | 
| 498 Token::Value Scanner::ScanToken() { |  | 
| 499   switch (c0_) { |  | 
| 500     // strings |  | 
| 501     case '"': case '\'': |  | 
| 502       return ScanString(); |  | 
| 503 |  | 
| 504     case '<': |  | 
| 505       // < <= << <<= <!-- |  | 
| 506       Advance(); |  | 
| 507       if (c0_ == '=') return Select(Token::LTE); |  | 
| 508       if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL); |  | 
| 509       if (c0_ == '!') return ScanHtmlComment(); |  | 
| 510       return Token::LT; |  | 
| 511 |  | 
| 512     case '>': |  | 
| 513       // > >= >> >>= >>> >>>= |  | 
| 514       Advance(); |  | 
| 515       if (c0_ == '=') return Select(Token::GTE); |  | 
| 516       if (c0_ == '>') { |  | 
| 517         // >> >>= >>> >>>= |  | 
| 518         Advance(); |  | 
| 519         if (c0_ == '=') return Select(Token::ASSIGN_SAR); |  | 
| 520         if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR); |  | 
| 521         return Token::SAR; |  | 
| 522       } |  | 
| 523       return Token::GT; |  | 
| 524 |  | 
| 525     case '=': |  | 
| 526       // = == === |  | 
| 527       Advance(); |  | 
| 528       if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ); |  | 
| 529       return Token::ASSIGN; |  | 
| 530 |  | 
| 531     case '!': |  | 
| 532       // ! != !== |  | 
| 533       Advance(); |  | 
| 534       if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE); |  | 
| 535       return Token::NOT; |  | 
| 536 |  | 
| 537     case '+': |  | 
| 538       // + ++ += |  | 
| 539       Advance(); |  | 
| 540       if (c0_ == '+') return Select(Token::INC); |  | 
| 541       if (c0_ == '=') return Select(Token::ASSIGN_ADD); |  | 
| 542       return Token::ADD; |  | 
| 543 |  | 
| 544     case '-': |  | 
| 545       // - -- -= |  | 
| 546       Advance(); |  | 
| 547       if (c0_ == '-') return Select(Token::DEC); |  | 
| 548       if (c0_ == '=') return Select(Token::ASSIGN_SUB); |  | 
| 549       return Token::SUB; |  | 
| 550 |  | 
| 551     case '*': |  | 
| 552       // * *= |  | 
| 553       return Select('=', Token::ASSIGN_MUL, Token::MUL); |  | 
| 554 |  | 
| 555     case '%': |  | 
| 556       // % %= |  | 
| 557       return Select('=', Token::ASSIGN_MOD, Token::MOD); |  | 
| 558 |  | 
| 559     case '/': |  | 
| 560       // /  // /* /= |  | 
| 561       Advance(); |  | 
| 562       if (c0_ == '/') return SkipSingleLineComment(); |  | 
| 563       if (c0_ == '*') return SkipMultiLineComment(); |  | 
| 564       if (c0_ == '=') return Select(Token::ASSIGN_DIV); |  | 
| 565       return Token::DIV; |  | 
| 566 |  | 
| 567     case '&': |  | 
| 568       // & && &= |  | 
| 569       Advance(); |  | 
| 570       if (c0_ == '&') return Select(Token::AND); |  | 
| 571       if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND); |  | 
| 572       return Token::BIT_AND; |  | 
| 573 |  | 
| 574     case '|': |  | 
| 575       // | || |= |  | 
| 576       Advance(); |  | 
| 577       if (c0_ == '|') return Select(Token::OR); |  | 
| 578       if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR); |  | 
| 579       return Token::BIT_OR; |  | 
| 580 |  | 
| 581     case '^': |  | 
| 582       // ^ ^= |  | 
| 583       return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); |  | 
| 584 |  | 
| 585     case '.': |  | 
| 586       // . Number |  | 
| 587       Advance(); |  | 
| 588       if (IsDecimalDigit(c0_)) return ScanNumber(true); |  | 
| 589       return Token::PERIOD; |  | 
| 590 |  | 
| 591     case ':': |  | 
| 592       return Select(Token::COLON); |  | 
| 593 |  | 
| 594     case ';': |  | 
| 595       return Select(Token::SEMICOLON); |  | 
| 596 |  | 
| 597     case ',': |  | 
| 598       return Select(Token::COMMA); |  | 
| 599 |  | 
| 600     case '(': |  | 
| 601       return Select(Token::LPAREN); |  | 
| 602 |  | 
| 603     case ')': |  | 
| 604       return Select(Token::RPAREN); |  | 
| 605 |  | 
| 606     case '[': |  | 
| 607       return Select(Token::LBRACK); |  | 
| 608 |  | 
| 609     case ']': |  | 
| 610       return Select(Token::RBRACK); |  | 
| 611 |  | 
| 612     case '{': |  | 
| 613       return Select(Token::LBRACE); |  | 
| 614 |  | 
| 615     case '}': |  | 
| 616       return Select(Token::RBRACE); |  | 
| 617 |  | 
| 618     case '?': |  | 
| 619       return Select(Token::CONDITIONAL); |  | 
| 620 |  | 
| 621     case '~': |  | 
| 622       return Select(Token::BIT_NOT); |  | 
| 623 |  | 
| 624     default: |  | 
| 625       if (kIsIdentifierStart.get(c0_)) |  | 
| 626         return ScanIdentifier(); |  | 
| 627       if (IsDecimalDigit(c0_)) |  | 
| 628         return ScanNumber(false); |  | 
| 629       if (c0_ < 0) |  | 
| 630         return Token::EOS; |  | 
| 631       return Select(Token::ILLEGAL); |  | 
| 632   } |  | 
| 633 |  | 
| 634   UNREACHABLE(); |  | 
| 635   return Token::ILLEGAL; |  | 
| 636 } |  | 
| 637 |  | 
| 638 |  | 
| 639 // Returns true if any decimal digits were scanned, returns false otherwise. | 719 // Returns true if any decimal digits were scanned, returns false otherwise. | 
| 640 void Scanner::ScanDecimalDigits() { | 720 void Scanner::ScanDecimalDigits() { | 
| 641   while (IsDecimalDigit(c0_)) | 721   while (IsDecimalDigit(c0_)) | 
| 642     AddCharAdvance(); | 722     AddCharAdvance(); | 
| 643 } | 723 } | 
| 644 | 724 | 
| 645 | 725 | 
| 646 Token::Value Scanner::ScanNumber(bool seen_period) { | 726 Token::Value Scanner::ScanNumber(bool seen_period) { | 
| 647   ASSERT(IsDecimalDigit(c0_));  // the first digit of the number or the fraction | 727   ASSERT(IsDecimalDigit(c0_));  // the first digit of the number or the fraction | 
| 648 | 728 | 
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 727   uc32 c = ScanHexEscape('u', 4); | 807   uc32 c = ScanHexEscape('u', 4); | 
| 728   // We do not allow a unicode escape sequence to start another | 808   // We do not allow a unicode escape sequence to start another | 
| 729   // unicode escape sequence. | 809   // unicode escape sequence. | 
| 730   if (c == '\\') return unibrow::Utf8::kBadChar; | 810   if (c == '\\') return unibrow::Utf8::kBadChar; | 
| 731   return c; | 811   return c; | 
| 732 } | 812 } | 
| 733 | 813 | 
| 734 | 814 | 
| 735 Token::Value Scanner::ScanIdentifier() { | 815 Token::Value Scanner::ScanIdentifier() { | 
| 736   ASSERT(kIsIdentifierStart.get(c0_)); | 816   ASSERT(kIsIdentifierStart.get(c0_)); | 
| 737 |  | 
| 738   bool has_escapes = false; | 817   bool has_escapes = false; | 
| 739 | 818 | 
| 740   StartLiteral(); | 819   StartLiteral(); | 
| 741   // Scan identifier start character. | 820   // Scan identifier start character. | 
| 742   if (c0_ == '\\') { | 821   if (c0_ == '\\') { | 
| 743     has_escapes = true; | 822     has_escapes = true; | 
| 744     uc32 c = ScanIdentifierUnicodeEscape(); | 823     uc32 c = ScanIdentifierUnicodeEscape(); | 
| 745     // Only allow legal identifier start characters. | 824     // Only allow legal identifier start characters. | 
| 746     if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 825     if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 
| 747     AddChar(c); | 826     AddChar(c); | 
| 748   } else { | 827   } else { | 
| 749     AddCharAdvance(); | 828     AddChar(c0_); | 
|  | 829     Advance(); | 
| 750   } | 830   } | 
|  | 831 | 
| 751   // Scan the rest of the identifier characters. | 832   // Scan the rest of the identifier characters. | 
| 752   while (kIsIdentifierPart.get(c0_)) { | 833   while (kIsIdentifierPart.get(c0_)) { | 
| 753     if (c0_ == '\\') { | 834     if (c0_ == '\\') { | 
| 754       has_escapes = true; | 835       has_escapes = true; | 
| 755       uc32 c = ScanIdentifierUnicodeEscape(); | 836       uc32 c = ScanIdentifierUnicodeEscape(); | 
| 756       // Only allow legal identifier part characters. | 837       // Only allow legal identifier part characters. | 
| 757       if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 838       if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 
| 758       AddChar(c); | 839       AddChar(c); | 
| 759     } else { | 840     } else { | 
| 760       AddCharAdvance(); | 841       AddChar(c0_); | 
|  | 842       Advance(); | 
| 761     } | 843     } | 
| 762   } | 844   } | 
| 763   TerminateLiteral(); | 845   TerminateLiteral(); | 
| 764 | 846 | 
| 765   // We don't have any 1-letter keywords (this is probably a common case). | 847   // We don't have any 1-letter keywords (this is probably a common case). | 
| 766   if ((next_.literal_end - next_.literal_pos) == 1) | 848   if ((next_.literal_end - next_.literal_pos) == 1) { | 
| 767     return Token::IDENTIFIER; | 849     return Token::IDENTIFIER; | 
|  | 850   } | 
| 768 | 851 | 
| 769   // If the identifier contains unicode escapes, it must not be | 852   // If the identifier contains unicode escapes, it must not be | 
| 770   // resolved to a keyword. | 853   // resolved to a keyword. | 
| 771   if (has_escapes) | 854   if (has_escapes) { | 
| 772     return Token::IDENTIFIER; | 855     return Token::IDENTIFIER; | 
|  | 856   } | 
| 773 | 857 | 
| 774   return Token::Lookup(&literals_.data()[next_.literal_pos]); | 858   return Token::Lookup(&literals_.data()[next_.literal_pos]); | 
| 775 } | 859 } | 
| 776 | 860 | 
| 777 | 861 | 
| 778 | 862 | 
| 779 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 863 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 
| 780   // Checks whether the buffer contains an identifier (no escape). | 864   // Checks whether the buffer contains an identifier (no escape). | 
| 781   if (!buffer->has_more()) return false; | 865   if (!buffer->has_more()) return false; | 
| 782   if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 866   if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 841     } | 925     } | 
| 842     AddCharAdvance(); | 926     AddCharAdvance(); | 
| 843   } | 927   } | 
| 844   TerminateLiteral(); | 928   TerminateLiteral(); | 
| 845 | 929 | 
| 846   next_.location.end_pos = source_pos() - 1; | 930   next_.location.end_pos = source_pos() - 1; | 
| 847   return true; | 931   return true; | 
| 848 } | 932 } | 
| 849 | 933 | 
| 850 } }  // namespace v8::internal | 934 } }  // namespace v8::internal | 
| OLD | NEW | 
|---|