| Index: src/scanner.cc
|
| ===================================================================
|
| --- src/scanner.cc (revision 5846)
|
| +++ src/scanner.cc (working copy)
|
| @@ -30,27 +30,15 @@
|
| #include "ast.h"
|
| #include "handles.h"
|
| #include "scanner.h"
|
| +#include "unicode-inl.h"
|
|
|
| namespace v8 {
|
| namespace internal {
|
|
|
| // ----------------------------------------------------------------------------
|
| -// Character predicates
|
| -
|
| -
|
| -unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
|
| -unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
|
| -unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
|
| -unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
|
| -
|
| -
|
| -StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
|
| -
|
| -
|
| -// ----------------------------------------------------------------------------
|
| // UTF8Buffer
|
|
|
| -UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
|
| +UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }
|
|
|
|
|
| UTF8Buffer::~UTF8Buffer() {}
|
| @@ -135,55 +123,6 @@
|
| }
|
|
|
|
|
| -// ExternalStringUTF16Buffer
|
| -template <typename StringType, typename CharType>
|
| -ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
|
| - : raw_data_(NULL) { }
|
| -
|
| -
|
| -template <typename StringType, typename CharType>
|
| -void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
|
| - Handle<StringType> data,
|
| - int start_position,
|
| - int end_position) {
|
| - ASSERT(!data.is_null());
|
| - raw_data_ = data->resource()->data();
|
| -
|
| - ASSERT(end_position <= data->length());
|
| - if (start_position > 0) {
|
| - SeekForward(start_position);
|
| - }
|
| - end_ =
|
| - end_position != Scanner::kNoEndPosition ? end_position : data->length();
|
| -}
|
| -
|
| -
|
| -template <typename StringType, typename CharType>
|
| -uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
|
| - if (pos_ < end_) {
|
| - return raw_data_[pos_++];
|
| - } else {
|
| - // note: currently the following increment is necessary to avoid a
|
| - // test-parser problem!
|
| - pos_++;
|
| - return static_cast<uc32>(-1);
|
| - }
|
| -}
|
| -
|
| -
|
| -template <typename StringType, typename CharType>
|
| -void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
|
| - pos_--;
|
| - ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
|
| - ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
|
| -}
|
| -
|
| -
|
| -template <typename StringType, typename CharType>
|
| -void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
|
| - pos_ = pos;
|
| -}
|
| -
|
| // ----------------------------------------------------------------------------
|
| // Scanner::LiteralScope
|
|
|
| @@ -309,7 +248,7 @@
|
| }
|
|
|
|
|
| -void Scanner::AddChar(uc32 c) {
|
| +void Scanner::AddLiteralChar(uc32 c) {
|
| literal_buffer_.AddChar(c);
|
| }
|
|
|
| @@ -324,8 +263,8 @@
|
| }
|
|
|
|
|
| -void Scanner::AddCharAdvance() {
|
| - AddChar(c0_);
|
| +void Scanner::AddLiteralCharAdvance() {
|
| + AddLiteralChar(c0_);
|
| Advance();
|
| }
|
|
|
| @@ -358,9 +297,9 @@
|
| while (true) {
|
| // We treat byte-order marks (BOMs) as whitespace for better
|
| // compatibility with Spidermonkey and other JavaScript engines.
|
| - while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
|
| + while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
|
| // IsWhiteSpace() includes line terminators!
|
| - if (kIsLineTerminator.get(c0_)) {
|
| + if (ScannerConstants::kIsLineTerminator.get(c0_)) {
|
| // Ignore line terminators, but remember them. This is necessary
|
| // for automatic semicolon insertion.
|
| has_line_terminator_before_next_ = true;
|
| @@ -400,7 +339,7 @@
|
| // separately by the lexical grammar and becomes part of the
|
| // stream of input elements for the syntactic grammar (see
|
| // ECMA-262, section 7.4, page 12).
|
| - while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
|
| + while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
|
| Advance();
|
| }
|
|
|
| @@ -537,29 +476,29 @@
|
| // Check for control character (0x00-0x1f) or unterminated string (<0).
|
| if (c0_ < 0x20) return Token::ILLEGAL;
|
| if (c0_ != '\\') {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| } else {
|
| Advance();
|
| switch (c0_) {
|
| case '"':
|
| case '\\':
|
| case '/':
|
| - AddChar(c0_);
|
| + AddLiteralChar(c0_);
|
| break;
|
| case 'b':
|
| - AddChar('\x08');
|
| + AddLiteralChar('\x08');
|
| break;
|
| case 'f':
|
| - AddChar('\x0c');
|
| + AddLiteralChar('\x0c');
|
| break;
|
| case 'n':
|
| - AddChar('\x0a');
|
| + AddLiteralChar('\x0a');
|
| break;
|
| case 'r':
|
| - AddChar('\x0d');
|
| + AddLiteralChar('\x0d');
|
| break;
|
| case 't':
|
| - AddChar('\x09');
|
| + AddLiteralChar('\x09');
|
| break;
|
| case 'u': {
|
| uc32 value = 0;
|
| @@ -571,7 +510,7 @@
|
| }
|
| value = value * 16 + digit;
|
| }
|
| - AddChar(value);
|
| + AddLiteralChar(value);
|
| break;
|
| }
|
| default:
|
| @@ -591,31 +530,31 @@
|
|
|
| Token::Value Scanner::ScanJsonNumber() {
|
| LiteralScope literal(this);
|
| - if (c0_ == '-') AddCharAdvance();
|
| + if (c0_ == '-') AddLiteralCharAdvance();
|
| if (c0_ == '0') {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| // Prefix zero is only allowed if it's the only digit before
|
| // a decimal point or exponent.
|
| if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
|
| } else {
|
| if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
|
| do {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| } while (c0_ >= '0' && c0_ <= '9');
|
| }
|
| if (c0_ == '.') {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
| do {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| } while (c0_ >= '0' && c0_ <= '9');
|
| }
|
| if (AsciiAlphaToLower(c0_) == 'e') {
|
| - AddCharAdvance();
|
| - if (c0_ == '-' || c0_ == '+') AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| + if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
|
| if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
| do {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| } while (c0_ >= '0' && c0_ <= '9');
|
| }
|
| literal.Complete();
|
| @@ -631,7 +570,7 @@
|
| Advance();
|
| text++;
|
| }
|
| - if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
|
| + if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
|
| literal.Complete();
|
| return token;
|
| }
|
| @@ -854,7 +793,7 @@
|
| break;
|
|
|
| default:
|
| - if (kIsIdentifierStart.get(c0_)) {
|
| + if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
|
| token = ScanIdentifier();
|
| } else if (IsDecimalDigit(c0_)) {
|
| token = ScanNumber(false);
|
| @@ -937,7 +876,7 @@
|
| Advance();
|
|
|
| // Skip escaped newlines.
|
| - if (kIsLineTerminator.get(c)) {
|
| + if (ScannerConstants::kIsLineTerminator.get(c)) {
|
| // Allow CR+LF newlines in multiline string literals.
|
| if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
|
| // Allow LF+CR newlines in multiline string literals.
|
| @@ -970,7 +909,7 @@
|
| // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
|
| // should be illegal, but they are commonly handled
|
| // as non-escaped characters by JS VMs.
|
| - AddChar(c);
|
| + AddLiteralChar(c);
|
| }
|
|
|
|
|
| @@ -979,14 +918,15 @@
|
| Advance(); // consume quote
|
|
|
| LiteralScope literal(this);
|
| - while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
|
| + while (c0_ != quote && c0_ >= 0
|
| + && !ScannerConstants::kIsLineTerminator.get(c0_)) {
|
| uc32 c = c0_;
|
| Advance();
|
| if (c == '\\') {
|
| if (c0_ < 0) return Token::ILLEGAL;
|
| ScanEscape();
|
| } else {
|
| - AddChar(c);
|
| + AddLiteralChar(c);
|
| }
|
| }
|
| if (c0_ != quote) return Token::ILLEGAL;
|
| @@ -1017,7 +957,7 @@
|
| // Returns true if any decimal digits were scanned, returns false otherwise.
|
| void Scanner::ScanDecimalDigits() {
|
| while (IsDecimalDigit(c0_))
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| }
|
|
|
|
|
| @@ -1029,25 +969,25 @@
|
| LiteralScope literal(this);
|
| if (seen_period) {
|
| // we have already seen a decimal point of the float
|
| - AddChar('.');
|
| + AddLiteralChar('.');
|
| ScanDecimalDigits(); // we know we have at least one digit
|
|
|
| } else {
|
| // if the first character is '0' we must check for octals and hex
|
| if (c0_ == '0') {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
|
|
| // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
|
| if (c0_ == 'x' || c0_ == 'X') {
|
| // hex number
|
| kind = HEX;
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| if (!IsHexDigit(c0_)) {
|
| // we must have at least one hex digit after 'x'/'X'
|
| return Token::ILLEGAL;
|
| }
|
| while (IsHexDigit(c0_)) {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| }
|
| } else if ('0' <= c0_ && c0_ <= '7') {
|
| // (possible) octal number
|
| @@ -1058,7 +998,7 @@
|
| break;
|
| }
|
| if (c0_ < '0' || '7' < c0_) break;
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| }
|
| }
|
| }
|
| @@ -1067,7 +1007,7 @@
|
| if (kind == DECIMAL) {
|
| ScanDecimalDigits(); // optional
|
| if (c0_ == '.') {
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| ScanDecimalDigits(); // optional
|
| }
|
| }
|
| @@ -1078,9 +1018,9 @@
|
| ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
|
| if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
|
| // scan exponent
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| if (c0_ == '+' || c0_ == '-')
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| if (!IsDecimalDigit(c0_)) {
|
| // we must have at least one decimal digit after 'e'/'E'
|
| return Token::ILLEGAL;
|
| @@ -1092,7 +1032,7 @@
|
| // not be an identifier start or a decimal digit; see ECMA-262
|
| // section 7.8.3, page 17 (note that we read only one decimal digit
|
| // if the value is 0).
|
| - if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
|
| + if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
|
| return Token::ILLEGAL;
|
|
|
| literal.Complete();
|
| @@ -1114,7 +1054,7 @@
|
|
|
|
|
| Token::Value Scanner::ScanIdentifier() {
|
| - ASSERT(kIsIdentifierStart.get(c0_));
|
| + ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
|
|
|
| LiteralScope literal(this);
|
| KeywordMatcher keyword_match;
|
| @@ -1123,25 +1063,25 @@
|
| if (c0_ == '\\') {
|
| uc32 c = ScanIdentifierUnicodeEscape();
|
| // Only allow legal identifier start characters.
|
| - if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
|
| - AddChar(c);
|
| + if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
|
| + AddLiteralChar(c);
|
| keyword_match.Fail();
|
| } else {
|
| - AddChar(c0_);
|
| + AddLiteralChar(c0_);
|
| keyword_match.AddChar(c0_);
|
| Advance();
|
| }
|
|
|
| // Scan the rest of the identifier characters.
|
| - while (kIsIdentifierPart.get(c0_)) {
|
| + while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
|
| if (c0_ == '\\') {
|
| uc32 c = ScanIdentifierUnicodeEscape();
|
| // Only allow legal identifier part characters.
|
| - if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
|
| - AddChar(c);
|
| + if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
|
| + AddLiteralChar(c);
|
| keyword_match.Fail();
|
| } else {
|
| - AddChar(c0_);
|
| + AddLiteralChar(c0_);
|
| keyword_match.AddChar(c0_);
|
| Advance();
|
| }
|
| @@ -1153,17 +1093,6 @@
|
|
|
|
|
|
|
| -bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
|
| - // Checks whether the buffer contains an identifier (no escape).
|
| - if (!buffer->has_more()) return false;
|
| - if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
|
| - while (buffer->has_more()) {
|
| - if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -
|
| bool Scanner::ScanRegExpPattern(bool seen_equal) {
|
| // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
|
| bool in_character_class = false;
|
| @@ -1178,18 +1107,18 @@
|
| // constructor.
|
| LiteralScope literal(this);
|
| if (seen_equal)
|
| - AddChar('=');
|
| + AddLiteralChar('=');
|
|
|
| while (c0_ != '/' || in_character_class) {
|
| - if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
| + if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
| if (c0_ == '\\') { // escaped character
|
| - AddCharAdvance();
|
| - if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| + if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
| + AddLiteralCharAdvance();
|
| } else { // unescaped character
|
| if (c0_ == '[') in_character_class = true;
|
| if (c0_ == ']') in_character_class = false;
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| }
|
| }
|
| Advance(); // consume '/'
|
| @@ -1202,17 +1131,17 @@
|
| bool Scanner::ScanRegExpFlags() {
|
| // Scan regular expression flags.
|
| LiteralScope literal(this);
|
| - while (kIsIdentifierPart.get(c0_)) {
|
| + while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
|
| if (c0_ == '\\') {
|
| uc32 c = ScanIdentifierUnicodeEscape();
|
| if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
|
| // We allow any escaped character, unlike the restriction on
|
| // IdentifierPart when it is used to build an IdentifierName.
|
| - AddChar(c);
|
| + AddLiteralChar(c);
|
| continue;
|
| }
|
| }
|
| - AddCharAdvance();
|
| + AddLiteralCharAdvance();
|
| }
|
| literal.Complete();
|
|
|
|
|