Index: src/scanner.cc |
diff --git a/src/scanner.cc b/src/scanner.cc |
index 8062faa2447403af9354c1304200401521ebb123..e63239d6eb0d3afed82d7af59498cc6c9a1cbfe4 100644 |
--- a/src/scanner.cc |
+++ b/src/scanner.cc |
@@ -245,6 +245,8 @@ bool Scanner::SkipWhiteSpace() { |
while (true) { |
while (true) { |
+ // The unicode cache only accepts unsigned inputs; stop when c0_ < 0 (end of input). |
+ if (c0_ < 0) break; |
// Advance as long as character is a WhiteSpace or LineTerminator. |
// Remember if the latter is the case. |
if (unicode_cache_->IsLineTerminator(c0_)) { |
@@ -365,7 +367,7 @@ Token::Value Scanner::SkipMultiLineComment() { |
while (c0_ >= 0) { |
uc32 ch = c0_; |
Advance(); |
- if (unicode_cache_->IsLineTerminator(ch)) { |
+ if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
// Following ECMA-262, section 7.4, a comment containing |
// a newline will make the comment count as a line-terminator. |
has_multiline_comment_before_next_ = true; |
@@ -625,14 +627,14 @@ void Scanner::Scan() { |
break; |
default: |
- if (unicode_cache_->IsIdentifierStart(c0_)) { |
+ if (c0_ < 0) { |
+ token = Token::EOS; |
+ } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
token = ScanIdentifierOrKeyword(); |
} else if (IsDecimalDigit(c0_)) { |
token = ScanNumber(false); |
} else if (SkipWhiteSpace()) { |
token = Token::WHITESPACE; |
- } else if (c0_ < 0) { |
- token = Token::EOS; |
} else { |
token = Select(Token::ILLEGAL); |
} |
@@ -674,7 +676,7 @@ bool Scanner::ScanEscape() { |
Advance(); |
// Skip escaped newlines. |
- if (unicode_cache_->IsLineTerminator(c)) { |
+ if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { |
// Allow CR+LF newlines in multiline string literals. |
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
// Allow LF+CR newlines in multiline string literals. |
@@ -871,7 +873,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) { |
// not be an identifier start or a decimal digit; see ECMA-262 |
// section 7.8.3, page 17 (note that we read only one decimal digit |
// if the value is 0). |
- if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) |
+ if (IsDecimalDigit(c0_) || |
+ (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) |
return Token::ILLEGAL; |
literal.Complete(); |
@@ -1039,7 +1042,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
AddLiteralChar(first_char); |
// Scan the rest of the identifier characters. |
- while (unicode_cache_->IsIdentifierPart(c0_)) { |
+ while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
if (c0_ != '\\') { |
uc32 next_char = c0_; |
Advance(); |
@@ -1067,7 +1070,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { |
Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
// Scan the rest of the identifier characters. |
- while (unicode_cache_->IsIdentifierPart(c0_)) { |
+ while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
if (c0_ == '\\') { |
uc32 c = ScanIdentifierUnicodeEscape(); |
// Only allow legal identifier part characters. |
@@ -1106,10 +1109,10 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { |
} |
while (c0_ != '/' || in_character_class) { |
- if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
+ if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
if (c0_ == '\\') { // Escape sequence. |
AddLiteralCharAdvance(); |
- if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
+ if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
AddLiteralCharAdvance(); |
// If the escape allows more characters, i.e., \x??, \u????, or \c?, |
// only "safe" characters are allowed (letters, digits, underscore), |
@@ -1156,7 +1159,7 @@ bool Scanner::ScanLiteralUnicodeEscape() { |
bool Scanner::ScanRegExpFlags() { |
// Scan regular expression flags. |
LiteralScope literal(this); |
- while (unicode_cache_->IsIdentifierPart(c0_)) { |
+ while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
if (c0_ != '\\') { |
AddLiteralCharAdvance(); |
} else { |