Index: src/scanner.cc |
diff --git a/src/scanner.cc b/src/scanner.cc |
index 26f840b23a5914e9e5d52c207d5f9ed904e15a9d..27768547fb7b56ab70d51b2df0aed7a093510d2e 100644 |
--- a/src/scanner.cc |
+++ b/src/scanner.cc |
@@ -246,7 +246,8 @@ Token::Value Scanner::Next() { |
} |
-static inline bool IsByteOrderMark(uc32 c) { |
+// TODO(yangguo): check whether treating U+FFFE as whitespace is necessary. |
+static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
// The Unicode value U+FFFE is guaranteed never to be assigned as a |
// Unicode character; this implies that in a Unicode context the |
// 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
@@ -254,7 +255,7 @@ static inline bool IsByteOrderMark(uc32 c) { |
// not be a U+FFFE character expressed in big-endian byte |
// order). Nevertheless, we check for it to be compatible with |
// Spidermonkey. |
- return c == 0xFEFF || c == 0xFFFE; |
+ return c == 0xFFFE; |
} |
@@ -262,14 +263,14 @@ bool Scanner::SkipWhiteSpace() { |
int start_position = source_pos(); |
while (true) { |
- // We treat byte-order marks (BOMs) as whitespace for better |
- // compatibility with Spidermonkey and other JavaScript engines. |
- while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { |
- // IsWhiteSpace() includes line terminators! |
+ while (true) { |
+ // Advance past WhiteSpace, LineTerminator and U+FFFE characters. |
+ // Remember any LineTerminator for automatic semicolon insertion. |
if (unicode_cache_->IsLineTerminator(c0_)) { |
- // Ignore line terminators, but remember them. This is necessary |
- // for automatic semicolon insertion. |
has_line_terminator_before_next_ = true; |
+ } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
+ !IsLittleEndianByteOrderMark(c0_)) { |
+ break; |
} |
Advance(); |
} |