| Index: src/scanner.cc
|
| diff --git a/src/scanner.cc b/src/scanner.cc
|
| index 26f840b23a5914e9e5d52c207d5f9ed904e15a9d..27768547fb7b56ab70d51b2df0aed7a093510d2e 100644
|
| --- a/src/scanner.cc
|
| +++ b/src/scanner.cc
|
| @@ -246,7 +246,8 @@ Token::Value Scanner::Next() {
|
| }
|
|
|
|
|
| -static inline bool IsByteOrderMark(uc32 c) {
|
| +// TODO(yangguo): check whether the U+FFFE check is actually necessary.
|
| +static inline bool IsLittleEndianByteOrderMark(uc32 c) {
|
| // The Unicode value U+FFFE is guaranteed never to be assigned as a
|
| // Unicode character; this implies that in a Unicode context the
|
| // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
|
| @@ -254,7 +255,7 @@ static inline bool IsByteOrderMark(uc32 c) {
|
| // not be a U+FFFE character expressed in big-endian byte
|
| // order). Nevertheless, we check for it to be compatible with
|
| // Spidermonkey.
|
| - return c == 0xFEFF || c == 0xFFFE;
|
| + return c == 0xFFFE;
|
| }
|
|
|
|
|
| @@ -262,14 +263,14 @@ bool Scanner::SkipWhiteSpace() {
|
| int start_position = source_pos();
|
|
|
| while (true) {
|
| - // We treat byte-order marks (BOMs) as whitespace for better
|
| - // compatibility with Spidermonkey and other JavaScript engines.
|
| - while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {
|
| - // IsWhiteSpace() includes line terminators!
|
| + while (true) {
|
| + // Advance while the character is a WhiteSpace or a LineTerminator.
|
| + // Remember line terminators; automatic semicolon insertion needs them.
|
| if (unicode_cache_->IsLineTerminator(c0_)) {
|
| - // Ignore line terminators, but remember them. This is necessary
|
| - // for automatic semicolon insertion.
|
| has_line_terminator_before_next_ = true;
|
| + } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
|
| + !IsLittleEndianByteOrderMark(c0_)) {
|
| + break;
|
| }
|
| Advance();
|
| }
|
|
|