OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
239 next_.location.end_pos = pos + 1; | 239 next_.location.end_pos = pos + 1; |
240 Advance(); | 240 Advance(); |
241 return current_.token; | 241 return current_.token; |
242 } | 242 } |
243 } | 243 } |
244 Scan(); | 244 Scan(); |
245 return current_.token; | 245 return current_.token; |
246 } | 246 } |
247 | 247 |
248 | 248 |
249 static inline bool IsByteOrderMark(uc32 c) { | 249 // TODO(yangguo): check whether this is actually necessary. |
| 250 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
250 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 251 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
251 // Unicode character; this implies that in a Unicode context the | 252 // Unicode character; this implies that in a Unicode context the |
252 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 253 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
253 // character expressed in little-endian byte order (since it could | 254 // character expressed in little-endian byte order (since it could |
254 // not be a U+FFFE character expressed in big-endian byte | 255 // not be a U+FFFE character expressed in big-endian byte |
255 // order). Nevertheless, we check for it to be compatible with | 256 // order). Nevertheless, we check for it to be compatible with |
256 // Spidermonkey. | 257 // Spidermonkey. |
257 return c == 0xFEFF || c == 0xFFFE; | 258 return c == 0xFFFE; |
258 } | 259 } |
259 | 260 |
260 | 261 |
261 bool Scanner::SkipWhiteSpace() { | 262 bool Scanner::SkipWhiteSpace() { |
262 int start_position = source_pos(); | 263 int start_position = source_pos(); |
263 | 264 |
264 while (true) { | 265 while (true) { |
265 // We treat byte-order marks (BOMs) as whitespace for better | 266 while (true) { |
266 // compatibility with Spidermonkey and other JavaScript engines. | 267 // Advance as long as character is a WhiteSpace or LineTerminator. |
267 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { | 268 // Remember if the latter is the case. |
268 // IsWhiteSpace() includes line terminators! | |
269 if (unicode_cache_->IsLineTerminator(c0_)) { | 269 if (unicode_cache_->IsLineTerminator(c0_)) { |
270 // Ignore line terminators, but remember them. This is necessary | |
271 // for automatic semicolon insertion. | |
272 has_line_terminator_before_next_ = true; | 270 has_line_terminator_before_next_ = true; |
| 271 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
| 272 !IsLittleEndianByteOrderMark(c0_)) { |
| 273 break; |
273 } | 274 } |
274 Advance(); | 275 Advance(); |
275 } | 276 } |
276 | 277 |
277 // If there is an HTML comment end '-->' at the beginning of a | 278 // If there is an HTML comment end '-->' at the beginning of a |
278 // line (with only whitespace in front of it), we treat the rest | 279 // line (with only whitespace in front of it), we treat the rest |
279 // of the line as a comment. This is in line with the way | 280 // of the line as a comment. This is in line with the way |
280 // SpiderMonkey handles it. | 281 // SpiderMonkey handles it. |
281 if (c0_ == '-' && has_line_terminator_before_next_) { | 282 if (c0_ == '-' && has_line_terminator_before_next_) { |
282 Advance(); | 283 Advance(); |
(...skipping 959 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1242 } | 1243 } |
1243 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u)); | 1244 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u)); |
1244 } | 1245 } |
1245 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f)); | 1246 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f)); |
1246 | 1247 |
1247 backing_store_.AddBlock(bytes); | 1248 backing_store_.AddBlock(bytes); |
1248 return backing_store_.EndSequence().start(); | 1249 return backing_store_.EndSequence().start(); |
1249 } | 1250 } |
1250 | 1251 |
1251 } } // namespace v8::internal | 1252 } } // namespace v8::internal |
OLD | NEW |