| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) |
| 28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
| 29 | 29 |
| 30 #include <cmath> | 30 #include <cmath> |
| 31 | 31 |
| 32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 | 33 |
| 34 #include "../include/v8stdint.h" | 34 #include "../include/v8stdint.h" |
| 35 #include "char-predicates-inl.h" | 35 #include "char-predicates-inl.h" |
| 36 #include "conversions-inl.h" | 36 #include "conversions-inl.h" |
| 37 #include "list-inl.h" | 37 #include "list-inl.h" |
| 38 #include "v8.h" |
| 38 | 39 |
| 39 namespace v8 { | 40 namespace v8 { |
| 40 namespace internal { | 41 namespace internal { |
| 41 | 42 |
| 43 |
| 44 #ifndef V8_USE_GENERATED_LEXER |
| 42 // ---------------------------------------------------------------------------- | 45 // ---------------------------------------------------------------------------- |
| 43 // Scanner | 46 // Scanner |
| 44 | 47 |
| 45 Scanner::Scanner(UnicodeCache* unicode_cache) | 48 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 46 : unicode_cache_(unicode_cache), | 49 : unicode_cache_(unicode_cache), |
| 47 octal_pos_(Location::invalid()), | 50 octal_pos_(Location::invalid()), |
| 48 harmony_scoping_(false), | 51 harmony_scoping_(false), |
| 49 harmony_modules_(false), | 52 harmony_modules_(false), |
| 50 harmony_numeric_literals_(false) { } | 53 harmony_numeric_literals_(false) { } |
| 51 | 54 |
| (...skipping 187 matching lines...) |
| 239 next_.location.end_pos = pos + 1; | 242 next_.location.end_pos = pos + 1; |
| 240 Advance(); | 243 Advance(); |
| 241 return current_.token; | 244 return current_.token; |
| 242 } | 245 } |
| 243 } | 246 } |
| 244 Scan(); | 247 Scan(); |
| 245 return current_.token; | 248 return current_.token; |
| 246 } | 249 } |
| 247 | 250 |
| 248 | 251 |
| 249 static inline bool IsByteOrderMark(uc32 c) { | 252 // TODO(yangguo): check whether this is actually necessary. |
| 253 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
| 250 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 254 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 251 // Unicode character; this implies that in a Unicode context the | 255 // Unicode character; this implies that in a Unicode context the |
| 252 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 256 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 253 // character expressed in little-endian byte order (since it could | 257 // character expressed in little-endian byte order (since it could |
| 254 // not be a U+FFFE character expressed in big-endian byte | 258 // not be a U+FFFE character expressed in big-endian byte |
| 255 // order). Nevertheless, we check for it to be compatible with | 259 // order). Nevertheless, we check for it to be compatible with |
| 256 // Spidermonkey. | 260 // Spidermonkey. |
| 257 return c == 0xFEFF || c == 0xFFFE; | 261 return c == 0xFFFE; |
| 258 } | 262 } |
| 259 | 263 |
| 260 | 264 |
| 261 bool Scanner::SkipWhiteSpace() { | 265 bool Scanner::SkipWhiteSpace() { |
| 262 int start_position = source_pos(); | 266 int start_position = source_pos(); |
| 263 | 267 |
| 264 while (true) { | 268 while (true) { |
| 265 // We treat byte-order marks (BOMs) as whitespace for better | 269 while (true) { |
| 266 // compatibility with Spidermonkey and other JavaScript engines. | 270 // Advance as long as character is a WhiteSpace or LineTerminator. |
| 267 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { | 271 // Remember if the latter is the case. |
| 268 // IsWhiteSpace() includes line terminators! | |
| 269 if (unicode_cache_->IsLineTerminator(c0_)) { | 272 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 270 // Ignore line terminators, but remember them. This is necessary | |
| 271 // for automatic semicolon insertion. | |
| 272 has_line_terminator_before_next_ = true; | 273 has_line_terminator_before_next_ = true; |
| 274 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
| 275 !IsLittleEndianByteOrderMark(c0_)) { |
| 276 break; |
| 273 } | 277 } |
| 274 Advance(); | 278 Advance(); |
| 275 } | 279 } |
| 276 | 280 |
| 277 // If there is an HTML comment end '-->' at the beginning of a | 281 // If there is an HTML comment end '-->' at the beginning of a |
| 278 // line (with only whitespace in front of it), we treat the rest | 282 // line (with only whitespace in front of it), we treat the rest |
| 279 // of the line as a comment. This is in line with the way | 283 // of the line as a comment. This is in line with the way |
| 280 // SpiderMonkey handles it. | 284 // SpiderMonkey handles it. |
| 281 if (c0_ == '-' && has_line_terminator_before_next_) { | 285 if (c0_ == '-' && has_line_terminator_before_next_) { |
| 282 Advance(); | 286 Advance(); |
| (...skipping 823 matching lines...) |
| 1106 Advance(); | 1110 Advance(); |
| 1107 } | 1111 } |
| 1108 } | 1112 } |
| 1109 literal.Complete(); | 1113 literal.Complete(); |
| 1110 | 1114 |
| 1111 next_.location.end_pos = source_pos() - 1; | 1115 next_.location.end_pos = source_pos() - 1; |
| 1112 return true; | 1116 return true; |
| 1113 } | 1117 } |
| 1114 | 1118 |
| 1115 | 1119 |
| 1120 #endif |
| 1121 |
| 1122 |
| 1116 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) { | 1123 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) { |
| 1117 return AddSymbol(Vector<const byte>::cast(key), true, value); | 1124 return AddSymbol(Vector<const byte>::cast(key), true, value); |
| 1118 } | 1125 } |
| 1119 | 1126 |
| 1120 | 1127 |
| 1121 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) { | 1128 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) { |
| 1122 return AddSymbol(Vector<const byte>::cast(key), false, value); | 1129 return AddSymbol(Vector<const byte>::cast(key), false, value); |
| 1123 } | 1130 } |
| 1124 | 1131 |
| 1125 | 1132 |
| (...skipping 116 matching lines...) |
| 1242 } | 1249 } |
| 1243 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u)); | 1250 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u)); |
| 1244 } | 1251 } |
| 1245 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f)); | 1252 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f)); |
| 1246 | 1253 |
| 1247 backing_store_.AddBlock(bytes); | 1254 backing_store_.AddBlock(bytes); |
| 1248 return backing_store_.EndSequence().start(); | 1255 return backing_store_.EndSequence().start(); |
| 1249 } | 1256 } |
| 1250 | 1257 |
| 1251 } } // namespace v8::internal | 1258 } } // namespace v8::internal |
| OLD | NEW |