| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 | 29 |
| 30 #include "ast.h" | 30 #include "ast.h" |
| 31 #include "handles.h" | 31 #include "handles.h" |
| 32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 #include "unicode-inl.h" | 33 #include "unicode-inl.h" |
| 34 | 34 |
| 35 namespace v8 { | 35 namespace v8 { |
| 36 namespace internal { | 36 namespace internal { |
| 37 | 37 |
| 38 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
| 39 // UTF8Buffer | |
| 40 | |
| 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { } | |
| 42 | |
| 43 | |
| 44 UTF8Buffer::~UTF8Buffer() {} | |
| 45 | |
| 46 | |
| 47 void UTF8Buffer::AddCharSlow(uc32 c) { | |
| 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | |
| 49 int length = unibrow::Utf8::Length(c); | |
| 50 Vector<char> block = buffer_.AddBlock(length, '\0'); | |
| 51 #ifdef DEBUG | |
| 52 int written_length = unibrow::Utf8::Encode(block.start(), c); | |
| 53 CHECK_EQ(length, written_length); | |
| 54 #else | |
| 55 unibrow::Utf8::Encode(block.start(), c); | |
| 56 #endif | |
| 57 } | |
| 58 | |
| 59 | |
| 60 // ---------------------------------------------------------------------------- | |
| 61 // UTF16Buffer | 39 // UTF16Buffer |
| 62 | 40 |
| 63 | |
| 64 UTF16Buffer::UTF16Buffer() | |
| 65 : pos_(0), end_(Scanner::kNoEndPosition) { } | |
| 66 | |
| 67 | |
| 68 // CharacterStreamUTF16Buffer | 41 // CharacterStreamUTF16Buffer |
| 69 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() | 42 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() |
| 70 : pushback_buffer_(0), last_(0), stream_(NULL) { } | 43 : pushback_buffer_(0), last_(0), stream_(NULL) { } |
| 71 | 44 |
| 72 | 45 |
| 73 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, | 46 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, |
| 74 unibrow::CharacterStream* input, | 47 unibrow::CharacterStream* input, |
| 75 int start_position, | 48 int start_position, |
| 76 int end_position) { | 49 int end_position) { |
| 77 stream_ = input; | 50 stream_ = input; |
| 78 if (start_position > 0) { | 51 if (start_position > 0) { |
| 79 SeekForward(start_position); | 52 SeekForward(start_position); |
| 80 } | 53 } |
| 81 end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt; | 54 end_ = end_position != kNoEndPosition ? end_position : kMaxInt; |
| 82 } | 55 } |
| 83 | 56 |
| 84 | 57 |
| 85 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { | 58 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { |
| 86 pushback_buffer()->Add(last_); | 59 pushback_buffer()->Add(last_); |
| 87 last_ = ch; | 60 last_ = ch; |
| 88 pos_--; | 61 pos_--; |
| 89 } | 62 } |
| 90 | 63 |
| 91 | 64 |
| 92 uc32 CharacterStreamUTF16Buffer::Advance() { | 65 uc32 CharacterStreamUTF16Buffer::Advance() { |
| 93 ASSERT(end_ != Scanner::kNoEndPosition); | 66 ASSERT(end_ != kNoEndPosition); |
| 94 ASSERT(end_ >= 0); | 67 ASSERT(end_ >= 0); |
| 95 // NOTE: It is of importance to Persian / Farsi resources that we do | 68 // NOTE: It is of importance to Persian / Farsi resources that we do |
| 96 // *not* strip format control characters in the scanner; see | 69 // *not* strip format control characters in the scanner; see |
| 97 // | 70 // |
| 98 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 | 71 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 |
| 99 // | 72 // |
| 100 // So, even though ECMA-262, section 7.1, page 11, dictates that we | 73 // So, even though ECMA-262, section 7.1, page 11, dictates that we |
| 101 // must remove Unicode format-control characters, we do not. This is | 74 // must remove Unicode format-control characters, we do not. This is |
| 102 // in line with how IE and SpiderMonkey handle it. | 75 // in line with how IE and SpiderMonkey handle it. |
| 103 if (!pushback_buffer()->is_empty()) { | 76 if (!pushback_buffer()->is_empty()) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 136 if (!complete_) scanner_->DropLiteral(); | 109 if (!complete_) scanner_->DropLiteral(); |
| 137 } | 110 } |
| 138 | 111 |
| 139 | 112 |
| 140 void Scanner::LiteralScope::Complete() { | 113 void Scanner::LiteralScope::Complete() { |
| 141 scanner_->TerminateLiteral(); | 114 scanner_->TerminateLiteral(); |
| 142 complete_ = true; | 115 complete_ = true; |
| 143 } | 116 } |
| 144 | 117 |
| 145 // ---------------------------------------------------------------------------- | 118 // ---------------------------------------------------------------------------- |
| 146 // Scanner | 119 // V8JavaScriptScanner |
| 147 | 120 |
| 148 Scanner::Scanner() | 121 void V8JavaScriptScanner::Initialize(Handle<String> source) { |
| 149 : has_line_terminator_before_next_(false), | 122 source_ = stream_initializer_.Init(source, NULL, 0, source->length()); |
| 150 is_parsing_json_(false), | 123 Init(); |
| 151 source_(NULL), | 124 // Skip initial whitespace allowing HTML comment ends just like |
| 152 stack_overflow_(false) {} | 125 // after a newline and scan first token. |
| 153 | 126 has_line_terminator_before_next_ = true; |
| 154 | 127 SkipWhiteSpace(); |
| 155 void Scanner::Initialize(Handle<String> source, | 128 Scan(); |
| 156 ParserLanguage language) { | |
| 157 Init(source, NULL, 0, source->length(), language); | |
| 158 } | 129 } |
| 159 | 130 |
| 160 | 131 |
| 161 void Scanner::Initialize(Handle<String> source, | 132 void V8JavaScriptScanner::Initialize(Handle<String> source, |
| 162 unibrow::CharacterStream* stream, | 133 unibrow::CharacterStream* stream) { |
| 163 ParserLanguage language) { | 134 source_ = stream_initializer_.Init(source, stream, |
| 164 Init(source, stream, 0, kNoEndPosition, language); | 135 0, UTF16Buffer::kNoEndPosition); |
| 136 Init(); |
| 137 // Skip initial whitespace allowing HTML comment ends just like |
| 138 // after a newline and scan first token. |
| 139 has_line_terminator_before_next_ = true; |
| 140 SkipWhiteSpace(); |
| 141 Scan(); |
| 165 } | 142 } |
| 166 | 143 |
| 167 | 144 |
| 168 void Scanner::Initialize(Handle<String> source, | 145 void V8JavaScriptScanner::Initialize(Handle<String> source, |
| 169 int start_position, | 146 int start_position, |
| 170 int end_position, | 147 int end_position) { |
| 171 ParserLanguage language) { | 148 source_ = stream_initializer_.Init(source, NULL, |
| 172 Init(source, NULL, start_position, end_position, language); | 149 start_position, end_position); |
| 150 Init(); |
| 151 // Skip initial whitespace allowing HTML comment ends just like |
| 152 // after a newline and scan first token. |
| 153 has_line_terminator_before_next_ = true; |
| 154 SkipWhiteSpace(); |
| 155 Scan(); |
| 173 } | 156 } |
| 174 | 157 |
| 175 | 158 |
| 176 void Scanner::Init(Handle<String> source, | 159 Token::Value V8JavaScriptScanner::NextCheckStack() { |
| 177 unibrow::CharacterStream* stream, | 160 // BUG 1215673: Find a thread safe way to set a stack limit in |
| 178 int start_position, | 161 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
| 179 int end_position, | 162 // threads. |
| 180 ParserLanguage language) { | 163 StackLimitCheck check; |
| 164 if (check.HasOverflowed()) { |
| 165 stack_overflow_ = true; |
| 166 current_ = next_; |
| 167 next_.token = Token::ILLEGAL; |
| 168 return current_.token; |
| 169 } else { |
| 170 return Next(); |
| 171 } |
| 172 } |
| 173 |
| 174 |
| 175 UTF16Buffer* StreamInitializer::Init(Handle<String> source, |
| 176 unibrow::CharacterStream* stream, |
| 177 int start_position, |
| 178 int end_position) { |
| 181 // Either initialize the scanner from a character stream or from a | 179 // Either initialize the scanner from a character stream or from a |
| 182 // string. | 180 // string. |
| 183 ASSERT(source.is_null() || stream == NULL); | 181 ASSERT(source.is_null() || stream == NULL); |
| 184 | 182 |
| 185 // Initialize the source buffer. | 183 // Initialize the source buffer. |
| 186 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 184 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
| 187 two_byte_string_buffer_.Initialize( | 185 two_byte_string_buffer_.Initialize( |
| 188 Handle<ExternalTwoByteString>::cast(source), | 186 Handle<ExternalTwoByteString>::cast(source), |
| 189 start_position, | 187 start_position, |
| 190 end_position); | 188 end_position); |
| 191 source_ = &two_byte_string_buffer_; | 189 return &two_byte_string_buffer_; |
| 192 } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) { | 190 } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) { |
| 193 ascii_string_buffer_.Initialize( | 191 ascii_string_buffer_.Initialize( |
| 194 Handle<ExternalAsciiString>::cast(source), | 192 Handle<ExternalAsciiString>::cast(source), |
| 195 start_position, | 193 start_position, |
| 196 end_position); | 194 end_position); |
| 197 source_ = &ascii_string_buffer_; | 195 return &ascii_string_buffer_; |
| 198 } else { | 196 } else { |
| 199 if (!source.is_null()) { | 197 if (!source.is_null()) { |
| 200 safe_string_input_buffer_.Reset(source.location()); | 198 safe_string_input_buffer_.Reset(source.location()); |
| 201 stream = &safe_string_input_buffer_; | 199 stream = &safe_string_input_buffer_; |
| 202 } | 200 } |
| 203 char_stream_buffer_.Initialize(source, | 201 char_stream_buffer_.Initialize(source, |
| 204 stream, | 202 stream, |
| 205 start_position, | 203 start_position, |
| 206 end_position); | 204 end_position); |
| 207 source_ = &char_stream_buffer_; | 205 return &char_stream_buffer_; |
| 208 } | 206 } |
| 207 } |
| 209 | 208 |
| 210 is_parsing_json_ = (language == JSON); | 209 // ---------------------------------------------------------------------------- |
| 210 // JsonScanner |
| 211 | 211 |
| 212 // Set c0_ (one character ahead) | 212 JsonScanner::JsonScanner() {} |
| 213 ASSERT(kCharacterLookaheadBufferSize == 1); | |
| 214 Advance(); | |
| 215 // Initialize current_ to not refer to a literal. | |
| 216 current_.literal_chars = Vector<const char>(); | |
| 217 // Reset literal buffer. | |
| 218 literal_buffer_.Reset(); | |
| 219 | 213 |
| 220 // Skip initial whitespace allowing HTML comment ends just like | 214 |
| 221 // after a newline and scan first token. | 215 void JsonScanner::Initialize(Handle<String> source) { |
| 222 has_line_terminator_before_next_ = true; | 216 source_ = stream_initializer_.Init(source, NULL, 0, source->length()); |
| 223 SkipWhiteSpace(); | 217 Init(); |
| 224 Scan(); | 218 // Skip initial whitespace. |
| 219 SkipJsonWhiteSpace(); |
| 220 // Preload first token as look-ahead. |
| 221 ScanJson(); |
| 225 } | 222 } |
| 226 | 223 |
| 227 | 224 |
| 228 Token::Value Scanner::Next() { | 225 Token::Value JsonScanner::Next() { |
| 229 // BUG 1215673: Find a thread safe way to set a stack limit in | 226 // BUG 1215673: Find a thread safe way to set a stack limit in |
| 230 // pre-parse mode. Otherwise, we cannot safely pre-parse from other | 227 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
| 231 // threads. | 228 // threads. |
| 232 current_ = next_; | 229 current_ = next_; |
| 233 // Check for stack-overflow before returning any tokens. | 230 // Check for stack-overflow before returning any tokens. |
| 234 StackLimitCheck check; | 231 StackLimitCheck check; |
| 235 if (check.HasOverflowed()) { | 232 if (check.HasOverflowed()) { |
| 236 stack_overflow_ = true; | 233 stack_overflow_ = true; |
| 237 next_.token = Token::ILLEGAL; | 234 next_.token = Token::ILLEGAL; |
| 238 } else { | 235 } else { |
| 239 has_line_terminator_before_next_ = false; | 236 ScanJson(); |
| 240 Scan(); | |
| 241 } | 237 } |
| 242 return current_.token; | 238 return current_.token; |
| 243 } | 239 } |
| 244 | 240 |
| 245 | 241 |
| 246 void Scanner::StartLiteral() { | 242 bool JsonScanner::SkipJsonWhiteSpace() { |
| 247 literal_buffer_.StartLiteral(); | |
| 248 } | |
| 249 | |
| 250 | |
| 251 void Scanner::AddLiteralChar(uc32 c) { | |
| 252 literal_buffer_.AddChar(c); | |
| 253 } | |
| 254 | |
| 255 | |
| 256 void Scanner::TerminateLiteral() { | |
| 257 next_.literal_chars = literal_buffer_.EndLiteral(); | |
| 258 } | |
| 259 | |
| 260 | |
| 261 void Scanner::DropLiteral() { | |
| 262 literal_buffer_.DropLiteral(); | |
| 263 } | |
| 264 | |
| 265 | |
| 266 void Scanner::AddLiteralCharAdvance() { | |
| 267 AddLiteralChar(c0_); | |
| 268 Advance(); | |
| 269 } | |
| 270 | |
| 271 | |
| 272 static inline bool IsByteOrderMark(uc32 c) { | |
| 273 // The Unicode value U+FFFE is guaranteed never to be assigned as a | |
| 274 // Unicode character; this implies that in a Unicode context the | |
| 275 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | |
| 276 // character expressed in little-endian byte order (since it could | |
| 277 // not be a U+FFFE character expressed in big-endian byte | |
| 278 // order). Nevertheless, we check for it to be compatible with | |
| 279 // Spidermonkey. | |
| 280 return c == 0xFEFF || c == 0xFFFE; | |
| 281 } | |
| 282 | |
| 283 | |
| 284 bool Scanner::SkipJsonWhiteSpace() { | |
| 285 int start_position = source_pos(); | 243 int start_position = source_pos(); |
| 286 // JSON WhiteSpace is tab, carriage-return, newline and space. | 244 // JSON WhiteSpace is tab, carriage-return, newline and space. |
| 287 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { | 245 while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { |
| 288 Advance(); | 246 Advance(); |
| 289 } | 247 } |
| 290 return source_pos() != start_position; | 248 return source_pos() != start_position; |
| 291 } | 249 } |
| 292 | 250 |
| 293 | 251 |
| 294 bool Scanner::SkipJavaScriptWhiteSpace() { | 252 void JsonScanner::ScanJson() { |
| 295 int start_position = source_pos(); | |
| 296 | |
| 297 while (true) { | |
| 298 // We treat byte-order marks (BOMs) as whitespace for better | |
| 299 // compatibility with Spidermonkey and other JavaScript engines. | |
| 300 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | |
| 301 // IsWhiteSpace() includes line terminators! | |
| 302 if (ScannerConstants::kIsLineTerminator.get(c0_)) { | |
| 303 // Ignore line terminators, but remember them. This is necessary | |
| 304 // for automatic semicolon insertion. | |
| 305 has_line_terminator_before_next_ = true; | |
| 306 } | |
| 307 Advance(); | |
| 308 } | |
| 309 | |
| 310 // If there is an HTML comment end '-->' at the beginning of a | |
| 311 // line (with only whitespace in front of it), we treat the rest | |
| 312 // of the line as a comment. This is in line with the way | |
| 313 // SpiderMonkey handles it. | |
| 314 if (c0_ == '-' && has_line_terminator_before_next_) { | |
| 315 Advance(); | |
| 316 if (c0_ == '-') { | |
| 317 Advance(); | |
| 318 if (c0_ == '>') { | |
| 319 // Treat the rest of the line as a comment. | |
| 320 SkipSingleLineComment(); | |
| 321 // Continue skipping white space after the comment. | |
| 322 continue; | |
| 323 } | |
| 324 PushBack('-'); // undo Advance() | |
| 325 } | |
| 326 PushBack('-'); // undo Advance() | |
| 327 } | |
| 328 // Return whether or not we skipped any characters. | |
| 329 return source_pos() != start_position; | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 | |
| 334 Token::Value Scanner::SkipSingleLineComment() { | |
| 335 Advance(); | |
| 336 | |
| 337 // The line terminator at the end of the line is not considered | |
| 338 // to be part of the single-line comment; it is recognized | |
| 339 // separately by the lexical grammar and becomes part of the | |
| 340 // stream of input elements for the syntactic grammar (see | |
| 341 // ECMA-262, section 7.4, page 12). | |
| 342 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | |
| 343 Advance(); | |
| 344 } | |
| 345 | |
| 346 return Token::WHITESPACE; | |
| 347 } | |
| 348 | |
| 349 | |
| 350 Token::Value Scanner::SkipMultiLineComment() { | |
| 351 ASSERT(c0_ == '*'); | |
| 352 Advance(); | |
| 353 | |
| 354 while (c0_ >= 0) { | |
| 355 char ch = c0_; | |
| 356 Advance(); | |
| 357 // If we have reached the end of the multi-line comment, we | |
| 358 // consume the '/' and insert a whitespace. This way all | |
| 359 // multi-line comments are treated as whitespace - even the ones | |
| 360 // containing line terminators. This contradicts ECMA-262, section | |
| 361 // 7.4, page 12, that says that multi-line comments containing | |
| 362 // line terminators should be treated as a line terminator, but it | |
| 363 // matches the behaviour of SpiderMonkey and KJS. | |
| 364 if (ch == '*' && c0_ == '/') { | |
| 365 c0_ = ' '; | |
| 366 return Token::WHITESPACE; | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 // Unterminated multi-line comment. | |
| 371 return Token::ILLEGAL; | |
| 372 } | |
| 373 | |
| 374 | |
| 375 Token::Value Scanner::ScanHtmlComment() { | |
| 376 // Check for <!-- comments. | |
| 377 ASSERT(c0_ == '!'); | |
| 378 Advance(); | |
| 379 if (c0_ == '-') { | |
| 380 Advance(); | |
| 381 if (c0_ == '-') return SkipSingleLineComment(); | |
| 382 PushBack('-'); // undo Advance() | |
| 383 } | |
| 384 PushBack('!'); // undo Advance() | |
| 385 ASSERT(c0_ == '!'); | |
| 386 return Token::LT; | |
| 387 } | |
| 388 | |
| 389 | |
| 390 | |
| 391 void Scanner::ScanJson() { | |
| 392 next_.literal_chars = Vector<const char>(); | 253 next_.literal_chars = Vector<const char>(); |
| 393 Token::Value token; | 254 Token::Value token; |
| 394 has_line_terminator_before_next_ = false; | |
| 395 do { | 255 do { |
| 396 // Remember the position of the next token | 256 // Remember the position of the next token |
| 397 next_.location.beg_pos = source_pos(); | 257 next_.location.beg_pos = source_pos(); |
| 398 switch (c0_) { | 258 switch (c0_) { |
| 399 case '\t': | 259 case '\t': |
| 400 case '\r': | 260 case '\r': |
| 401 case '\n': | 261 case '\n': |
| 402 case ' ': | 262 case ' ': |
| 403 Advance(); | 263 Advance(); |
| 404 token = Token::WHITESPACE; | 264 token = Token::WHITESPACE; |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 461 token = Select(Token::ILLEGAL); | 321 token = Select(Token::ILLEGAL); |
| 462 } | 322 } |
| 463 } | 323 } |
| 464 } while (token == Token::WHITESPACE); | 324 } while (token == Token::WHITESPACE); |
| 465 | 325 |
| 466 next_.location.end_pos = source_pos(); | 326 next_.location.end_pos = source_pos(); |
| 467 next_.token = token; | 327 next_.token = token; |
| 468 } | 328 } |
| 469 | 329 |
| 470 | 330 |
| 471 Token::Value Scanner::ScanJsonString() { | 331 Token::Value JsonScanner::ScanJsonString() { |
| 472 ASSERT_EQ('"', c0_); | 332 ASSERT_EQ('"', c0_); |
| 473 Advance(); | 333 Advance(); |
| 474 LiteralScope literal(this); | 334 LiteralScope literal(this); |
| 475 while (c0_ != '"' && c0_ > 0) { | 335 while (c0_ != '"' && c0_ > 0) { |
| 476 // Check for control character (0x00-0x1f) or unterminated string (<0). | 336 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 477 if (c0_ < 0x20) return Token::ILLEGAL; | 337 if (c0_ < 0x20) return Token::ILLEGAL; |
| 478 if (c0_ != '\\') { | 338 if (c0_ != '\\') { |
| 479 AddLiteralCharAdvance(); | 339 AddLiteralCharAdvance(); |
| 480 } else { | 340 } else { |
| 481 Advance(); | 341 Advance(); |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 521 } | 381 } |
| 522 if (c0_ != '"') { | 382 if (c0_ != '"') { |
| 523 return Token::ILLEGAL; | 383 return Token::ILLEGAL; |
| 524 } | 384 } |
| 525 literal.Complete(); | 385 literal.Complete(); |
| 526 Advance(); | 386 Advance(); |
| 527 return Token::STRING; | 387 return Token::STRING; |
| 528 } | 388 } |
| 529 | 389 |
| 530 | 390 |
| 531 Token::Value Scanner::ScanJsonNumber() { | 391 Token::Value JsonScanner::ScanJsonNumber() { |
| 532 LiteralScope literal(this); | 392 LiteralScope literal(this); |
| 533 if (c0_ == '-') AddLiteralCharAdvance(); | 393 if (c0_ == '-') AddLiteralCharAdvance(); |
| 534 if (c0_ == '0') { | 394 if (c0_ == '0') { |
| 535 AddLiteralCharAdvance(); | 395 AddLiteralCharAdvance(); |
| 536 // Prefix zero is only allowed if it's the only digit before | 396 // Prefix zero is only allowed if it's the only digit before |
| 537 // a decimal point or exponent. | 397 // a decimal point or exponent. |
| 538 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | 398 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
| 539 } else { | 399 } else { |
| 540 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | 400 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
| 541 do { | 401 do { |
| (...skipping 13 matching lines...) Expand all Loading... |
| 555 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | 415 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
| 556 do { | 416 do { |
| 557 AddLiteralCharAdvance(); | 417 AddLiteralCharAdvance(); |
| 558 } while (c0_ >= '0' && c0_ <= '9'); | 418 } while (c0_ >= '0' && c0_ <= '9'); |
| 559 } | 419 } |
| 560 literal.Complete(); | 420 literal.Complete(); |
| 561 return Token::NUMBER; | 421 return Token::NUMBER; |
| 562 } | 422 } |
| 563 | 423 |
| 564 | 424 |
| 565 Token::Value Scanner::ScanJsonIdentifier(const char* text, | 425 Token::Value JsonScanner::ScanJsonIdentifier(const char* text, |
| 566 Token::Value token) { | 426 Token::Value token) { |
| 567 LiteralScope literal(this); | 427 LiteralScope literal(this); |
| 568 while (*text != '\0') { | 428 while (*text != '\0') { |
| 569 if (c0_ != *text) return Token::ILLEGAL; | 429 if (c0_ != *text) return Token::ILLEGAL; |
| 570 Advance(); | 430 Advance(); |
| 571 text++; | 431 text++; |
| 572 } | 432 } |
| 573 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | 433 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
| 574 literal.Complete(); | 434 literal.Complete(); |
| 575 return token; | 435 return token; |
| 576 } | 436 } |
| 577 | 437 |
| 578 | 438 |
| 579 void Scanner::ScanJavaScript() { | |
| 580 next_.literal_chars = Vector<const char>(); | |
| 581 Token::Value token; | |
| 582 do { | |
| 583 // Remember the position of the next token | |
| 584 next_.location.beg_pos = source_pos(); | |
| 585 | |
| 586 switch (c0_) { | |
| 587 case ' ': | |
| 588 case '\t': | |
| 589 Advance(); | |
| 590 token = Token::WHITESPACE; | |
| 591 break; | |
| 592 | |
| 593 case '\n': | |
| 594 Advance(); | |
| 595 has_line_terminator_before_next_ = true; | |
| 596 token = Token::WHITESPACE; | |
| 597 break; | |
| 598 | |
| 599 case '"': case '\'': | |
| 600 token = ScanString(); | |
| 601 break; | |
| 602 | |
| 603 case '<': | |
| 604 // < <= << <<= <!-- | |
| 605 Advance(); | |
| 606 if (c0_ == '=') { | |
| 607 token = Select(Token::LTE); | |
| 608 } else if (c0_ == '<') { | |
| 609 token = Select('=', Token::ASSIGN_SHL, Token::SHL); | |
| 610 } else if (c0_ == '!') { | |
| 611 token = ScanHtmlComment(); | |
| 612 } else { | |
| 613 token = Token::LT; | |
| 614 } | |
| 615 break; | |
| 616 | |
| 617 case '>': | |
| 618 // > >= >> >>= >>> >>>= | |
| 619 Advance(); | |
| 620 if (c0_ == '=') { | |
| 621 token = Select(Token::GTE); | |
| 622 } else if (c0_ == '>') { | |
| 623 // >> >>= >>> >>>= | |
| 624 Advance(); | |
| 625 if (c0_ == '=') { | |
| 626 token = Select(Token::ASSIGN_SAR); | |
| 627 } else if (c0_ == '>') { | |
| 628 token = Select('=', Token::ASSIGN_SHR, Token::SHR); | |
| 629 } else { | |
| 630 token = Token::SAR; | |
| 631 } | |
| 632 } else { | |
| 633 token = Token::GT; | |
| 634 } | |
| 635 break; | |
| 636 | |
| 637 case '=': | |
| 638 // = == === | |
| 639 Advance(); | |
| 640 if (c0_ == '=') { | |
| 641 token = Select('=', Token::EQ_STRICT, Token::EQ); | |
| 642 } else { | |
| 643 token = Token::ASSIGN; | |
| 644 } | |
| 645 break; | |
| 646 | |
| 647 case '!': | |
| 648 // ! != !== | |
| 649 Advance(); | |
| 650 if (c0_ == '=') { | |
| 651 token = Select('=', Token::NE_STRICT, Token::NE); | |
| 652 } else { | |
| 653 token = Token::NOT; | |
| 654 } | |
| 655 break; | |
| 656 | |
| 657 case '+': | |
| 658 // + ++ += | |
| 659 Advance(); | |
| 660 if (c0_ == '+') { | |
| 661 token = Select(Token::INC); | |
| 662 } else if (c0_ == '=') { | |
| 663 token = Select(Token::ASSIGN_ADD); | |
| 664 } else { | |
| 665 token = Token::ADD; | |
| 666 } | |
| 667 break; | |
| 668 | |
| 669 case '-': | |
| 670 // - -- --> -= | |
| 671 Advance(); | |
| 672 if (c0_ == '-') { | |
| 673 Advance(); | |
| 674 if (c0_ == '>' && has_line_terminator_before_next_) { | |
| 675 // For compatibility with SpiderMonkey, we skip lines that | |
| 676 // start with an HTML comment end '-->'. | |
| 677 token = SkipSingleLineComment(); | |
| 678 } else { | |
| 679 token = Token::DEC; | |
| 680 } | |
| 681 } else if (c0_ == '=') { | |
| 682 token = Select(Token::ASSIGN_SUB); | |
| 683 } else { | |
| 684 token = Token::SUB; | |
| 685 } | |
| 686 break; | |
| 687 | |
| 688 case '*': | |
| 689 // * *= | |
| 690 token = Select('=', Token::ASSIGN_MUL, Token::MUL); | |
| 691 break; | |
| 692 | |
| 693 case '%': | |
| 694 // % %= | |
| 695 token = Select('=', Token::ASSIGN_MOD, Token::MOD); | |
| 696 break; | |
| 697 | |
| 698 case '/': | |
| 699 // / // /* /= | |
| 700 Advance(); | |
| 701 if (c0_ == '/') { | |
| 702 token = SkipSingleLineComment(); | |
| 703 } else if (c0_ == '*') { | |
| 704 token = SkipMultiLineComment(); | |
| 705 } else if (c0_ == '=') { | |
| 706 token = Select(Token::ASSIGN_DIV); | |
| 707 } else { | |
| 708 token = Token::DIV; | |
| 709 } | |
| 710 break; | |
| 711 | |
| 712 case '&': | |
| 713 // & && &= | |
| 714 Advance(); | |
| 715 if (c0_ == '&') { | |
| 716 token = Select(Token::AND); | |
| 717 } else if (c0_ == '=') { | |
| 718 token = Select(Token::ASSIGN_BIT_AND); | |
| 719 } else { | |
| 720 token = Token::BIT_AND; | |
| 721 } | |
| 722 break; | |
| 723 | |
| 724 case '|': | |
| 725 // | || |= | |
| 726 Advance(); | |
| 727 if (c0_ == '|') { | |
| 728 token = Select(Token::OR); | |
| 729 } else if (c0_ == '=') { | |
| 730 token = Select(Token::ASSIGN_BIT_OR); | |
| 731 } else { | |
| 732 token = Token::BIT_OR; | |
| 733 } | |
| 734 break; | |
| 735 | |
| 736 case '^': | |
| 737 // ^ ^= | |
| 738 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); | |
| 739 break; | |
| 740 | |
| 741 case '.': | |
| 742 // . Number | |
| 743 Advance(); | |
| 744 if (IsDecimalDigit(c0_)) { | |
| 745 token = ScanNumber(true); | |
| 746 } else { | |
| 747 token = Token::PERIOD; | |
| 748 } | |
| 749 break; | |
| 750 | |
| 751 case ':': | |
| 752 token = Select(Token::COLON); | |
| 753 break; | |
| 754 | |
| 755 case ';': | |
| 756 token = Select(Token::SEMICOLON); | |
| 757 break; | |
| 758 | |
| 759 case ',': | |
| 760 token = Select(Token::COMMA); | |
| 761 break; | |
| 762 | |
| 763 case '(': | |
| 764 token = Select(Token::LPAREN); | |
| 765 break; | |
| 766 | |
| 767 case ')': | |
| 768 token = Select(Token::RPAREN); | |
| 769 break; | |
| 770 | |
| 771 case '[': | |
| 772 token = Select(Token::LBRACK); | |
| 773 break; | |
| 774 | |
| 775 case ']': | |
| 776 token = Select(Token::RBRACK); | |
| 777 break; | |
| 778 | |
| 779 case '{': | |
| 780 token = Select(Token::LBRACE); | |
| 781 break; | |
| 782 | |
| 783 case '}': | |
| 784 token = Select(Token::RBRACE); | |
| 785 break; | |
| 786 | |
| 787 case '?': | |
| 788 token = Select(Token::CONDITIONAL); | |
| 789 break; | |
| 790 | |
| 791 case '~': | |
| 792 token = Select(Token::BIT_NOT); | |
| 793 break; | |
| 794 | |
| 795 default: | |
| 796 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { | |
| 797 token = ScanIdentifier(); | |
| 798 } else if (IsDecimalDigit(c0_)) { | |
| 799 token = ScanNumber(false); | |
| 800 } else if (SkipWhiteSpace()) { | |
| 801 token = Token::WHITESPACE; | |
| 802 } else if (c0_ < 0) { | |
| 803 token = Token::EOS; | |
| 804 } else { | |
| 805 token = Select(Token::ILLEGAL); | |
| 806 } | |
| 807 break; | |
| 808 } | |
| 809 | |
| 810 // Continue scanning for tokens as long as we're just skipping | |
| 811 // whitespace. | |
| 812 } while (token == Token::WHITESPACE); | |
| 813 | |
| 814 next_.location.end_pos = source_pos(); | |
| 815 next_.token = token; | |
| 816 } | |
| 817 | |
| 818 | |
| 819 void Scanner::SeekForward(int pos) { | |
| 820 source_->SeekForward(pos - 1); | |
| 821 Advance(); | |
| 822 // This function is only called to seek to the location | |
| 823 // of the end of a function (at the "}" token). It doesn't matter | |
| 824 // whether there was a line terminator in the part we skip. | |
| 825 has_line_terminator_before_next_ = false; | |
| 826 Scan(); | |
| 827 } | |
| 828 | |
| 829 | |
| 830 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | |
| 831 ASSERT(length <= 4); // prevent overflow | |
| 832 | |
| 833 uc32 digits[4]; | |
| 834 uc32 x = 0; | |
| 835 for (int i = 0; i < length; i++) { | |
| 836 digits[i] = c0_; | |
| 837 int d = HexValue(c0_); | |
| 838 if (d < 0) { | |
| 839 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | |
| 840 // should be illegal, but other JS VMs just return the | |
| 841 // non-escaped version of the original character. | |
| 842 | |
| 843 // Push back digits read, except the last one (in c0_). | |
| 844 for (int j = i-1; j >= 0; j--) { | |
| 845 PushBack(digits[j]); | |
| 846 } | |
| 847 // Notice: No handling of error - treat it as "\u"->"u". | |
| 848 return c; | |
| 849 } | |
| 850 x = x * 16 + d; | |
| 851 Advance(); | |
| 852 } | |
| 853 | |
| 854 return x; | |
| 855 } | |
| 856 | |
| 857 | |
| 858 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | |
| 859 // ECMA-262. Other JS VMs support them. | |
| 860 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | |
| 861 uc32 x = c - '0'; | |
| 862 for (int i = 0; i < length; i++) { | |
| 863 int d = c0_ - '0'; | |
| 864 if (d < 0 || d > 7) break; | |
| 865 int nx = x * 8 + d; | |
| 866 if (nx >= 256) break; | |
| 867 x = nx; | |
| 868 Advance(); | |
| 869 } | |
| 870 return x; | |
| 871 } | |
| 872 | |
| 873 | |
| 874 void Scanner::ScanEscape() { | |
| 875 uc32 c = c0_; | |
| 876 Advance(); | |
| 877 | |
| 878 // Skip escaped newlines. | |
| 879 if (ScannerConstants::kIsLineTerminator.get(c)) { | |
| 880 // Allow CR+LF newlines in multiline string literals. | |
| 881 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | |
| 882 // Allow LF+CR newlines in multiline string literals. | |
| 883 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | |
| 884 return; | |
| 885 } | |
| 886 | |
| 887 switch (c) { | |
| 888 case '\'': // fall through | |
| 889 case '"' : // fall through | |
| 890 case '\\': break; | |
| 891 case 'b' : c = '\b'; break; | |
| 892 case 'f' : c = '\f'; break; | |
| 893 case 'n' : c = '\n'; break; | |
| 894 case 'r' : c = '\r'; break; | |
| 895 case 't' : c = '\t'; break; | |
| 896 case 'u' : c = ScanHexEscape(c, 4); break; | |
| 897 case 'v' : c = '\v'; break; | |
| 898 case 'x' : c = ScanHexEscape(c, 2); break; | |
| 899 case '0' : // fall through | |
| 900 case '1' : // fall through | |
| 901 case '2' : // fall through | |
| 902 case '3' : // fall through | |
| 903 case '4' : // fall through | |
| 904 case '5' : // fall through | |
| 905 case '6' : // fall through | |
| 906 case '7' : c = ScanOctalEscape(c, 2); break; | |
| 907 } | |
| 908 | |
| 909 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | |
| 910 // should be illegal, but they are commonly handled | |
| 911 // as non-escaped characters by JS VMs. | |
| 912 AddLiteralChar(c); | |
| 913 } | |
| 914 | |
| 915 | |
| 916 Token::Value Scanner::ScanString() { | |
| 917 uc32 quote = c0_; | |
| 918 Advance(); // consume quote | |
| 919 | |
| 920 LiteralScope literal(this); | |
| 921 while (c0_ != quote && c0_ >= 0 | |
| 922 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | |
| 923 uc32 c = c0_; | |
| 924 Advance(); | |
| 925 if (c == '\\') { | |
| 926 if (c0_ < 0) return Token::ILLEGAL; | |
| 927 ScanEscape(); | |
| 928 } else { | |
| 929 AddLiteralChar(c); | |
| 930 } | |
| 931 } | |
| 932 if (c0_ != quote) return Token::ILLEGAL; | |
| 933 literal.Complete(); | |
| 934 | |
| 935 Advance(); // consume quote | |
| 936 return Token::STRING; | |
| 937 } | |
| 938 | |
| 939 | |
| 940 Token::Value Scanner::Select(Token::Value tok) { | |
| 941 Advance(); | |
| 942 return tok; | |
| 943 } | |
| 944 | |
| 945 | |
| 946 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { | |
| 947 Advance(); | |
| 948 if (c0_ == next) { | |
| 949 Advance(); | |
| 950 return then; | |
| 951 } else { | |
| 952 return else_; | |
| 953 } | |
| 954 } | |
| 955 | |
| 956 | |
| 957 // Returns true if any decimal digits were scanned, returns false otherwise. | |
| 958 void Scanner::ScanDecimalDigits() { | |
| 959 while (IsDecimalDigit(c0_)) | |
| 960 AddLiteralCharAdvance(); | |
| 961 } | |
| 962 | |
| 963 | |
| 964 Token::Value Scanner::ScanNumber(bool seen_period) { | |
| 965 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | |
| 966 | |
| 967 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | |
| 968 | |
| 969 LiteralScope literal(this); | |
| 970 if (seen_period) { | |
| 971 // we have already seen a decimal point of the float | |
| 972 AddLiteralChar('.'); | |
| 973 ScanDecimalDigits(); // we know we have at least one digit | |
| 974 | |
| 975 } else { | |
| 976 // if the first character is '0' we must check for octals and hex | |
| 977 if (c0_ == '0') { | |
| 978 AddLiteralCharAdvance(); | |
| 979 | |
| 980 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number | |
| 981 if (c0_ == 'x' || c0_ == 'X') { | |
| 982 // hex number | |
| 983 kind = HEX; | |
| 984 AddLiteralCharAdvance(); | |
| 985 if (!IsHexDigit(c0_)) { | |
| 986 // we must have at least one hex digit after 'x'/'X' | |
| 987 return Token::ILLEGAL; | |
| 988 } | |
| 989 while (IsHexDigit(c0_)) { | |
| 990 AddLiteralCharAdvance(); | |
| 991 } | |
| 992 } else if ('0' <= c0_ && c0_ <= '7') { | |
| 993 // (possible) octal number | |
| 994 kind = OCTAL; | |
| 995 while (true) { | |
| 996 if (c0_ == '8' || c0_ == '9') { | |
| 997 kind = DECIMAL; | |
| 998 break; | |
| 999 } | |
| 1000 if (c0_ < '0' || '7' < c0_) break; | |
| 1001 AddLiteralCharAdvance(); | |
| 1002 } | |
| 1003 } | |
| 1004 } | |
| 1005 | |
| 1006 // Parse decimal digits and allow trailing fractional part. | |
| 1007 if (kind == DECIMAL) { | |
| 1008 ScanDecimalDigits(); // optional | |
| 1009 if (c0_ == '.') { | |
| 1010 AddLiteralCharAdvance(); | |
| 1011 ScanDecimalDigits(); // optional | |
| 1012 } | |
| 1013 } | |
| 1014 } | |
| 1015 | |
| 1016 // scan exponent, if any | |
| 1017 if (c0_ == 'e' || c0_ == 'E') { | |
| 1018 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | |
| 1019 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed | |
| 1020 // scan exponent | |
| 1021 AddLiteralCharAdvance(); | |
| 1022 if (c0_ == '+' || c0_ == '-') | |
| 1023 AddLiteralCharAdvance(); | |
| 1024 if (!IsDecimalDigit(c0_)) { | |
| 1025 // we must have at least one decimal digit after 'e'/'E' | |
| 1026 return Token::ILLEGAL; | |
| 1027 } | |
| 1028 ScanDecimalDigits(); | |
| 1029 } | |
| 1030 | |
| 1031 // The source character immediately following a numeric literal must | |
| 1032 // not be an identifier start or a decimal digit; see ECMA-262 | |
| 1033 // section 7.8.3, page 17 (note that we read only one decimal digit | |
| 1034 // if the value is 0). | |
| 1035 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) | |
| 1036 return Token::ILLEGAL; | |
| 1037 | |
| 1038 literal.Complete(); | |
| 1039 | |
| 1040 return Token::NUMBER; | |
| 1041 } | |
| 1042 | |
| 1043 | |
| 1044 uc32 Scanner::ScanIdentifierUnicodeEscape() { | |
| 1045 Advance(); | |
| 1046 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | |
| 1047 Advance(); | |
| 1048 uc32 c = ScanHexEscape('u', 4); | |
| 1049 // We do not allow a unicode escape sequence to start another | |
| 1050 // unicode escape sequence. | |
| 1051 if (c == '\\') return unibrow::Utf8::kBadChar; | |
| 1052 return c; | |
| 1053 } | |
| 1054 | |
| 1055 | |
| 1056 Token::Value Scanner::ScanIdentifier() { | |
| 1057 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); | |
| 1058 | |
| 1059 LiteralScope literal(this); | |
| 1060 KeywordMatcher keyword_match; | |
| 1061 | |
| 1062 // Scan identifier start character. | |
| 1063 if (c0_ == '\\') { | |
| 1064 uc32 c = ScanIdentifierUnicodeEscape(); | |
| 1065 // Only allow legal identifier start characters. | |
| 1066 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; | |
| 1067 AddLiteralChar(c); | |
| 1068 keyword_match.Fail(); | |
| 1069 } else { | |
| 1070 AddLiteralChar(c0_); | |
| 1071 keyword_match.AddChar(c0_); | |
| 1072 Advance(); | |
| 1073 } | |
| 1074 | |
| 1075 // Scan the rest of the identifier characters. | |
| 1076 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | |
| 1077 if (c0_ == '\\') { | |
| 1078 uc32 c = ScanIdentifierUnicodeEscape(); | |
| 1079 // Only allow legal identifier part characters. | |
| 1080 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; | |
| 1081 AddLiteralChar(c); | |
| 1082 keyword_match.Fail(); | |
| 1083 } else { | |
| 1084 AddLiteralChar(c0_); | |
| 1085 keyword_match.AddChar(c0_); | |
| 1086 Advance(); | |
| 1087 } | |
| 1088 } | |
| 1089 literal.Complete(); | |
| 1090 | |
| 1091 return keyword_match.token(); | |
| 1092 } | |
| 1093 | |
| 1094 | |
| 1095 | 439 |
| 1096 bool Scanner::ScanRegExpPattern(bool seen_equal) { | |
| 1097 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | |
| 1098 bool in_character_class = false; | |
| 1099 | |
| 1100 // Previous token is either '/' or '/=', in the second case, the | |
| 1101 // pattern starts at =. | |
| 1102 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | |
| 1103 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | |
| 1104 | |
| 1105 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | |
| 1106 // the scanner should pass uninterpreted bodies to the RegExp | |
| 1107 // constructor. | |
| 1108 LiteralScope literal(this); | |
| 1109 if (seen_equal) | |
| 1110 AddLiteralChar('='); | |
| 1111 | |
| 1112 while (c0_ != '/' || in_character_class) { | |
| 1113 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | |
| 1114 if (c0_ == '\\') { // escaped character | |
| 1115 AddLiteralCharAdvance(); | |
| 1116 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | |
| 1117 AddLiteralCharAdvance(); | |
| 1118 } else { // unescaped character | |
| 1119 if (c0_ == '[') in_character_class = true; | |
| 1120 if (c0_ == ']') in_character_class = false; | |
| 1121 AddLiteralCharAdvance(); | |
| 1122 } | |
| 1123 } | |
| 1124 Advance(); // consume '/' | |
| 1125 | |
| 1126 literal.Complete(); | |
| 1127 | |
| 1128 return true; | |
| 1129 } | |
| 1130 | |
| 1131 bool Scanner::ScanRegExpFlags() { | |
| 1132 // Scan regular expression flags. | |
| 1133 LiteralScope literal(this); | |
| 1134 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | |
| 1135 if (c0_ == '\\') { | |
| 1136 uc32 c = ScanIdentifierUnicodeEscape(); | |
| 1137 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | |
| 1138 // We allow any escaped character, unlike the restriction on | |
| 1139 // IdentifierPart when it is used to build an IdentifierName. | |
| 1140 AddLiteralChar(c); | |
| 1141 continue; | |
| 1142 } | |
| 1143 } | |
| 1144 AddLiteralCharAdvance(); | |
| 1145 } | |
| 1146 literal.Complete(); | |
| 1147 | |
| 1148 next_.location.end_pos = source_pos() - 1; | |
| 1149 return true; | |
| 1150 } | |
| 1151 | |
| 1152 } } // namespace v8::internal | 440 } } // namespace v8::internal |
| OLD | NEW |