| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 251 token_ = keyword_token; | 251 token_ = keyword_token; |
| 252 return true; | 252 return true; |
| 253 } | 253 } |
| 254 return false; | 254 return false; |
| 255 } | 255 } |
| 256 | 256 |
| 257 void Step(uc32 input); | 257 void Step(uc32 input); |
| 258 }; | 258 }; |
| 259 | 259 |
| 260 | 260 |
| 261 class ScannerCharacterClasses; |
| 262 |
| 263 |
| 261 enum ParserMode { PARSE, PREPARSE }; | 264 enum ParserMode { PARSE, PREPARSE }; |
| 262 enum ParserLanguage { JAVASCRIPT, JSON }; | 265 enum ParserLanguage { JAVASCRIPT, JSON }; |
| 263 | 266 |
| 264 | 267 |
| 265 class Scanner { | 268 class Scanner { |
| 266 public: | 269 public: |
| 267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; | 270 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 268 | 271 |
| 269 // Construction | 272 // Construction |
| 270 explicit Scanner(ParserMode parse_mode); | 273 explicit Scanner(ParserMode parse_mode); |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 340 bool ScanRegExpFlags(); | 343 bool ScanRegExpFlags(); |
| 341 | 344 |
| 342 // Seek forward to the given position. This operation does not | 345 // Seek forward to the given position. This operation does not |
| 343 // work in general, for instance when there are pushed back | 346 // work in general, for instance when there are pushed back |
| 344 // characters, but works for seeking forward until simple delimiter | 347 // characters, but works for seeking forward until simple delimiter |
| 345 // tokens, which is what it is used for. | 348 // tokens, which is what it is used for. |
| 346 void SeekForward(int pos); | 349 void SeekForward(int pos); |
| 347 | 350 |
| 348 bool stack_overflow() { return stack_overflow_; } | 351 bool stack_overflow() { return stack_overflow_; } |
| 349 | 352 |
| 350 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; } | |
| 351 | |
| 352 // Tells whether the buffer contains an identifier (no escapes). | |
| 353 // Used for checking if a property name is an identifier. | |
| 354 static bool IsIdentifier(unibrow::CharacterStream* buffer); | |
| 355 | |
| 356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; | |
| 357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; | |
| 358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; | |
| 359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; | |
| 360 | |
| 361 static const int kCharacterLookaheadBufferSize = 1; | 353 static const int kCharacterLookaheadBufferSize = 1; |
| 362 static const int kNoEndPosition = 1; | 354 static const int kNoEndPosition = 1; |
| 363 | 355 |
| 364 private: | 356 private: |
| 365 void Init(Handle<String> source, | 357 void Init(Handle<String> source, |
| 366 unibrow::CharacterStream* stream, | 358 unibrow::CharacterStream* stream, |
| 367 int start_position, int end_position, | 359 int start_position, int end_position, |
| 368 ParserLanguage language); | 360 ParserLanguage language); |
| 369 | 361 |
| 370 | 362 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 381 // Used to convert the source string into a character stream when a stream | 373 // Used to convert the source string into a character stream when a stream |
| 382 // is not passed to the scanner. | 374 // is not passed to the scanner. |
| 383 SafeStringInputBuffer safe_string_input_buffer_; | 375 SafeStringInputBuffer safe_string_input_buffer_; |
| 384 | 376 |
| 385 // Buffer to hold literal values (identifiers, strings, numbers) | 377 // Buffer to hold literal values (identifiers, strings, numbers) |
| 386 // using 0-terminated UTF-8 encoding. | 378 // using 0-terminated UTF-8 encoding. |
| 387 UTF8Buffer literal_buffer_1_; | 379 UTF8Buffer literal_buffer_1_; |
| 388 UTF8Buffer literal_buffer_2_; | 380 UTF8Buffer literal_buffer_2_; |
| 389 | 381 |
| 390 bool stack_overflow_; | 382 bool stack_overflow_; |
| 391 static StaticResource<Utf8Decoder> utf8_decoder_; | |
| 392 | 383 |
| 393 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 384 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 394 uc32 c0_; | 385 uc32 c0_; |
| 395 | 386 |
| 396 // The current and look-ahead token. | 387 // The current and look-ahead token. |
| 397 struct TokenDesc { | 388 struct TokenDesc { |
| 398 Token::Value token; | 389 Token::Value token; |
| 399 Location location; | 390 Location location; |
| 400 UTF8Buffer* literal_buffer; | 391 UTF8Buffer* literal_buffer; |
| 401 }; | 392 }; |
| 402 | 393 |
| 403 TokenDesc current_; // desc for current token (as returned by Next()) | 394 TokenDesc current_; // desc for current token (as returned by Next()) |
| 404 TokenDesc next_; // desc for next token (one token look-ahead) | 395 TokenDesc next_; // desc for next token (one token look-ahead) |
| 405 bool has_line_terminator_before_next_; | 396 bool has_line_terminator_before_next_; |
| 406 bool is_pre_parsing_; | 397 bool is_pre_parsing_; |
| 407 bool is_parsing_json_; | 398 bool is_parsing_json_; |
| 408 | 399 |
| 400 ScannerCharacterClasses* const character_classes_; |
| 401 |
| 409 // Literal buffer support | 402 // Literal buffer support |
| 410 void StartLiteral(); | 403 void StartLiteral(); |
| 411 void AddChar(uc32 ch); | 404 void AddChar(uc32 ch); |
| 412 void AddCharAdvance(); | 405 void AddCharAdvance(); |
| 413 void TerminateLiteral(); | 406 void TerminateLiteral(); |
| 414 | 407 |
| 415 // Low-level scanning support. | 408 // Low-level scanning support. |
| 416 void Advance() { c0_ = source_->Advance(); } | 409 void Advance() { c0_ = source_->Advance(); } |
| 417 void PushBack(uc32 ch) { | 410 void PushBack(uc32 ch) { |
| 418 source_->PushBack(ch); | 411 source_->PushBack(ch); |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 484 // Return the current source position. | 477 // Return the current source position. |
| 485 int source_pos() { | 478 int source_pos() { |
| 486 return source_->pos() - kCharacterLookaheadBufferSize; | 479 return source_->pos() - kCharacterLookaheadBufferSize; |
| 487 } | 480 } |
| 488 | 481 |
| 489 // Decodes a unicode escape-sequence which is part of an identifier. | 482 // Decodes a unicode escape-sequence which is part of an identifier. |
| 490 // If the escape sequence cannot be decoded the result is kBadRune. | 483 // If the escape sequence cannot be decoded the result is kBadRune. |
| 491 uc32 ScanIdentifierUnicodeEscape(); | 484 uc32 ScanIdentifierUnicodeEscape(); |
| 492 }; | 485 }; |
| 493 | 486 |
| 487 |
| 488 class ScannerCharacterClasses { |
| 489 public: |
| 490 StaticResource<Scanner::Utf8Decoder>* utf8_decoder() { |
| 491 return &utf8_decoder_; |
| 492 } |
| 493 |
| 494 // Tells whether the buffer contains an identifier (no escapes). |
| 495 // Used for checking if a property name is an identifier. |
| 496 bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 497 |
| 498 bool IsWhiteSpace(unibrow::uchar c) { return is_white_space_.get(c); } |
| 499 |
| 500 private: |
| 501 ScannerCharacterClasses() {} |
| 502 |
| 503 // -------------------------------------------------------------------------- |
| 504 // Character predicates |
| 505 |
| 506 unibrow::Predicate<IdentifierStart, 128> is_identifier_start_; |
| 507 unibrow::Predicate<IdentifierPart, 128> is_identifier_part_; |
| 508 unibrow::Predicate<unibrow::LineTerminator, 128> is_line_terminator_; |
| 509 unibrow::Predicate<unibrow::WhiteSpace, 128> is_white_space_; |
| 510 |
| 511 StaticResource<Scanner::Utf8Decoder> utf8_decoder_; |
| 512 |
| 513 friend class Isolate; |
| 514 friend class Scanner; |
| 515 |
| 516 DISALLOW_COPY_AND_ASSIGN(ScannerCharacterClasses); |
| 517 }; |
| 518 |
| 494 } } // namespace v8::internal | 519 } } // namespace v8::internal |
| 495 | 520 |
| 496 #endif // V8_SCANNER_H_ | 521 #endif // V8_SCANNER_H_ |
| OLD | NEW |