OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
11 // with the distribution. | 11 // with the distribution. |
12 // * Neither the name of Google Inc. nor the names of its | 12 // * Neither the name of Google Inc. nor the names of its |
13 // contributors may be used to endorse or promote products derived | 13 // contributors may be used to endorse or promote products derived |
14 // from this software without specific prior written permission. | 14 // from this software without specific prior written permission. |
15 // | 15 // |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
| 28 // Features shared by parsing and pre-parsing scanners. |
| 29 |
28 #ifndef V8_SCANNER_H_ | 30 #ifndef V8_SCANNER_H_ |
29 #define V8_SCANNER_H_ | 31 #define V8_SCANNER_H_ |
30 | 32 |
31 #include "scanner-base.h" | 33 #include "allocation.h" |
| 34 #include "char-predicates.h" |
| 35 #include "checks.h" |
| 36 #include "globals.h" |
| 37 #include "token.h" |
| 38 #include "unicode-inl.h" |
| 39 #include "utils.h" |
32 | 40 |
33 namespace v8 { | 41 namespace v8 { |
34 namespace internal { | 42 namespace internal { |
35 | 43 |
36 // A buffered character stream based on a random access character | 44 // Returns the value (0 .. 15) of a hexadecimal character c. |
37 // source (ReadBlock can be called with pos_ pointing to any position, | 45 // If c is not a legal hexadecimal character, returns a value < 0. |
38 // even positions before the current). | 46 inline int HexValue(uc32 c) { |
39 class BufferedUC16CharacterStream: public UC16CharacterStream { | 47 c -= '0'; |
| 48 if (static_cast<unsigned>(c) <= 9) return c; |
| 49 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 50 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 51 return -1; |
| 52 } |
| 53 |
| 54 |
| 55 // --------------------------------------------------------------------- |
| 56 // Buffered stream of characters, using an internal UC16 buffer. |
| 57 |
| 58 class UC16CharacterStream { |
40 public: | 59 public: |
41 BufferedUC16CharacterStream(); | 60 UC16CharacterStream() : pos_(0) { } |
42 virtual ~BufferedUC16CharacterStream(); | 61 virtual ~UC16CharacterStream() { } |
43 | 62 |
44 virtual void PushBack(uc32 character); | 63 // Returns and advances past the next UC16 character in the input |
| 64 // stream. If there are no more characters, it returns a negative |
| 65 // value. |
| 66 inline uc32 Advance() { |
| 67 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { |
| 68 pos_++; |
| 69 return static_cast<uc32>(*(buffer_cursor_++)); |
| 70 } |
| 71 // Note: currently the following increment is necessary to avoid a |
| 72 // parser problem! The scanner treats the final kEndOfInput as |
| 73 // a character with a position, and does math relative to that |
| 74 // position. |
| 75 pos_++; |
| 76 |
| 77 return kEndOfInput; |
| 78 } |
| 79 |
| 80 // Return the current position in the character stream. |
| 81 // Starts at zero. |
| 82 inline unsigned pos() const { return pos_; } |
| 83 |
| 84 // Skips forward past the next character_count UC16 characters |
| 85 // in the input, or until the end of input if that comes sooner. |
| 86 // Returns the number of characters actually skipped. If less |
| 87 // than character_count, the end of the input was reached. |
| 88 inline unsigned SeekForward(unsigned character_count) { |
| 89 unsigned buffered_chars = |
| 90 static_cast<unsigned>(buffer_end_ - buffer_cursor_); |
| 91 if (character_count <= buffered_chars) { |
| 92 buffer_cursor_ += character_count; |
| 93 pos_ += character_count; |
| 94 return character_count; |
| 95 } |
| 96 return SlowSeekForward(character_count); |
| 97 } |
| 98 |
| 99 // Pushes back the most recently read UC16 character (or negative |
| 100 // value if at end of input), i.e., the value returned by the most recent |
| 101 // call to Advance. |
| 102 // Must not be used right after calling SeekForward. |
| 103 virtual void PushBack(int32_t character) = 0; |
45 | 104 |
46 protected: | 105 protected: |
47 static const unsigned kBufferSize = 512; | 106 static const uc32 kEndOfInput = -1; |
48 static const unsigned kPushBackStepSize = 16; | 107 |
49 | 108 // Ensures that the buffer_cursor_ points to the character at |
50 virtual unsigned SlowSeekForward(unsigned delta); | 109 // position pos_ of the input, if possible. If the position |
51 virtual bool ReadBlock(); | 110 // is at or after the end of the input, return false. If there |
52 virtual void SlowPushBack(uc16 character); | 111 // are more characters available, return true. |
53 | 112 virtual bool ReadBlock() = 0; |
54 virtual unsigned BufferSeekForward(unsigned delta) = 0; | 113 virtual unsigned SlowSeekForward(unsigned character_count) = 0; |
55 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; | 114 |
56 | 115 const uc16* buffer_cursor_; |
57 const uc16* pushback_limit_; | 116 const uc16* buffer_end_; |
58 uc16 buffer_[kBufferSize]; | 117 unsigned pos_; |
59 }; | 118 }; |
60 | 119 |
61 | 120 |
62 // Generic string stream. | 121 class UnicodeCache { |
63 class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { | 122 // --------------------------------------------------------------------- |
| 123 // Caching predicates used by scanners. |
64 public: | 124 public: |
65 GenericStringUC16CharacterStream(Handle<String> data, | 125 UnicodeCache() {} |
66 unsigned start_position, | 126 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
67 unsigned end_position); | 127 |
68 virtual ~GenericStringUC16CharacterStream(); | 128 StaticResource<Utf8Decoder>* utf8_decoder() { |
| 129 return &utf8_decoder_; |
| 130 } |
| 131 |
| 132 bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } |
| 133 bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } |
| 134 bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } |
| 135 bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } |
| 136 |
| 137 private: |
| 138 |
| 139 unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; |
| 140 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; |
| 141 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; |
| 142 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; |
| 143 StaticResource<Utf8Decoder> utf8_decoder_; |
| 144 |
| 145 DISALLOW_COPY_AND_ASSIGN(UnicodeCache); |
| 146 }; |
| 147 |
| 148 |
| 149 // ---------------------------------------------------------------------------- |
| 150 // LiteralBuffer - Collector of chars of literals. |
| 151 |
| 152 class LiteralBuffer { |
| 153 public: |
| 154 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } |
| 155 |
| 156 ~LiteralBuffer() { |
| 157 if (backing_store_.length() > 0) { |
| 158 backing_store_.Dispose(); |
| 159 } |
| 160 } |
| 161 |
| 162 inline void AddChar(uc16 character) { |
| 163 if (position_ >= backing_store_.length()) ExpandBuffer(); |
| 164 if (is_ascii_) { |
| 165 if (character < kMaxAsciiCharCodeU) { |
| 166 backing_store_[position_] = static_cast<byte>(character); |
| 167 position_ += kASCIISize; |
| 168 return; |
| 169 } |
| 170 ConvertToUC16(); |
| 171 } |
| 172 *reinterpret_cast<uc16*>(&backing_store_[position_]) = character; |
| 173 position_ += kUC16Size; |
| 174 } |
| 175 |
| 176 bool is_ascii() { return is_ascii_; } |
| 177 |
| 178 Vector<const uc16> uc16_literal() { |
| 179 ASSERT(!is_ascii_); |
| 180 ASSERT((position_ & 0x1) == 0); |
| 181 return Vector<const uc16>( |
| 182 reinterpret_cast<const uc16*>(backing_store_.start()), |
| 183 position_ >> 1); |
| 184 } |
| 185 |
| 186 Vector<const char> ascii_literal() { |
| 187 ASSERT(is_ascii_); |
| 188 return Vector<const char>( |
| 189 reinterpret_cast<const char*>(backing_store_.start()), |
| 190 position_); |
| 191 } |
| 192 |
| 193 int length() { |
| 194 return is_ascii_ ? position_ : (position_ >> 1); |
| 195 } |
| 196 |
| 197 void Reset() { |
| 198 position_ = 0; |
| 199 is_ascii_ = true; |
| 200 } |
| 201 private: |
| 202 static const int kInitialCapacity = 16; |
| 203 static const int kGrowthFactory = 4; |
| 204 static const int kMinConversionSlack = 256; |
| 205 static const int kMaxGrowth = 1 * MB; |
| 206 inline int NewCapacity(int min_capacity) { |
| 207 int capacity = Max(min_capacity, backing_store_.length()); |
| 208 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); |
| 209 return new_capacity; |
| 210 } |
| 211 |
| 212 void ExpandBuffer() { |
| 213 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); |
| 214 memcpy(new_store.start(), backing_store_.start(), position_); |
| 215 backing_store_.Dispose(); |
| 216 backing_store_ = new_store; |
| 217 } |
| 218 |
| 219 void ConvertToUC16() { |
| 220 ASSERT(is_ascii_); |
| 221 Vector<byte> new_store; |
| 222 int new_content_size = position_ * kUC16Size; |
| 223 if (new_content_size >= backing_store_.length()) { |
| 224 // Ensure room for all currently read characters as UC16 as well |
| 225 // as the character about to be stored. |
| 226 new_store = Vector<byte>::New(NewCapacity(new_content_size)); |
| 227 } else { |
| 228 new_store = backing_store_; |
| 229 } |
| 230 char* src = reinterpret_cast<char*>(backing_store_.start()); |
| 231 uc16* dst = reinterpret_cast<uc16*>(new_store.start()); |
| 232 for (int i = position_ - 1; i >= 0; i--) { |
| 233 dst[i] = src[i]; |
| 234 } |
| 235 if (new_store.start() != backing_store_.start()) { |
| 236 backing_store_.Dispose(); |
| 237 backing_store_ = new_store; |
| 238 } |
| 239 position_ = new_content_size; |
| 240 is_ascii_ = false; |
| 241 } |
| 242 |
| 243 bool is_ascii_; |
| 244 int position_; |
| 245 Vector<byte> backing_store_; |
| 246 |
| 247 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); |
| 248 }; |
| 249 |
| 250 |
| 251 // ---------------------------------------------------------------------------- |
| 252 // Scanner base-class. |
| 253 |
| 254 // Generic functionality used by both JSON and JavaScript scanners. |
| 255 class Scanner { |
| 256 public: |
| 257 // -1 is outside of the range of any real source code. |
| 258 static const int kNoOctalLocation = -1; |
| 259 |
| 260 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; |
| 261 |
| 262 class LiteralScope { |
| 263 public: |
| 264 explicit LiteralScope(Scanner* self); |
| 265 ~LiteralScope(); |
| 266 void Complete(); |
| 267 |
| 268 private: |
| 269 Scanner* scanner_; |
| 270 bool complete_; |
| 271 }; |
| 272 |
| 273 explicit Scanner(UnicodeCache* scanner_contants); |
| 274 |
| 275 // Returns the current token again. |
| 276 Token::Value current_token() { return current_.token; } |
| 277 |
| 278 // One token look-ahead (past the token returned by Next()). |
| 279 Token::Value peek() const { return next_.token; } |
| 280 |
| 281 struct Location { |
| 282 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 283 Location() : beg_pos(0), end_pos(0) { } |
| 284 |
| 285 bool IsValid() const { |
| 286 return beg_pos >= 0 && end_pos >= beg_pos; |
| 287 } |
| 288 |
| 289 static Location invalid() { return Location(-1, -1); } |
| 290 |
| 291 int beg_pos; |
| 292 int end_pos; |
| 293 }; |
| 294 |
| 295 // Returns the location information for the current token |
| 296 // (the token returned by Next()). |
| 297 Location location() const { return current_.location; } |
| 298 Location peek_location() const { return next_.location; } |
| 299 |
| 300 // Returns the literal string, if any, for the current token (the |
| 301 // token returned by Next()). The literal is a Vector of ASCII or UC16 |
| 302 // characters (see is_literal_ascii()) and may contain 0-characters. |
| 303 // Literal strings are collected for identifiers, strings, and numbers. |
| 304 // These functions only give the correct result if the literal |
| 305 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 306 bool is_literal_ascii() { |
| 307 ASSERT_NOT_NULL(current_.literal_chars); |
| 308 return current_.literal_chars->is_ascii(); |
| 309 } |
| 310 Vector<const char> literal_ascii_string() { |
| 311 ASSERT_NOT_NULL(current_.literal_chars); |
| 312 return current_.literal_chars->ascii_literal(); |
| 313 } |
| 314 Vector<const uc16> literal_uc16_string() { |
| 315 ASSERT_NOT_NULL(current_.literal_chars); |
| 316 return current_.literal_chars->uc16_literal(); |
| 317 } |
| 318 int literal_length() const { |
| 319 ASSERT_NOT_NULL(current_.literal_chars); |
| 320 return current_.literal_chars->length(); |
| 321 } |
| 322 |
| 323 bool literal_contains_escapes() const { |
| 324 Location location = current_.location; |
| 325 int source_length = (location.end_pos - location.beg_pos); |
| 326 if (current_.token == Token::STRING) { |
| 327 // Subtract delimiters. |
| 328 source_length -= 2; |
| 329 } |
| 330 return current_.literal_chars->length() != source_length; |
| 331 } |
| 332 |
| 333 // Returns the literal string for the next token (the token that |
| 334 // would be returned if Next() were called). |
| 335 bool is_next_literal_ascii() { |
| 336 ASSERT_NOT_NULL(next_.literal_chars); |
| 337 return next_.literal_chars->is_ascii(); |
| 338 } |
| 339 Vector<const char> next_literal_ascii_string() { |
| 340 ASSERT_NOT_NULL(next_.literal_chars); |
| 341 return next_.literal_chars->ascii_literal(); |
| 342 } |
| 343 Vector<const uc16> next_literal_uc16_string() { |
| 344 ASSERT_NOT_NULL(next_.literal_chars); |
| 345 return next_.literal_chars->uc16_literal(); |
| 346 } |
| 347 int next_literal_length() const { |
| 348 ASSERT_NOT_NULL(next_.literal_chars); |
| 349 return next_.literal_chars->length(); |
| 350 } |
| 351 |
| 352 UnicodeCache* unicode_cache() { return unicode_cache_; } |
| 353 |
| 354 static const int kCharacterLookaheadBufferSize = 1; |
69 | 355 |
70 protected: | 356 protected: |
71 virtual unsigned BufferSeekForward(unsigned delta); | 357 // The current and look-ahead token. |
72 virtual unsigned FillBuffer(unsigned position, unsigned length); | 358 struct TokenDesc { |
73 | 359 Token::Value token; |
74 Handle<String> string_; | 360 Location location; |
75 unsigned start_position_; | 361 LiteralBuffer* literal_chars; |
76 unsigned length_; | 362 }; |
| 363 |
| 364 // Call this after setting source_ to the input. |
| 365 void Init() { |
| 366 // Set c0_ (one character ahead) |
| 367 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 368 Advance(); |
| 369 // Initialize current_ to not refer to a literal. |
| 370 current_.literal_chars = NULL; |
| 371 } |
| 372 |
| 373 // Literal buffer support |
| 374 inline void StartLiteral() { |
| 375 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 376 &literal_buffer2_ : &literal_buffer1_; |
| 377 free_buffer->Reset(); |
| 378 next_.literal_chars = free_buffer; |
| 379 } |
| 380 |
| 381 inline void AddLiteralChar(uc32 c) { |
| 382 ASSERT_NOT_NULL(next_.literal_chars); |
| 383 next_.literal_chars->AddChar(c); |
| 384 } |
| 385 |
| 386 // Complete scanning of a literal. |
| 387 inline void TerminateLiteral() { |
| 388 // Does nothing in the current implementation. |
| 389 } |
| 390 |
| 391 // Stops scanning of a literal and drops the collected characters, |
| 392 // e.g., due to an encountered error. |
| 393 inline void DropLiteral() { |
| 394 next_.literal_chars = NULL; |
| 395 } |
| 396 |
| 397 inline void AddLiteralCharAdvance() { |
| 398 AddLiteralChar(c0_); |
| 399 Advance(); |
| 400 } |
| 401 |
| 402 // Low-level scanning support. |
| 403 void Advance() { c0_ = source_->Advance(); } |
| 404 void PushBack(uc32 ch) { |
| 405 source_->PushBack(c0_); |
| 406 c0_ = ch; |
| 407 } |
| 408 |
| 409 inline Token::Value Select(Token::Value tok) { |
| 410 Advance(); |
| 411 return tok; |
| 412 } |
| 413 |
| 414 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 415 Advance(); |
| 416 if (c0_ == next) { |
| 417 Advance(); |
| 418 return then; |
| 419 } else { |
| 420 return else_; |
| 421 } |
| 422 } |
| 423 |
| 424 uc32 ScanHexNumber(int expected_length); |
| 425 |
| 426 // Return the current source position. |
| 427 int source_pos() { |
| 428 return source_->pos() - kCharacterLookaheadBufferSize; |
| 429 } |
| 430 |
| 431 UnicodeCache* unicode_cache_; |
| 432 |
| 433 // Buffers collecting literal strings, numbers, etc. |
| 434 LiteralBuffer literal_buffer1_; |
| 435 LiteralBuffer literal_buffer2_; |
| 436 |
| 437 TokenDesc current_; // desc for current token (as returned by Next()) |
| 438 TokenDesc next_; // desc for next token (one token look-ahead) |
| 439 |
| 440 // Input stream. Must be initialized to an UC16CharacterStream. |
| 441 UC16CharacterStream* source_; |
| 442 |
| 443 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 444 uc32 c0_; |
77 }; | 445 }; |
78 | 446 |
79 | 447 // ---------------------------------------------------------------------------- |
80 // UC16 stream based on a literal UTF-8 string. | 448 // JavaScriptScanner - base logic for JavaScript scanning. |
81 class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { | 449 |
| 450 class JavaScriptScanner : public Scanner { |
82 public: | 451 public: |
83 Utf8ToUC16CharacterStream(const byte* data, unsigned length); | 452 // A scope object for scanning one literal: the constructor calls |
84 virtual ~Utf8ToUC16CharacterStream(); | 453 // StartLiteral(), and the destructor calls DropLiteral() unless |
| 454 // Complete() was called first. |
| 455 class LiteralScope { |
| 456 public: |
| 457 explicit LiteralScope(JavaScriptScanner* self) |
| 458 : scanner_(self), complete_(false) { |
| 459 scanner_->StartLiteral(); |
| 460 } |
| 461 ~LiteralScope() { |
| 462 if (!complete_) scanner_->DropLiteral(); |
| 463 } |
| 464 void Complete() { |
| 465 scanner_->TerminateLiteral(); |
| 466 complete_ = true; |
| 467 } |
| 468 |
| 469 private: |
| 470 JavaScriptScanner* scanner_; |
| 471 bool complete_; |
| 472 }; |
| 473 |
| 474 explicit JavaScriptScanner(UnicodeCache* scanner_contants); |
| 475 |
| 476 void Initialize(UC16CharacterStream* source); |
| 477 |
| 478 // Returns the next token. |
| 479 Token::Value Next(); |
| 480 |
| 481 // Returns true if there was a line terminator before the peek'ed token, |
| 482 // possibly inside a multi-line comment. |
| 483 bool HasAnyLineTerminatorBeforeNext() const { |
| 484 return has_line_terminator_before_next_ || |
| 485 has_multiline_comment_before_next_; |
| 486 } |
| 487 |
| 488 // Scans the input as a regular expression pattern, previous |
| 489 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 490 bool ScanRegExpPattern(bool seen_equal); |
| 491 // Returns true if regexp flags are scanned (always since flags can |
| 492 // be empty). |
| 493 bool ScanRegExpFlags(); |
| 494 |
| 495 // Tells whether the buffer contains an identifier (no escapes). |
| 496 // Used for checking if a property name is an identifier. |
| 497 static bool IsIdentifier(unibrow::CharacterStream* buffer); |
| 498 |
| 499 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 500 uc32 ScanOctalEscape(uc32 c, int length); |
| 501 |
| 502 // Returns the location of the last seen octal literal |
| 503 Location octal_position() const { return octal_pos_; } |
| 504 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 505 |
| 506 // Seek forward to the given position. This operation does not |
| 507 // work in general, for instance when there are pushed back |
| 508 // characters, but works for seeking forward until simple delimiter |
| 509 // tokens, which is what it is used for. |
| 510 void SeekForward(int pos); |
| 511 |
| 512 bool HarmonyBlockScoping() const { |
| 513 return harmony_block_scoping_; |
| 514 } |
| 515 void SetHarmonyBlockScoping(bool block_scoping) { |
| 516 harmony_block_scoping_ = block_scoping; |
| 517 } |
| 518 |
85 | 519 |
86 protected: | 520 protected: |
87 virtual unsigned BufferSeekForward(unsigned delta); | 521 bool SkipWhiteSpace(); |
88 virtual unsigned FillBuffer(unsigned char_position, unsigned length); | 522 Token::Value SkipSingleLineComment(); |
89 void SetRawPosition(unsigned char_position); | 523 Token::Value SkipMultiLineComment(); |
90 | 524 |
91 const byte* raw_data_; | 525 // Scans a single JavaScript token. |
92 unsigned raw_data_length_; // Measured in bytes, not characters. | 526 void Scan(); |
93 unsigned raw_data_pos_; | 527 |
94 // The character position of the character at raw_data[raw_data_pos_]. | 528 void ScanDecimalDigits(); |
95 // Not necessarily the same as pos_. | 529 Token::Value ScanNumber(bool seen_period); |
96 unsigned raw_character_position_; | 530 Token::Value ScanIdentifierOrKeyword(); |
| 531 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 532 |
| 533 void ScanEscape(); |
| 534 Token::Value ScanString(); |
| 535 |
| 536 // Scans a possible HTML comment -- begins with '<!'. |
| 537 Token::Value ScanHtmlComment(); |
| 538 |
| 539 // Decodes a unicode escape-sequence which is part of an identifier. |
| 540 // If the escape sequence cannot be decoded the result is kBadChar. |
| 541 uc32 ScanIdentifierUnicodeEscape(); |
| 542 // Recognizes a unicode escape-sequence and adds its characters, |
| 543 // uninterpreted, to the current literal. Used for parsing RegExp |
| 544 // flags. |
| 545 bool ScanLiteralUnicodeEscape(); |
| 546 |
| 547 // Start position of the octal literal last scanned. |
| 548 Location octal_pos_; |
| 549 |
| 550 // Whether there is a line terminator whitespace character after |
| 551 // the current token, and before the next. Does not count newlines |
| 552 // inside multiline comments. |
| 553 bool has_line_terminator_before_next_; |
| 554 // Whether there is a multi-line comment that contains a |
| 555 // line-terminator after the current token, and before the next. |
| 556 bool has_multiline_comment_before_next_; |
| 557 // Whether we scan 'let' as a keyword for harmony block scoped |
| 558 // let bindings. |
| 559 bool harmony_block_scoping_; |
97 }; | 560 }; |
98 | 561 |
99 | |
100 // UTF16 buffer to read characters from an external string. | |
101 class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { | |
102 public: | |
103 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, | |
104 int start_position, | |
105 int end_position); | |
106 virtual ~ExternalTwoByteStringUC16CharacterStream(); | |
107 | |
108 virtual void PushBack(uc32 character) { | |
109 ASSERT(buffer_cursor_ > raw_data_); | |
110 buffer_cursor_--; | |
111 pos_--; | |
112 } | |
113 | |
114 protected: | |
115 virtual unsigned SlowSeekForward(unsigned delta) { | |
116 // Fast case always handles seeking. | |
117 return 0; | |
118 } | |
119 virtual bool ReadBlock() { | |
120 // Entire string is read at start. | |
121 return false; | |
122 } | |
123 Handle<ExternalTwoByteString> source_; | |
124 const uc16* raw_data_; // Pointer to the actual array of characters. | |
125 }; | |
126 | |
127 } } // namespace v8::internal | 562 } } // namespace v8::internal |
128 | 563 |
129 #endif // V8_SCANNER_H_ | 564 #endif // V8_SCANNER_H_ |
OLD | NEW |