| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 namespace v8 { | 34 namespace v8 { |
| 35 namespace internal { | 35 namespace internal { |
| 36 | 36 |
| 37 | 37 |
| 38 class UTF8Buffer { | 38 class UTF8Buffer { |
| 39 public: | 39 public: |
| 40 UTF8Buffer(); | 40 UTF8Buffer(); |
| 41 ~UTF8Buffer(); | 41 ~UTF8Buffer(); |
| 42 | 42 |
| 43 void AddChar(uc32 c) { | 43 void AddChar(uc32 c) { |
| 44 ASSERT_NOT_NULL(data_); | |
| 45 if (cursor_ <= limit_ && | 44 if (cursor_ <= limit_ && |
| 46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 47 *cursor_++ = static_cast<char>(c); | 46 *cursor_++ = static_cast<char>(c); |
| 48 } else { | 47 } else { |
| 49 AddCharSlow(c); | 48 AddCharSlow(c); |
| 50 } | 49 } |
| 51 } | 50 } |
| 52 | 51 |
| 53 void Reset() { | 52 void Reset() { cursor_ = data_; } |
| 54 if (data_ == NULL) { | 53 int pos() const { return cursor_ - data_; } |
| 55 data_ = NewArray<char>(kInitialCapacity); | |
| 56 limit_ = ComputeLimit(data_, kInitialCapacity); | |
| 57 } | |
| 58 cursor_ = data_; | |
| 59 } | |
| 60 | |
| 61 int pos() const { | |
| 62 ASSERT_NOT_NULL(data_); | |
| 63 return cursor_ - data_; | |
| 64 } | |
| 65 | |
| 66 char* data() const { return data_; } | 54 char* data() const { return data_; } |
| 67 | 55 |
| 68 private: | 56 private: |
| 69 static const int kInitialCapacity = 256; | |
| 70 char* data_; | 57 char* data_; |
| 71 char* cursor_; | 58 char* cursor_; |
| 72 char* limit_; | 59 char* limit_; |
| 73 | 60 |
| 74 int Capacity() const { | 61 int Capacity() const { |
| 75 ASSERT_NOT_NULL(data_); | |
| 76 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; | 62 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; |
| 77 } | 63 } |
| 78 | 64 |
| 79 static char* ComputeLimit(char* data, int capacity) { | 65 static char* ComputeLimit(char* data, int capacity) { |
| 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | 66 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; |
| 81 } | 67 } |
| 82 | 68 |
| 83 void AddCharSlow(uc32 c); | 69 void AddCharSlow(uc32 c); |
| 84 }; | 70 }; |
| 85 | 71 |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 285 | 271 |
| 286 // Returns the location information for the current token | 272 // Returns the location information for the current token |
| 287 // (the token returned by Next()). | 273 // (the token returned by Next()). |
| 288 Location location() const { return current_.location; } | 274 Location location() const { return current_.location; } |
| 289 Location peek_location() const { return next_.location; } | 275 Location peek_location() const { return next_.location; } |
| 290 | 276 |
| 291 // Returns the literal string, if any, for the current token (the | 277 // Returns the literal string, if any, for the current token (the |
| 292 // token returned by Next()). The string is 0-terminated and in | 278 // token returned by Next()). The string is 0-terminated and in |
| 293 // UTF-8 format; they may contain 0-characters. Literal strings are | 279 // UTF-8 format; they may contain 0-characters. Literal strings are |
| 294 // collected for identifiers, strings, and numbers. | 280 // collected for identifiers, strings, and numbers. |
| 295 // These functions only give the correct result if the literal | |
| 296 // was scanned between calls to StartLiteral() and TerminateLiteral(). | |
| 297 const char* literal_string() const { | 281 const char* literal_string() const { |
| 298 return current_.literal_buffer->data(); | 282 return &literals_.data()[current_.literal_pos]; |
| 299 } | 283 } |
| 300 int literal_length() const { | 284 int literal_length() const { |
| 301 // Excluding terminal '\0' added by TerminateLiteral(). | 285 return current_.literal_end - current_.literal_pos; |
| 302 return current_.literal_buffer->pos() - 1; | 286 } |
| 287 |
| 288 Vector<const char> next_literal() const { |
| 289 return Vector<const char>(next_literal_string(), next_literal_length()); |
| 303 } | 290 } |
| 304 | 291 |
| 305 // Returns the literal string for the next token (the token that | 292 // Returns the literal string for the next token (the token that |
| 306 // would be returned if Next() were called). | 293 // would be returned if Next() were called). |
| 307 const char* next_literal_string() const { | 294 const char* next_literal_string() const { |
| 308 return next_.literal_buffer->data(); | 295 return &literals_.data()[next_.literal_pos]; |
| 309 } | 296 } |
| 310 // Returns the length of the next token (that would be returned if | 297 // Returns the length of the next token (that would be returned if |
| 311 // Next() were called). | 298 // Next() were called). |
| 312 int next_literal_length() const { | 299 int next_literal_length() const { |
| 313 return next_.literal_buffer->pos() - 1; | 300 return next_.literal_end - next_.literal_pos; |
| 314 } | |
| 315 | |
| 316 Vector<const char> next_literal() const { | |
| 317 return Vector<const char>(next_literal_string(), | |
| 318 next_literal_length()); | |
| 319 } | 301 } |
| 320 | 302 |
| 321 // Scans the input as a regular expression pattern, previous | 303 // Scans the input as a regular expression pattern, previous |
| 322 // character(s) must be /(=). Returns true if a pattern is scanned. | 304 // character(s) must be /(=). Returns true if a pattern is scanned. |
| 323 bool ScanRegExpPattern(bool seen_equal); | 305 bool ScanRegExpPattern(bool seen_equal); |
| 324 // Returns true if regexp flags are scanned (always since flags can | 306 // Returns true if regexp flags are scanned (always since flags can |
| 325 // be empty). | 307 // be empty). |
| 326 bool ScanRegExpFlags(); | 308 bool ScanRegExpFlags(); |
| 327 | 309 |
| 328 // Seek forward to the given position. This operation does not | 310 // Seek forward to the given position. This operation does not |
| (...skipping 21 matching lines...) Expand all Loading... |
| 350 private: | 332 private: |
| 351 CharacterStreamUTF16Buffer char_stream_buffer_; | 333 CharacterStreamUTF16Buffer char_stream_buffer_; |
| 352 TwoByteStringUTF16Buffer two_byte_string_buffer_; | 334 TwoByteStringUTF16Buffer two_byte_string_buffer_; |
| 353 | 335 |
| 354 // Source. | 336 // Source. |
| 355 UTF16Buffer* source_; | 337 UTF16Buffer* source_; |
| 356 int position_; | 338 int position_; |
| 357 | 339 |
| 358 // Buffer to hold literal values (identifiers, strings, numbers) | 340 // Buffer to hold literal values (identifiers, strings, numbers) |
| 359 // using 0-terminated UTF-8 encoding. | 341 // using 0-terminated UTF-8 encoding. |
| 360 UTF8Buffer literal_buffer_1_; | 342 UTF8Buffer literals_; |
| 361 UTF8Buffer literal_buffer_2_; | |
| 362 | 343 |
| 363 bool stack_overflow_; | 344 bool stack_overflow_; |
| 364 static StaticResource<Utf8Decoder> utf8_decoder_; | 345 static StaticResource<Utf8Decoder> utf8_decoder_; |
| 365 | 346 |
| 366 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 347 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
| 367 uc32 c0_; | 348 uc32 c0_; |
| 368 | 349 |
| 369 // The current and look-ahead token. | 350 // The current and look-ahead token. |
| 370 struct TokenDesc { | 351 struct TokenDesc { |
| 371 Token::Value token; | 352 Token::Value token; |
| 372 Location location; | 353 Location location; |
| 373 UTF8Buffer* literal_buffer; | 354 int literal_pos, literal_end; |
| 374 }; | 355 }; |
| 375 | 356 |
| 376 TokenDesc current_; // desc for current token (as returned by Next()) | 357 TokenDesc current_; // desc for current token (as returned by Next()) |
| 377 TokenDesc next_; // desc for next token (one token look-ahead) | 358 TokenDesc next_; // desc for next token (one token look-ahead) |
| 378 bool has_line_terminator_before_next_; | 359 bool has_line_terminator_before_next_; |
| 379 bool is_pre_parsing_; | 360 bool is_pre_parsing_; |
| 380 | 361 |
| 381 // Literal buffer support | 362 // Literal buffer support |
| 382 void StartLiteral(); | 363 void StartLiteral(); |
| 383 void AddChar(uc32 ch); | 364 void AddChar(uc32 ch); |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 416 } | 397 } |
| 417 | 398 |
| 418 // Decodes a unicode escape-sequence which is part of an identifier. | 399 // Decodes a unicode escape-sequence which is part of an identifier. |
| 419 // If the escape sequence cannot be decoded the result is kBadRune. | 400 // If the escape sequence cannot be decoded the result is kBadRune. |
| 420 uc32 ScanIdentifierUnicodeEscape(); | 401 uc32 ScanIdentifierUnicodeEscape(); |
| 421 }; | 402 }; |
| 422 | 403 |
| 423 } } // namespace v8::internal | 404 } } // namespace v8::internal |
| 424 | 405 |
| 425 #endif // V8_SCANNER_H_ | 406 #endif // V8_SCANNER_H_ |
| OLD | NEW |