OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 23 matching lines...) Expand all Loading... |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
37 | 37 |
38 class UTF8Buffer { | 38 class UTF8Buffer { |
39 public: | 39 public: |
40 UTF8Buffer(); | 40 UTF8Buffer(); |
41 ~UTF8Buffer(); | 41 ~UTF8Buffer(); |
42 | 42 |
43 void AddChar(uc32 c) { | 43 void AddChar(uc32 c) { |
| 44 ASSERT_NOT_NULL(data_); |
44 if (cursor_ <= limit_ && | 45 if (cursor_ <= limit_ && |
45 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
46 *cursor_++ = static_cast<char>(c); | 47 *cursor_++ = static_cast<char>(c); |
47 } else { | 48 } else { |
48 AddCharSlow(c); | 49 AddCharSlow(c); |
49 } | 50 } |
50 } | 51 } |
51 | 52 |
52 void Reset() { cursor_ = data_; } | 53 void Reset() { |
53 int pos() const { return cursor_ - data_; } | 54 if (data_ == NULL) { |
| 55 data_ = NewArray<char>(kInitialCapacity); |
| 56 limit_ = ComputeLimit(data_, kInitialCapacity); |
| 57 } |
| 58 cursor_ = data_; |
| 59 } |
| 60 |
| 61 int pos() const { |
| 62 ASSERT_NOT_NULL(data_); |
| 63 return cursor_ - data_; |
| 64 } |
| 65 |
54 char* data() const { return data_; } | 66 char* data() const { return data_; } |
55 | 67 |
56 private: | 68 private: |
| 69 static const int kInitialCapacity = 256; |
57 char* data_; | 70 char* data_; |
58 char* cursor_; | 71 char* cursor_; |
59 char* limit_; | 72 char* limit_; |
60 | 73 |
61 int Capacity() const { | 74 int Capacity() const { |
| 75 ASSERT_NOT_NULL(data_); |
62 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; | 76 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; |
63 } | 77 } |
64 | 78 |
65 static char* ComputeLimit(char* data, int capacity) { | 79 static char* ComputeLimit(char* data, int capacity) { |
66 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | 80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; |
67 } | 81 } |
68 | 82 |
69 void AddCharSlow(uc32 c); | 83 void AddCharSlow(uc32 c); |
70 }; | 84 }; |
71 | 85 |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
271 | 285 |
272 // Returns the location information for the current token | 286 // Returns the location information for the current token |
273 // (the token returned by Next()). | 287 // (the token returned by Next()). |
274 Location location() const { return current_.location; } | 288 Location location() const { return current_.location; } |
275 Location peek_location() const { return next_.location; } | 289 Location peek_location() const { return next_.location; } |
276 | 290 |
277 // Returns the literal string, if any, for the current token (the | 291 // Returns the literal string, if any, for the current token (the |
278 // token returned by Next()). The string is 0-terminated and in | 292 // token returned by Next()). The string is 0-terminated and in |
279 // UTF-8 format; they may contain 0-characters. Literal strings are | 293 // UTF-8 format; they may contain 0-characters. Literal strings are |
280 // collected for identifiers, strings, and numbers. | 294 // collected for identifiers, strings, and numbers. |
| 295 // These functions only give the correct result if the literal |
| 296 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
281 const char* literal_string() const { | 297 const char* literal_string() const { |
282 return &literals_.data()[current_.literal_pos]; | 298 return current_.literal_buffer->data(); |
283 } | 299 } |
284 int literal_length() const { | 300 int literal_length() const { |
285 return current_.literal_end - current_.literal_pos; | 301 // Excluding terminal '\0' added by TerminateLiteral(). |
286 } | 302 return current_.literal_buffer->pos() - 1; |
287 | |
288 Vector<const char> next_literal() const { | |
289 return Vector<const char>(next_literal_string(), next_literal_length()); | |
290 } | 303 } |
291 | 304 |
292 // Returns the literal string for the next token (the token that | 305 // Returns the literal string for the next token (the token that |
293 // would be returned if Next() were called). | 306 // would be returned if Next() were called). |
294 const char* next_literal_string() const { | 307 const char* next_literal_string() const { |
295 return &literals_.data()[next_.literal_pos]; | 308 return next_.literal_buffer->data(); |
296 } | 309 } |
297 // Returns the length of the next token (that would be returned if | 310 // Returns the length of the next token (that would be returned if |
298 // Next() were called). | 311 // Next() were called). |
299 int next_literal_length() const { | 312 int next_literal_length() const { |
300 return next_.literal_end - next_.literal_pos; | 313 return next_.literal_buffer->pos() - 1; |
| 314 } |
| 315 |
| 316 Vector<const char> next_literal() const { |
| 317 return Vector<const char>(next_literal_string(), |
| 318 next_literal_length()); |
301 } | 319 } |
302 | 320 |
303 // Scans the input as a regular expression pattern, previous | 321 // Scans the input as a regular expression pattern, previous |
304 // character(s) must be /(=). Returns true if a pattern is scanned. | 322 // character(s) must be /(=). Returns true if a pattern is scanned. |
305 bool ScanRegExpPattern(bool seen_equal); | 323 bool ScanRegExpPattern(bool seen_equal); |
306 // Returns true if regexp flags are scanned (always since flags can | 324 // Returns true if regexp flags are scanned (always since flags can |
307 // be empty). | 325 // be empty). |
308 bool ScanRegExpFlags(); | 326 bool ScanRegExpFlags(); |
309 | 327 |
310 // Seek forward to the given position. This operation does not | 328 // Seek forward to the given position. This operation does not |
(...skipping 21 matching lines...) Expand all Loading... |
332 private: | 350 private: |
333 CharacterStreamUTF16Buffer char_stream_buffer_; | 351 CharacterStreamUTF16Buffer char_stream_buffer_; |
334 TwoByteStringUTF16Buffer two_byte_string_buffer_; | 352 TwoByteStringUTF16Buffer two_byte_string_buffer_; |
335 | 353 |
336 // Source. | 354 // Source. |
337 UTF16Buffer* source_; | 355 UTF16Buffer* source_; |
338 int position_; | 356 int position_; |
339 | 357 |
340 // Buffer to hold literal values (identifiers, strings, numbers) | 358 // Buffer to hold literal values (identifiers, strings, numbers) |
341 // using 0-terminated UTF-8 encoding. | 359 // using 0-terminated UTF-8 encoding. |
342 UTF8Buffer literals_; | 360 UTF8Buffer literal_buffer_1_; |
| 361 UTF8Buffer literal_buffer_2_; |
343 | 362 |
344 bool stack_overflow_; | 363 bool stack_overflow_; |
345 static StaticResource<Utf8Decoder> utf8_decoder_; | 364 static StaticResource<Utf8Decoder> utf8_decoder_; |
346 | 365 |
347 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 366 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
348 uc32 c0_; | 367 uc32 c0_; |
349 | 368 |
350 // The current and look-ahead token. | 369 // The current and look-ahead token. |
  // Description of a single token; used for both the current token and the
  // look-ahead token.
  struct TokenDesc {
    Token::Value token;
    Location location;
    // Buffer read by the literal accessors (data() for the text, pos() - 1
    // for the length).
    // NOTE(review): presumably points at one of the scanner's two literal
    // buffers and may be unset for tokens without a literal - confirm.
    UTF8Buffer* literal_buffer;
  };
356 | 375 |
357 TokenDesc current_; // desc for current token (as returned by Next()) | 376 TokenDesc current_; // desc for current token (as returned by Next()) |
358 TokenDesc next_; // desc for next token (one token look-ahead) | 377 TokenDesc next_; // desc for next token (one token look-ahead) |
359 bool has_line_terminator_before_next_; | 378 bool has_line_terminator_before_next_; |
360 bool is_pre_parsing_; | 379 bool is_pre_parsing_; |
361 | 380 |
362 // Literal buffer support | 381 // Literal buffer support |
363 void StartLiteral(); | 382 void StartLiteral(); |
364 void AddChar(uc32 ch); | 383 void AddChar(uc32 ch); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
397 } | 416 } |
398 | 417 |
399 // Decodes a unicode escape-sequence which is part of an identifier. | 418 // Decodes a unicode escape-sequence which is part of an identifier. |
400 // If the escape sequence cannot be decoded the result is kBadRune. | 419 // If the escape sequence cannot be decoded the result is kBadRune. |
401 uc32 ScanIdentifierUnicodeEscape(); | 420 uc32 ScanIdentifierUnicodeEscape(); |
402 }; | 421 }; |
403 | 422 |
404 } } // namespace v8::internal | 423 } } // namespace v8::internal |
405 | 424 |
406 #endif // V8_SCANNER_H_ | 425 #endif // V8_SCANNER_H_ |
OLD | NEW |