OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 23 matching lines...) |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
37 | 37 |
// UTF8Buffer: a char buffer described by three pointers -- data_ (start),
// cursor_ (next write position) and limit_ (see ComputeLimit() below).
// This is a side-by-side diff: the left column is the OLD revision, the
// right column is the NEW revision of the same class.
38 class UTF8Buffer { | 38 class UTF8Buffer { |
39 public: | 39 public: |
40 UTF8Buffer(); | 40 UTF8Buffer(); |
41 ~UTF8Buffer(); | 41 ~UTF8Buffer(); |
42 | 42 |
// Appends one character. Fast path: while cursor_ has not passed limit_ and
// c is at most unibrow::Utf8::kMaxOneByteChar, the character is stored as a
// single byte and cursor_ advances. Every other case is handed to
// AddCharSlow() (declared below, defined outside this view).
// NEW drops the ASSERT_NOT_NULL(data_) check that OLD performs first.
43 void AddChar(uc32 c) { | 43 void AddChar(uc32 c) { |
44 ASSERT_NOT_NULL(data_); | |
45 if (cursor_ <= limit_ && | 44 if (cursor_ <= limit_ && |
46 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | 45 static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
47 *cursor_++ = static_cast<char>(c); | 46 *cursor_++ = static_cast<char>(c); |
48 } else { | 47 } else { |
49 AddCharSlow(c); | 48 AddCharSlow(c); |
50 } | 49 } |
51 } | 50 } |
52 | 51 |
// Reset() rewinds cursor_ to data_; pos() is the offset of cursor_ from data_.
// OLD: Reset() also allocates kInitialCapacity chars with NewArray<char> and
//      sets limit_ via ComputeLimit() when data_ is still NULL, and pos()
//      asserts that data_ is non-null.
// NEW: both are one-liners with no allocation and no assertion.
// NOTE(review): where NEW allocates data_ is not visible in this view --
// presumably the constructor or AddCharSlow(); confirm.
53 void Reset() { | 52 void Reset() { cursor_ = data_; } |
54 if (data_ == NULL) { | 53 int pos() const { return cursor_ - data_; } |
55 data_ = NewArray<char>(kInitialCapacity); | |
56 limit_ = ComputeLimit(data_, kInitialCapacity); | |
57 } | |
58 cursor_ = data_; | |
59 } | |
60 | |
61 int pos() const { | |
62 ASSERT_NOT_NULL(data_); | |
63 return cursor_ - data_; | |
64 } | |
65 | |
// Returns the start of the buffer (not the cursor).
66 char* data() const { return data_; } | 54 char* data() const { return data_; } |
67 | 55 |
68 private: | 56 private: |
// kInitialCapacity exists only in OLD, where Reset() uses it for the first
// allocation.
69 static const int kInitialCapacity = 256; | |
70 char* data_; | 57 char* data_; |
71 char* cursor_; | 58 char* cursor_; |
72 char* limit_; | 59 char* limit_; |
73 | 60 |
// Inverse of ComputeLimit(): recovers the capacity from limit_ by adding
// back unibrow::Utf8::kMaxEncodedSize. NEW drops the non-null assertion.
74 int Capacity() const { | 61 int Capacity() const { |
75 ASSERT_NOT_NULL(data_); | |
76 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; | 62 return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; |
77 } | 63 } |
78 | 64 |
// Returns the pointer kMaxEncodedSize bytes before the end of a buffer of
// the given capacity. NOTE(review): presumably this reserves room for one
// maximally encoded character at limit_ -- confirm against AddCharSlow().
79 static char* ComputeLimit(char* data, int capacity) { | 65 static char* ComputeLimit(char* data, int capacity) { |
80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; | 66 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; |
81 } | 67 } |
82 | 68 |
83 void AddCharSlow(uc32 c); | 69 void AddCharSlow(uc32 c); |
84 }; | 70 }; |
85 | 71 |
(...skipping 199 matching lines...) |
285 | 271 |
// Token location and literal accessors of the enclosing class (its header is
// outside this view). Left column: OLD revision; right column: NEW revision.
// OLD: each TokenDesc holds a UTF8Buffer* literal_buffer; the literal starts
//      at that buffer's data() and its length is pos() - 1, dropping the
//      terminal '\0'.
// NEW: each TokenDesc holds int offsets literal_pos / literal_end into the
//      single buffer literals_; the literal starts at
//      literals_.data()[literal_pos] and its length is
//      literal_end - literal_pos.
286 // Returns the location information for the current token | 272 // Returns the location information for the current token |
287 // (the token returned by Next()). | 273 // (the token returned by Next()). |
288 Location location() const { return current_.location; } | 274 Location location() const { return current_.location; } |
289 Location peek_location() const { return next_.location; } | 275 Location peek_location() const { return next_.location; } |
290 | 276 |
291 // Returns the literal string, if any, for the current token (the | 277 // Returns the literal string, if any, for the current token (the |
292 // token returned by Next()). The string is 0-terminated and in | 278 // token returned by Next()). The string is 0-terminated and in |
293 // UTF-8 format; they may contain 0-characters. Literal strings are | 279 // UTF-8 format; they may contain 0-characters. Literal strings are |
294 // collected for identifiers, strings, and numbers. | 280 // collected for identifiers, strings, and numbers. |
295 // These functions only give the correct result if the literal | |
296 // was scanned between calls to StartLiteral() and TerminateLiteral(). | |
297 const char* literal_string() const { | 281 const char* literal_string() const { |
298 return current_.literal_buffer->data(); | 282 return &literals_.data()[current_.literal_pos]; |
299 } | 283 } |
300 int literal_length() const { | 284 int literal_length() const { |
301 // Excluding terminal '\0' added by TerminateLiteral(). | 285 return current_.literal_end - current_.literal_pos; |
302 return current_.literal_buffer->pos() - 1; | 286 } |
// NEW defines next_literal() here, ahead of next_literal_string() and
// next_literal_length(); OLD defines the same function after them (old
// lines 316-319). It wraps the next token's literal pointer and length in a
// Vector<const char>.
 | 287 |
 | 288 Vector<const char> next_literal() const { |
 | 289 return Vector<const char>(next_literal_string(), next_literal_length()); |
303 } | 290 } |
304 | 291 |
305 // Returns the literal string for the next token (the token that | 292 // Returns the literal string for the next token (the token that |
306 // would be returned if Next() were called). | 293 // would be returned if Next() were called). |
307 const char* next_literal_string() const { | 294 const char* next_literal_string() const { |
308 return next_.literal_buffer->data(); | 295 return &literals_.data()[next_.literal_pos]; |
309 } | 296 } |
310 // Returns the length of the next token (that would be returned if | 297 // Returns the length of the next token (that would be returned if |
311 // Next() were called). | 298 // Next() were called). |
312 int next_literal_length() const { | 299 int next_literal_length() const { |
313 return next_.literal_buffer->pos() - 1; | 300 return next_.literal_end - next_.literal_pos; |
314 } | |
315 | |
316 Vector<const char> next_literal() const { | |
317 return Vector<const char>(next_literal_string(), | |
318 next_literal_length()); | |
319 } | 301 } |
320 | 302 |
321 // Scans the input as a regular expression pattern, previous | 303 // Scans the input as a regular expression pattern, previous |
322 // character(s) must be /(=). Returns true if a pattern is scanned. | 304 // character(s) must be /(=). Returns true if a pattern is scanned. |
323 bool ScanRegExpPattern(bool seen_equal); | 305 bool ScanRegExpPattern(bool seen_equal); |
324 // Returns true if regexp flags are scanned (always since flags can | 306 // Returns true if regexp flags are scanned (always since flags can |
325 // be empty). | 307 // be empty). |
326 bool ScanRegExpFlags(); | 308 bool ScanRegExpFlags(); |
327 | 309 |
328 // Seek forward to the given position. This operation does not | 310 // Seek forward to the given position. This operation does not |
(...skipping 21 matching lines...) |
350 private: | 332 private: |
351 CharacterStreamUTF16Buffer char_stream_buffer_; | 333 CharacterStreamUTF16Buffer char_stream_buffer_; |
352 TwoByteStringUTF16Buffer two_byte_string_buffer_; | 334 TwoByteStringUTF16Buffer two_byte_string_buffer_; |
353 | 335 |
354 // Source. | 336 // Source. |
355 UTF16Buffer* source_; | 337 UTF16Buffer* source_; |
356 int position_; | 338 int position_; |
357 | 339 |
// OLD declares two UTF8Buffer members; NEW declares a single one, literals_,
// which literal_string() and next_literal_string() index using
// TokenDesc::literal_pos.
358 // Buffer to hold literal values (identifiers, strings, numbers) | 340 // Buffer to hold literal values (identifiers, strings, numbers) |
359 // using 0-terminated UTF-8 encoding. | 341 // using 0-terminated UTF-8 encoding. |
360 UTF8Buffer literal_buffer_1_; | 342 UTF8Buffer literals_; |
361 UTF8Buffer literal_buffer_2_; | |
362 | 343 |
363 bool stack_overflow_; | 344 bool stack_overflow_; |
364 static StaticResource<Utf8Decoder> utf8_decoder_; | 345 static StaticResource<Utf8Decoder> utf8_decoder_; |
365 | 346 |
366 // One Unicode character look-ahead; c0_ < 0 at the end of the input. | 347 // One Unicode character look-ahead; c0_ < 0 at the end of the input. |
367 uc32 c0_; | 348 uc32 c0_; |
368 | 349 |
// TokenDesc bundles a token value, its source Location and a handle on the
// token's literal text.
// OLD: literal_buffer points at the UTF8Buffer holding the literal.
// NEW: literal_pos and literal_end are int offsets; the literal accessors use
//      literal_pos to index literals_.data() and literal_end - literal_pos as
//      the literal length.
369 // The current and look-ahead token. | 350 // The current and look-ahead token. |
370 struct TokenDesc { | 351 struct TokenDesc { |
371 Token::Value token; | 352 Token::Value token; |
372 Location location; | 353 Location location; |
373 UTF8Buffer* literal_buffer; | 354 int literal_pos, literal_end; |
374 }; | 355 }; |
375 | 356 |
376 TokenDesc current_; // desc for current token (as returned by Next()) | 357 TokenDesc current_; // desc for current token (as returned by Next()) |
377 TokenDesc next_; // desc for next token (one token look-ahead) | 358 TokenDesc next_; // desc for next token (one token look-ahead) |
378 bool has_line_terminator_before_next_; | 359 bool has_line_terminator_before_next_; |
379 bool is_pre_parsing_; | 360 bool is_pre_parsing_; |
380 | 361 |
381 // Literal buffer support | 362 // Literal buffer support |
382 void StartLiteral(); | 363 void StartLiteral(); |
383 void AddChar(uc32 ch); | 364 void AddChar(uc32 ch); |
(...skipping 32 matching lines...) |
416 } | 397 } |
417 | 398 |
418 // Decodes a unicode escape-sequence which is part of an identifier. | 399 // Decodes a unicode escape-sequence which is part of an identifier. |
419 // If the escape sequence cannot be decoded the result is kBadRune. | 400 // If the escape sequence cannot be decoded the result is kBadRune. |
420 uc32 ScanIdentifierUnicodeEscape(); | 401 uc32 ScanIdentifierUnicodeEscape(); |
421 }; | 402 }; |
422 | 403 |
423 } } // namespace v8::internal | 404 } } // namespace v8::internal |
424 | 405 |
425 #endif // V8_SCANNER_H_ | 406 #endif // V8_SCANNER_H_ |
OLD | NEW |