Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: src/scanner.h

Issue 5188006: Push version 2.5.7 to trunk.... (Closed) Base URL: http://v8.googlecode.com/svn/trunk/
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/regexp.js ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 24 matching lines...) Expand all
35 namespace v8 { 35 namespace v8 {
36 namespace internal { 36 namespace internal {
37 37
38 38
39 class UTF8Buffer { 39 class UTF8Buffer {
40 public: 40 public:
41 UTF8Buffer(); 41 UTF8Buffer();
42 ~UTF8Buffer(); 42 ~UTF8Buffer();
43 43
44 inline void AddChar(uc32 c) { 44 inline void AddChar(uc32 c) {
45 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { 45 if (recording_) {
46 buffer_.Add(static_cast<char>(c)); 46 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
47 } else { 47 buffer_.Add(static_cast<char>(c));
48 AddCharSlow(c); 48 } else {
49 AddCharSlow(c);
50 }
49 } 51 }
50 } 52 }
51 53
52 void StartLiteral() { 54 void StartLiteral() {
53 buffer_.StartSequence(); 55 buffer_.StartSequence();
56 recording_ = true;
54 } 57 }
55 58
56 Vector<const char> EndLiteral() { 59 Vector<const char> EndLiteral() {
57 buffer_.Add(kEndMarker); 60 if (recording_) {
58 Vector<char> sequence = buffer_.EndSequence(); 61 recording_ = false;
59 return Vector<const char>(sequence.start(), sequence.length()); 62 buffer_.Add(kEndMarker);
63 Vector<char> sequence = buffer_.EndSequence();
64 return Vector<const char>(sequence.start(), sequence.length());
65 }
66 return Vector<const char>();
60 } 67 }
61 68
62 void DropLiteral() { 69 void DropLiteral() {
63 buffer_.DropSequence(); 70 if (recording_) {
71 recording_ = false;
72 buffer_.DropSequence();
73 }
64 } 74 }
65 75
66 void Reset() { 76 void Reset() {
67 buffer_.Reset(); 77 buffer_.Reset();
68 } 78 }
69 79
70 // The end marker added after a parsed literal. 80 // The end marker added after a parsed literal.
71 // Using zero allows the usage of strlen and similar functions on 81 // Using zero allows the usage of strlen and similar functions on
72 // identifiers and numbers (but not strings, since they may contain zero 82 // identifiers and numbers (but not strings, since they may contain zero
73 // bytes). 83 // bytes).
74 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside 84 // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside
75 // a UTF-8 string. This requires changes in all places that use 85 // a UTF-8 string. This requires changes in all places that use
76 // str-functions on the literals, but allows a single pointer to represent 86 // str-functions on the literals, but allows a single pointer to represent
77 // the literal, even if it contains embedded zeros. 87 // the literal, even if it contains embedded zeros.
78 static const char kEndMarker = '\x00'; 88 static const char kEndMarker = '\x00';
79 private: 89 private:
80 static const int kInitialCapacity = 256; 90 static const int kInitialCapacity = 256;
81 SequenceCollector<char, 4> buffer_; 91 SequenceCollector<char, 4> buffer_;
82 92 bool recording_;
83 void AddCharSlow(uc32 c); 93 void AddCharSlow(uc32 c);
84 }; 94 };
85 95
86 96
87 // Interface through which the scanner reads characters from the input source.
88 class UTF16Buffer {
89 public:
90 UTF16Buffer();
91 virtual ~UTF16Buffer() {}
92
93 virtual void PushBack(uc32 ch) = 0;
94 // Returns a value < 0 when the buffer end is reached.
95 virtual uc32 Advance() = 0;
96 virtual void SeekForward(int pos) = 0;
97
98 int pos() const { return pos_; }
99
100 protected:
101 int pos_; // Current position in the buffer.
102 int end_; // Position where scanning should stop (EOF).
103 };
104
105
106 // UTF16 buffer to read characters from a character stream. 97 // UTF16 buffer to read characters from a character stream.
107 class CharacterStreamUTF16Buffer: public UTF16Buffer { 98 class CharacterStreamUTF16Buffer: public UTF16Buffer {
108 public: 99 public:
109 CharacterStreamUTF16Buffer(); 100 CharacterStreamUTF16Buffer();
110 virtual ~CharacterStreamUTF16Buffer() {} 101 virtual ~CharacterStreamUTF16Buffer() {}
111 void Initialize(Handle<String> data, 102 void Initialize(Handle<String> data,
112 unibrow::CharacterStream* stream, 103 unibrow::CharacterStream* stream,
113 int start_position, 104 int start_position,
114 int end_position); 105 int end_position);
115 virtual void PushBack(uc32 ch); 106 virtual void PushBack(uc32 ch);
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 bool ScanRegExpFlags(); 235 bool ScanRegExpFlags();
245 236
246 // Seek forward to the given position. This operation does not 237 // Seek forward to the given position. This operation does not
247 // work in general, for instance when there are pushed back 238 // work in general, for instance when there are pushed back
248 // characters, but works for seeking forward until simple delimiter 239 // characters, but works for seeking forward until simple delimiter
249 // tokens, which is what it is used for. 240 // tokens, which is what it is used for.
250 void SeekForward(int pos); 241 void SeekForward(int pos);
251 242
252 bool stack_overflow() { return stack_overflow_; } 243 bool stack_overflow() { return stack_overflow_; }
253 244
254 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
255
256 // Tells whether the buffer contains an identifier (no escapes). 245 // Tells whether the buffer contains an identifier (no escapes).
257 // Used for checking if a property name is an identifier. 246 // Used for checking if a property name is an identifier.
258 static bool IsIdentifier(unibrow::CharacterStream* buffer); 247 static bool IsIdentifier(unibrow::CharacterStream* buffer);
259 248
260 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
261 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
262 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
263 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
264
265 static const int kCharacterLookaheadBufferSize = 1; 249 static const int kCharacterLookaheadBufferSize = 1;
266 static const int kNoEndPosition = 1; 250 static const int kNoEndPosition = 1;
267 251
268 private: 252 private:
269 // The current and look-ahead token. 253 // The current and look-ahead token.
270 struct TokenDesc { 254 struct TokenDesc {
271 Token::Value token; 255 Token::Value token;
272 Location location; 256 Location location;
273 Vector<const char> literal_chars; 257 Vector<const char> literal_chars;
274 }; 258 };
275 259
276 void Init(Handle<String> source, 260 void Init(Handle<String> source,
277 unibrow::CharacterStream* stream, 261 unibrow::CharacterStream* stream,
278 int start_position, int end_position, 262 int start_position, int end_position,
279 ParserLanguage language); 263 ParserLanguage language);
280 264
281 // Literal buffer support 265 // Literal buffer support
282 inline void StartLiteral(); 266 inline void StartLiteral();
283 inline void AddChar(uc32 ch); 267 inline void AddLiteralChar(uc32 ch);
284 inline void AddCharAdvance(); 268 inline void AddLiteralCharAdvance();
285 inline void TerminateLiteral(); 269 inline void TerminateLiteral();
286 // Stops scanning of a literal, e.g., due to an encountered error. 270 // Stops scanning of a literal, e.g., due to an encountered error.
287 inline void DropLiteral(); 271 inline void DropLiteral();
288 272
289 // Low-level scanning support. 273 // Low-level scanning support.
290 void Advance() { c0_ = source_->Advance(); } 274 void Advance() { c0_ = source_->Advance(); }
291 void PushBack(uc32 ch) { 275 void PushBack(uc32 ch) {
292 source_->PushBack(ch); 276 source_->PushBack(ch);
293 c0_ = ch; 277 c0_ = ch;
294 } 278 }
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 368
385 // Used to convert the source string into a character stream when a stream 369 // Used to convert the source string into a character stream when a stream
386 // is not passed to the scanner. 370 // is not passed to the scanner.
387 SafeStringInputBuffer safe_string_input_buffer_; 371 SafeStringInputBuffer safe_string_input_buffer_;
388 372
389 // Buffer to hold literal values (identifiers, strings, numbers) 373 // Buffer to hold literal values (identifiers, strings, numbers)
390 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. 374 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
391 UTF8Buffer literal_buffer_; 375 UTF8Buffer literal_buffer_;
392 376
393 bool stack_overflow_; 377 bool stack_overflow_;
394 static StaticResource<Utf8Decoder> utf8_decoder_;
395 378
396 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 379 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
397 uc32 c0_; 380 uc32 c0_;
398 }; 381 };
399 382
383
384 // ExternalStringUTF16Buffer
385 template <typename StringType, typename CharType>
386 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
387 : raw_data_(NULL) { }
388
389
390 template <typename StringType, typename CharType>
391 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
392 Handle<StringType> data,
393 int start_position,
394 int end_position) {
395 ASSERT(!data.is_null());
396 raw_data_ = data->resource()->data();
397
398 ASSERT(end_position <= data->length());
399 if (start_position > 0) {
400 SeekForward(start_position);
401 }
402 end_ =
403 end_position != Scanner::kNoEndPosition ? end_position : data->length();
404 }
405
406
407 template <typename StringType, typename CharType>
408 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
409 if (pos_ < end_) {
410 return raw_data_[pos_++];
411 } else {
412 // note: currently the following increment is necessary to avoid a
413 // test-parser problem!
414 pos_++;
415 return static_cast<uc32>(-1);
416 }
417 }
418
419
420 template <typename StringType, typename CharType>
421 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
422 pos_--;
423 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
424 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
425 }
426
427
428 template <typename StringType, typename CharType>
429 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
430 pos_ = pos;
431 }
432
400 } } // namespace v8::internal 433 } } // namespace v8::internal
401 434
402 #endif // V8_SCANNER_H_ 435 #endif // V8_SCANNER_H_
OLD | NEW
« no previous file with comments | « src/regexp.js ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698