Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(250)

Side by Side Diff: src/scanner-base.h

Issue 6577036: [Isolates] Merge from bleeding_edge to isolates, revisions 6100-6300. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
142 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 142 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
143 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 143 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
144 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 144 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
145 StaticResource<Utf8Decoder> utf8_decoder_; 145 StaticResource<Utf8Decoder> utf8_decoder_;
146 146
147 friend class Isolate; 147 friend class Isolate;
148 DISALLOW_COPY_AND_ASSIGN(ScannerConstants); 148 DISALLOW_COPY_AND_ASSIGN(ScannerConstants);
149 }; 149 };
150 150
151 // ---------------------------------------------------------------------------- 151 // ----------------------------------------------------------------------------
152 // LiteralCollector - Collector of chars of literals. 152 // LiteralBuffer - Collector of chars of literals.
153 153
154 class LiteralCollector { 154 class LiteralBuffer {
155 public: 155 public:
156 LiteralCollector(); 156 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
157 ~LiteralCollector();
158 157
159 inline void AddChar(uc32 c) { 158 ~LiteralBuffer() {
160 if (recording_) { 159 if (backing_store_.length() > 0) {
161 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { 160 backing_store_.Dispose();
162 buffer_.Add(static_cast<char>(c));
163 } else {
164 AddCharSlow(c);
165 }
166 } 161 }
167 } 162 }
168 163
169 void StartLiteral() { 164 inline void AddChar(uc16 character) {
170 buffer_.StartSequence(); 165 if (position_ >= backing_store_.length()) ExpandBuffer();
171 recording_ = true; 166 if (is_ascii_) {
167 if (character < kMaxAsciiCharCodeU) {
168 backing_store_[position_] = static_cast<byte>(character);
169 position_ += kASCIISize;
170 return;
171 }
172 ConvertToUC16();
173 }
174 *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
175 position_ += kUC16Size;
172 } 176 }
173 177
174 Vector<const char> EndLiteral() { 178 bool is_ascii() { return is_ascii_; }
175 if (recording_) { 179
176 recording_ = false; 180 Vector<const uc16> uc16_literal() {
177 buffer_.Add(kEndMarker); 181 ASSERT(!is_ascii_);
178 Vector<char> sequence = buffer_.EndSequence(); 182 ASSERT((position_ & 0x1) == 0);
179 return Vector<const char>(sequence.start(), sequence.length()); 183 return Vector<const uc16>(
180 } 184 reinterpret_cast<const uc16*>(backing_store_.start()),
181 return Vector<const char>(); 185 position_ >> 1);
182 } 186 }
183 187
184 void DropLiteral() { 188 Vector<const char> ascii_literal() {
185 if (recording_) { 189 ASSERT(is_ascii_);
186 recording_ = false; 190 return Vector<const char>(
187 buffer_.DropSequence(); 191 reinterpret_cast<const char*>(backing_store_.start()),
188 } 192 position_);
193 }
194
195 int length() {
196 return is_ascii_ ? position_ : (position_ >> 1);
189 } 197 }
190 198
191 void Reset() { 199 void Reset() {
192 buffer_.Reset(); 200 position_ = 0;
201 is_ascii_ = true;
202 }
203 private:
204 static const int kInitialCapacity = 16;
205 static const int kGrowthFactory = 4;
206 static const int kMinConversionSlack = 256;
207 static const int kMaxGrowth = 1 * MB;
208 inline int NewCapacity(int min_capacity) {
209 int capacity = Max(min_capacity, backing_store_.length());
210 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
211 return new_capacity;
193 } 212 }
194 213
195 // The end marker added after a parsed literal. 214 void ExpandBuffer() {
196 // Using zero allows the usage of strlen and similar functions on 215 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
197 // identifiers and numbers (but not strings, since they may contain zero 216 memcpy(new_store.start(), backing_store_.start(), position_);
198 // bytes). 217 backing_store_.Dispose();
199 static const char kEndMarker = '\x00'; 218 backing_store_ = new_store;
200 private: 219 }
201 static const int kInitialCapacity = 256; 220
202 SequenceCollector<char, 4> buffer_; 221 void ConvertToUC16() {
203 bool recording_; 222 ASSERT(is_ascii_);
204 void AddCharSlow(uc32 c); 223 Vector<byte> new_store;
224 int new_content_size = position_ * kUC16Size;
225 if (new_content_size >= backing_store_.length()) {
226 // Ensure room for all currently read characters as UC16 as well
227 // as the character about to be stored.
228 new_store = Vector<byte>::New(NewCapacity(new_content_size));
229 } else {
230 new_store = backing_store_;
231 }
232 char* src = reinterpret_cast<char*>(backing_store_.start());
233 uc16* dst = reinterpret_cast<uc16*>(new_store.start());
234 for (int i = position_ - 1; i >= 0; i--) {
235 dst[i] = src[i];
236 }
237 if (new_store.start() != backing_store_.start()) {
238 backing_store_.Dispose();
239 backing_store_ = new_store;
240 }
241 position_ = new_content_size;
242 is_ascii_ = false;
243 }
244
245 bool is_ascii_;
246 int position_;
247 Vector<byte> backing_store_;
205 }; 248 };
206 249
250
207 // ---------------------------------------------------------------------------- 251 // ----------------------------------------------------------------------------
208 // Scanner base-class. 252 // Scanner base-class.
209 253
210 // Generic functionality used by both JSON and JavaScript scanners. 254 // Generic functionality used by both JSON and JavaScript scanners.
211 class Scanner { 255 class Scanner {
212 public: 256 public:
213 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 257 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
214 258
215 class LiteralScope { 259 class LiteralScope {
216 public: 260 public:
(...skipping 25 matching lines...) Expand all
242 // (the token returned by Next()). 286 // (the token returned by Next()).
243 Location location() const { return current_.location; } 287 Location location() const { return current_.location; }
244 Location peek_location() const { return next_.location; } 288 Location peek_location() const { return next_.location; }
245 289
246 // Returns the literal string, if any, for the current token (the 290 // Returns the literal string, if any, for the current token (the
247 // token returned by Next()). The string is 0-terminated and in 291 // token returned by Next()). The string is 0-terminated and in
248 // UTF-8 format; they may contain 0-characters. Literal strings are 292 // UTF-8 format; they may contain 0-characters. Literal strings are
249 // collected for identifiers, strings, and numbers. 293 // collected for identifiers, strings, and numbers.
250 // These functions only give the correct result if the literal 294 // These functions only give the correct result if the literal
251 // was scanned between calls to StartLiteral() and TerminateLiteral(). 295 // was scanned between calls to StartLiteral() and TerminateLiteral().
252 const char* literal_string() const { 296 bool is_literal_ascii() {
253 return current_.literal_chars.start(); 297 ASSERT_NOT_NULL(current_.literal_chars);
298 return current_.literal_chars->is_ascii();
254 } 299 }
255 300 Vector<const char> literal_ascii_string() {
301 ASSERT_NOT_NULL(current_.literal_chars);
302 return current_.literal_chars->ascii_literal();
303 }
304 Vector<const uc16> literal_uc16_string() {
305 ASSERT_NOT_NULL(current_.literal_chars);
306 return current_.literal_chars->uc16_literal();
307 }
256 int literal_length() const { 308 int literal_length() const {
257 // Excluding terminal '\x00' added by TerminateLiteral(). 309 ASSERT_NOT_NULL(current_.literal_chars);
258 return current_.literal_chars.length() - 1; 310 return current_.literal_chars->length();
259 }
260
261 Vector<const char> literal() const {
262 return Vector<const char>(literal_string(), literal_length());
263 } 311 }
264 312
265 // Returns the literal string for the next token (the token that 313 // Returns the literal string for the next token (the token that
266 // would be returned if Next() were called). 314 // would be returned if Next() were called).
267 const char* next_literal_string() const { 315 bool is_next_literal_ascii() {
268 return next_.literal_chars.start(); 316 ASSERT_NOT_NULL(next_.literal_chars);
317 return next_.literal_chars->is_ascii();
269 } 318 }
270 319 Vector<const char> next_literal_ascii_string() {
271 320 ASSERT_NOT_NULL(next_.literal_chars);
272 // Returns the length of the next token (that would be returned if 321 return next_.literal_chars->ascii_literal();
273 // Next() were called). 322 }
323 Vector<const uc16> next_literal_uc16_string() {
324 ASSERT_NOT_NULL(next_.literal_chars);
325 return next_.literal_chars->uc16_literal();
326 }
274 int next_literal_length() const { 327 int next_literal_length() const {
275 // Excluding terminal '\x00' added by TerminateLiteral(). 328 ASSERT_NOT_NULL(next_.literal_chars);
276 return next_.literal_chars.length() - 1; 329 return next_.literal_chars->length();
277 }
278
279 Vector<const char> next_literal() const {
280 return Vector<const char>(next_literal_string(), next_literal_length());
281 } 330 }
282 331
283 static const int kCharacterLookaheadBufferSize = 1; 332 static const int kCharacterLookaheadBufferSize = 1;
284 333
285 protected: 334 protected:
286 // The current and look-ahead token. 335 // The current and look-ahead token.
287 struct TokenDesc { 336 struct TokenDesc {
288 Token::Value token; 337 Token::Value token;
289 Location location; 338 Location location;
290 Vector<const char> literal_chars; 339 LiteralBuffer* literal_chars;
291 }; 340 };
292 341
293 // Call this after setting source_ to the input. 342 // Call this after setting source_ to the input.
294 void Init() { 343 void Init() {
295 // Set c0_ (one character ahead) 344 // Set c0_ (one character ahead)
296 ASSERT(kCharacterLookaheadBufferSize == 1); 345 ASSERT(kCharacterLookaheadBufferSize == 1);
297 Advance(); 346 Advance();
298 // Initialize current_ to not refer to a literal. 347 // Initialize current_ to not refer to a literal.
299 current_.literal_chars = Vector<const char>(); 348 current_.literal_chars = NULL;
300 // Reset literal buffer.
301 literal_buffer_.Reset();
302 } 349 }
303 350
304 // Literal buffer support 351 // Literal buffer support
305 inline void StartLiteral() { 352 inline void StartLiteral() {
306 literal_buffer_.StartLiteral(); 353 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
354 &literal_buffer2_ : &literal_buffer1_;
355 free_buffer->Reset();
356 next_.literal_chars = free_buffer;
307 } 357 }
308 358
309 inline void AddLiteralChar(uc32 c) { 359 inline void AddLiteralChar(uc32 c) {
310 literal_buffer_.AddChar(c); 360 ASSERT_NOT_NULL(next_.literal_chars);
361 next_.literal_chars->AddChar(c);
311 } 362 }
312 363
313 // Complete scanning of a literal. 364 // Complete scanning of a literal.
314 inline void TerminateLiteral() { 365 inline void TerminateLiteral() {
315 next_.literal_chars = literal_buffer_.EndLiteral(); 366 // Does nothing in the current implementation.
316 } 367 }
317 368
318 // Stops scanning of a literal and drop the collected characters, 369 // Stops scanning of a literal and drop the collected characters,
319 // e.g., due to an encountered error. 370 // e.g., due to an encountered error.
320 inline void DropLiteral() { 371 inline void DropLiteral() {
321 literal_buffer_.DropLiteral(); 372 next_.literal_chars = NULL;
322 } 373 }
323 374
324 inline void AddLiteralCharAdvance() { 375 inline void AddLiteralCharAdvance() {
325 AddLiteralChar(c0_); 376 AddLiteralChar(c0_);
326 Advance(); 377 Advance();
327 } 378 }
328 379
329 // Low-level scanning support. 380 // Low-level scanning support.
330 void Advance() { c0_ = source_->Advance(); } 381 void Advance() { c0_ = source_->Advance(); }
331 void PushBack(uc32 ch) { 382 void PushBack(uc32 ch) {
(...skipping 19 matching lines...) Expand all
351 uc32 ScanHexEscape(uc32 c, int length); 402 uc32 ScanHexEscape(uc32 c, int length);
352 uc32 ScanOctalEscape(uc32 c, int length); 403 uc32 ScanOctalEscape(uc32 c, int length);
353 404
354 // Return the current source position. 405 // Return the current source position.
355 int source_pos() { 406 int source_pos() {
356 return source_->pos() - kCharacterLookaheadBufferSize; 407 return source_->pos() - kCharacterLookaheadBufferSize;
357 } 408 }
358 409
359 ScannerConstants* scanner_constants_; 410 ScannerConstants* scanner_constants_;
360 411
412 // Buffers collecting literal strings, numbers, etc.
413 LiteralBuffer literal_buffer1_;
414 LiteralBuffer literal_buffer2_;
415
361 TokenDesc current_; // desc for current token (as returned by Next()) 416 TokenDesc current_; // desc for current token (as returned by Next())
362 TokenDesc next_; // desc for next token (one token look-ahead) 417 TokenDesc next_; // desc for next token (one token look-ahead)
363 418
364 // Input stream. Must be initialized to an UC16CharacterStream. 419 // Input stream. Must be initialized to an UC16CharacterStream.
365 UC16CharacterStream* source_; 420 UC16CharacterStream* source_;
366 421
367 // Buffer to hold literal values (identifiers, strings, numbers)
368 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
369 LiteralCollector literal_buffer_;
370 422
371 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 423 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
372 uc32 c0_; 424 uc32 c0_;
373 }; 425 };
374 426
375 // ---------------------------------------------------------------------------- 427 // ----------------------------------------------------------------------------
376 // JavaScriptScanner - base logic for JavaScript scanning. 428 // JavaScriptScanner - base logic for JavaScript scanning.
377 429
378 class JavaScriptScanner : public Scanner { 430 class JavaScriptScanner : public Scanner {
379 public: 431 public:
380
381 // Bit vector representing set of types of literals.
382 enum LiteralType {
383 kNoLiterals = 0,
384 kLiteralNumber = 1,
385 kLiteralIdentifier = 2,
386 kLiteralString = 4,
387 kLiteralRegExp = 8,
388 kLiteralRegExpFlags = 16,
389 kAllLiterals = 31
390 };
391
392 // A LiteralScope that disables recording of some types of JavaScript 432 // A LiteralScope that disables recording of some types of JavaScript
393 // literals. If the scanner is configured to not record the specific 433 // literals. If the scanner is configured to not record the specific
394 // type of literal, the scope will not call StartLiteral. 434 // type of literal, the scope will not call StartLiteral.
395 class LiteralScope { 435 class LiteralScope {
396 public: 436 public:
397 LiteralScope(JavaScriptScanner* self, LiteralType type) 437 explicit LiteralScope(JavaScriptScanner* self)
398 : scanner_(self), complete_(false) { 438 : scanner_(self), complete_(false) {
399 if (scanner_->RecordsLiteral(type)) { 439 scanner_->StartLiteral();
400 scanner_->StartLiteral();
401 }
402 } 440 }
403 ~LiteralScope() { 441 ~LiteralScope() {
404 if (!complete_) scanner_->DropLiteral(); 442 if (!complete_) scanner_->DropLiteral();
405 } 443 }
406 void Complete() { 444 void Complete() {
407 scanner_->TerminateLiteral(); 445 scanner_->TerminateLiteral();
408 complete_ = true; 446 complete_ = true;
409 } 447 }
410 448
411 private: 449 private:
(...skipping 21 matching lines...) Expand all
433 // Tells whether the buffer contains an identifier (no escapes). 471 // Tells whether the buffer contains an identifier (no escapes).
434 // Used for checking if a property name is an identifier. 472 // Used for checking if a property name is an identifier.
435 static bool IsIdentifier(unibrow::CharacterStream* buffer); 473 static bool IsIdentifier(unibrow::CharacterStream* buffer);
436 474
437 // Seek forward to the given position. This operation does not 475 // Seek forward to the given position. This operation does not
438 // work in general, for instance when there are pushed back 476 // work in general, for instance when there are pushed back
439 // characters, but works for seeking forward until simple delimiter 477 // characters, but works for seeking forward until simple delimiter
440 // tokens, which is what it is used for. 478 // tokens, which is what it is used for.
441 void SeekForward(int pos); 479 void SeekForward(int pos);
442 480
443 // Whether this scanner records the given literal type or not.
444 bool RecordsLiteral(LiteralType type) {
445 return (literal_flags_ & type) != 0;
446 }
447
448 protected: 481 protected:
449 bool SkipWhiteSpace(); 482 bool SkipWhiteSpace();
450 Token::Value SkipSingleLineComment(); 483 Token::Value SkipSingleLineComment();
451 Token::Value SkipMultiLineComment(); 484 Token::Value SkipMultiLineComment();
452 485
453 // Scans a single JavaScript token. 486 // Scans a single JavaScript token.
454 void Scan(); 487 void Scan();
455 488
456 void ScanDecimalDigits(); 489 void ScanDecimalDigits();
457 Token::Value ScanNumber(bool seen_period); 490 Token::Value ScanNumber(bool seen_period);
458 Token::Value ScanIdentifierOrKeyword(); 491 Token::Value ScanIdentifierOrKeyword();
459 Token::Value ScanIdentifierSuffix(LiteralScope* literal); 492 Token::Value ScanIdentifierSuffix(LiteralScope* literal);
460 493
461 void ScanEscape(); 494 void ScanEscape();
462 Token::Value ScanString(); 495 Token::Value ScanString();
463 496
464 // Scans a possible HTML comment -- begins with '<!'. 497 // Scans a possible HTML comment -- begins with '<!'.
465 Token::Value ScanHtmlComment(); 498 Token::Value ScanHtmlComment();
466 499
467 // Decodes a unicode escape-sequence which is part of an identifier. 500 // Decodes a unicode escape-sequence which is part of an identifier.
468 // If the escape sequence cannot be decoded the result is kBadChar. 501 // If the escape sequence cannot be decoded the result is kBadChar.
469 uc32 ScanIdentifierUnicodeEscape(); 502 uc32 ScanIdentifierUnicodeEscape();
470 503
471 int literal_flags_;
472 bool has_line_terminator_before_next_; 504 bool has_line_terminator_before_next_;
473 }; 505 };
474 506
475 507
476 // ---------------------------------------------------------------------------- 508 // ----------------------------------------------------------------------------
477 // Keyword matching state machine. 509 // Keyword matching state machine.
478 510
479 class KeywordMatcher { 511 class KeywordMatcher {
480 // Incrementally recognize keywords. 512 // Incrementally recognize keywords.
481 // 513 //
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
594 // keyword with the current prefix). 626 // keyword with the current prefix).
595 const char* keyword_; 627 const char* keyword_;
596 int counter_; 628 int counter_;
597 Token::Value keyword_token_; 629 Token::Value keyword_token_;
598 }; 630 };
599 631
600 632
601 } } // namespace v8::internal 633 } } // namespace v8::internal
602 634
603 #endif // V8_SCANNER_BASE_H_ 635 #endif // V8_SCANNER_BASE_H_
OLD | NEW
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698