Index: src/scanner-base.h |
diff --git a/src/scanner-base.h b/src/scanner-base.h |
index c50b8f3ef62dee77f0daba35aa61225de62a9d04..e773586d044ef047e5a0a88dba2f0754e3a6a412 100644 |
--- a/src/scanner-base.h |
+++ b/src/scanner-base.h |
@@ -141,61 +141,103 @@ class ScannerConstants : AllStatic { |
}; |
// ---------------------------------------------------------------------------- |
-// LiteralCollector - Collector of chars of literals. |
+// LiteralBuffer - Collector of chars of literals. |
-class LiteralCollector { |
+class LiteralBuffer { |
public: |
- LiteralCollector(); |
- ~LiteralCollector(); |
- |
- inline void AddChar(uc32 c) { |
- if (recording_) { |
- if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
- buffer_.Add(static_cast<char>(c)); |
- } else { |
- AddCharSlow(c); |
+ // Creates an empty buffer in ASCII (one byte per character) mode with a |
+ // default-constructed backing store. |
+ LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } |
+ |
+ // Disposes the backing store if it has a non-zero length. |
+ ~LiteralBuffer() { |
+ if (backing_store_.length() > 0) { |
+ backing_store_.Dispose(); |
+ } |
+ } |
+ |
+ // Appends one character. position_ is a byte offset into backing_store_. |
+ // In ASCII mode a character below kMaxAsciiCharCodeU is stored as a single |
+ // byte and advances position_ by kASCIISize. The first character that is |
+ // not below that limit converts the whole buffer with ConvertToUC16(); |
+ // from then on every character is stored as a uc16 and advances |
+ // position_ by kUC16Size. |
+ inline void AddChar(uc16 character) { |
+ // Grow first so that position_ is a valid index into the backing store. |
+ if (position_ >= backing_store_.length()) ExpandBuffer(); |
+ if (is_ascii_) { |
+ if (character < kMaxAsciiCharCodeU) { |
+ backing_store_[position_] = static_cast<byte>(character); |
+ position_ += kASCIISize; |
+ return; |
} |
+ ConvertToUC16(); |
} |
+ // NOTE(review): this store needs kUC16Size free bytes at position_, while |
+ // the check at the top only guarantees one; confirm that ConvertToUC16() |
+ // and the capacities produced by NewCapacity() always leave that room. |
+ *reinterpret_cast<uc16*>(&backing_store_[position_]) = character; |
+ position_ += kUC16Size; |
} |
- void StartLiteral() { |
- buffer_.StartSequence(); |
- recording_ = true; |
+ // True while the buffer is in ASCII mode (one byte per character). |
+ bool is_ascii() { return is_ascii_; } |
+ |
+ // Returns the collected characters as uc16 units. Must only be called |
+ // once the buffer has left ASCII mode (asserted). The returned vector |
+ // points into backing_store_ rather than copying it. |
+ Vector<const uc16> uc16_literal() { |
+ ASSERT(!is_ascii_); |
+ // position_ counts bytes, so it must be even in uc16 mode. |
+ ASSERT((position_ & 0x1) == 0); |
+ return Vector<const uc16>( |
+ reinterpret_cast<const uc16*>(backing_store_.start()), |
+ position_ >> 1); |
} |
- Vector<const char> EndLiteral() { |
- if (recording_) { |
- recording_ = false; |
- buffer_.Add(kEndMarker); |
- Vector<char> sequence = buffer_.EndSequence(); |
- return Vector<const char>(sequence.start(), sequence.length()); |
- } |
- return Vector<const char>(); |
+ // Returns the collected characters as chars. Must only be called while |
+ // the buffer is in ASCII mode (asserted). The returned vector points into |
+ // backing_store_ and its length is exactly position_; no terminating |
+ // character is included. |
+ Vector<const char> ascii_literal() { |
+ ASSERT(is_ascii_); |
+ return Vector<const char>( |
+ reinterpret_cast<const char*>(backing_store_.start()), |
+ position_); |
} |
- void DropLiteral() { |
- if (recording_) { |
- recording_ = false; |
- buffer_.DropSequence(); |
- } |
+ // Number of characters collected so far. position_ is a byte count, so |
+ // it is halved when the buffer holds uc16 units. |
+ int length() { |
+ return is_ascii_ ? position_ : (position_ >> 1); |
} |
+ // Empties the buffer and puts it back into ASCII mode. The backing store |
+ // is neither disposed nor shrunk here. |
void Reset() { |
- buffer_.Reset(); |
+ position_ = 0; |
+ is_ascii_ = true; |
} |
- |
- // The end marker added after a parsed literal. |
- // Using zero allows the usage of strlen and similar functions on |
- // identifiers and numbers (but not strings, since they may contain zero |
- // bytes). |
- static const char kEndMarker = '\x00'; |
private: |
- static const int kInitialCapacity = 256; |
- SequenceCollector<char, 4> buffer_; |
- bool recording_; |
- void AddCharSlow(uc32 c); |
+ static const int kInitialCapacity = 16; |
+ static const int kGrowthFactory = 4; |
+ static const int kMinConversionSlack = 256; |
+ static const int kMaxGrowth = 1 * MB; |
+  // Returns the size in bytes to allocate for a replacement backing store. |
+  // The starting point is the larger of min_capacity and the current store |
+  // length; that value is multiplied by kGrowthFactory, but the increase is |
+  // limited to at most kMaxGrowth bytes. |
+  inline int NewCapacity(int min_capacity) { |
+    const int base = Max(min_capacity, backing_store_.length()); |
+    const int scaled = base * kGrowthFactory; |
+    const int capped = base + kMaxGrowth; |
+    return Min(scaled, capped); |
+  } |
+ |
+  // Replaces the backing store with a larger one obtained from |
+  // NewCapacity(kInitialCapacity), copies the position_ bytes written so |
+  // far into it, and disposes the old store. |
+  void ExpandBuffer() { |
+    Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); |
+    // Nothing has been written yet on the first expansion, and the |
+    // default-constructed store presumably has a NULL start(). memcpy with |
+    // a null source is undefined even for a zero length, so skip the copy. |
+    if (position_ > 0) { |
+      memcpy(new_store.start(), backing_store_.start(), position_); |
+    } |
+    backing_store_.Dispose(); |
+    backing_store_ = new_store; |
+  } |
+ |
+  // Switches the buffer from ASCII mode to uc16 mode, widening the |
+  // position_ bytes collected so far to one uc16 each. The widening is done |
+  // in place when the current store is large enough; otherwise a new store |
+  // is allocated and the old one disposed. On return position_ is the byte |
+  // size of the widened content and is_ascii_ is false. |
+  void ConvertToUC16() { |
+    ASSERT(is_ascii_); |
+    Vector<byte> new_store; |
+    int new_content_size = position_ * kUC16Size; |
+    // Use >= rather than >: AddChar() stores one more uc16 directly after |
+    // the widened content, so a store that the content fills exactly would |
+    // be written past its end. Reallocate in that case as well. |
+    if (new_content_size >= backing_store_.length()) { |
+      new_store = Vector<byte>::New(NewCapacity(new_content_size)); |
+    } else { |
+      new_store = backing_store_; |
+    } |
+    char* src = reinterpret_cast<char*>(backing_store_.start()); |
+    uc16* dst = reinterpret_cast<uc16*>(new_store.start()); |
+    // Copy from the last character to the first so that an in-place |
+    // widening never overwrites a source byte that has not been read yet. |
+    for (int i = position_ - 1; i >= 0; i--) { |
+      dst[i] = src[i]; |
+    } |
+    if (new_store.start() != backing_store_.start()) { |
+      backing_store_.Dispose(); |
+      backing_store_ = new_store; |
+    } |
+    position_ = new_content_size; |
+    is_ascii_ = false; |
+  } |
+ |
+ bool is_ascii_; |
+ int position_; |
+ Vector<byte> backing_store_; |
}; |
+ |
// ---------------------------------------------------------------------------- |
// Scanner base-class. |
@@ -241,35 +283,40 @@ class Scanner { |
// collected for identifiers, strings, and numbers. |
// These functions only give the correct result if the literal |
// was scanned between calls to StartLiteral() and TerminateLiteral(). |
-  const char* literal_string() const { |
-    return current_.literal_chars.start(); |
+  // True if the current token's literal is held in ASCII form, in which |
+  // case literal_ascii_string() is the accessor to use; otherwise use |
+  // literal_uc16_string(). Like literal_length(), the accessors below are |
+  // const: they only read through the literal_chars pointer and never |
+  // modify the scanner, matching the const accessors they replace. |
+  bool is_literal_ascii() const { |
+    ASSERT_NOT_NULL(current_.literal_chars); |
+    return current_.literal_chars->is_ascii(); |
   } |
- |
-  int literal_length() const { |
-    // Excluding terminal '\x00' added by TerminateLiteral(). |
-    return current_.literal_chars.length() - 1; |
+  // The current token's literal as chars. The current token must have a |
+  // literal buffer (asserted). |
+  Vector<const char> literal_ascii_string() const { |
+    ASSERT_NOT_NULL(current_.literal_chars); |
+    return current_.literal_chars->ascii_literal(); |
   } |
- |
-  Vector<const char> literal() const { |
-    return Vector<const char>(literal_string(), literal_length()); |
+  // The current token's literal as uc16 units. The current token must have |
+  // a literal buffer (asserted). |
+  Vector<const uc16> literal_uc16_string() const { |
+    ASSERT_NOT_NULL(current_.literal_chars); |
+    return current_.literal_chars->uc16_literal(); |
+  } |
+  // Number of characters in the current token's literal. |
+  int literal_length() const { |
+    ASSERT_NOT_NULL(current_.literal_chars); |
+    return current_.literal_chars->length(); |
   } |
// Returns the literal string for the next token (the token that |
// would be returned if Next() were called). |
-  const char* next_literal_string() const { |
-    return next_.literal_chars.start(); |
+  // True if the next token's literal is held in ASCII form, in which case |
+  // next_literal_ascii_string() is the accessor to use; otherwise use |
+  // next_literal_uc16_string(). Like next_literal_length(), the accessors |
+  // below are const: they only read through the literal_chars pointer and |
+  // never modify the scanner, matching the const accessors they replace. |
+  bool is_next_literal_ascii() const { |
+    ASSERT_NOT_NULL(next_.literal_chars); |
+    return next_.literal_chars->is_ascii(); |
   } |
- |
- |
-  // Returns the length of the next token (that would be returned if |
-  // Next() were called). |
-  int next_literal_length() const { |
-    // Excluding terminal '\x00' added by TerminateLiteral(). |
-    return next_.literal_chars.length() - 1; |
+  // The next token's literal as chars. The next token must have a literal |
+  // buffer (asserted). |
+  Vector<const char> next_literal_ascii_string() const { |
+    ASSERT_NOT_NULL(next_.literal_chars); |
+    return next_.literal_chars->ascii_literal(); |
   } |
- |
-  Vector<const char> next_literal() const { |
-    return Vector<const char>(next_literal_string(), next_literal_length()); |
+  // The next token's literal as uc16 units. The next token must have a |
+  // literal buffer (asserted). |
+  Vector<const uc16> next_literal_uc16_string() const { |
+    ASSERT_NOT_NULL(next_.literal_chars); |
+    return next_.literal_chars->uc16_literal(); |
+  } |
+  // Number of characters in the literal of the next token (the token that |
+  // would be returned if Next() were called). |
+  int next_literal_length() const { |
+    ASSERT_NOT_NULL(next_.literal_chars); |
+    return next_.literal_chars->length(); |
   } |
static const int kCharacterLookaheadBufferSize = 1; |
@@ -279,7 +326,7 @@ class Scanner { |
struct TokenDesc { |
Token::Value token; |
Location location; |
- Vector<const char> literal_chars; |
+ // Buffer holding the token's literal. The scanner sets this to the |
+ // address of one of its own literal buffers in StartLiteral() and to |
+ // NULL in Init() and DropLiteral(), so the pointee is not owned here. |
+ LiteralBuffer* literal_chars; |
}; |
// Call this after setting source_ to the input. |
@@ -288,29 +335,31 @@ class Scanner { |
ASSERT(kCharacterLookaheadBufferSize == 1); |
Advance(); |
// Initialize current_ to not refer to a literal. |
- current_.literal_chars = Vector<const char>(); |
- // Reset literal buffer. |
- literal_buffer_.Reset(); |
+ current_.literal_chars = NULL; |
} |
// Literal buffer support |
inline void StartLiteral() { |
- literal_buffer_.StartLiteral(); |
+ LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
+ &literal_buffer2_ : &literal_buffer1_; |
+ free_buffer->Reset(); |
+ next_.literal_chars = free_buffer; |
} |
+ // Appends c to the literal being recorded for the next token. A literal |
+ // buffer must be attached to next_ (asserted). |
+ // NOTE(review): c is a uc32 but LiteralBuffer::AddChar() takes a uc16, so |
+ // the value is implicitly converted here; confirm callers never pass a |
+ // value that does not fit in a uc16. |
inline void AddLiteralChar(uc32 c) { |
- literal_buffer_.AddChar(c); |
+ ASSERT_NOT_NULL(next_.literal_chars); |
+ next_.literal_chars->AddChar(c); |
} |
// Complete scanning of a literal. |
inline void TerminateLiteral() { |
- next_.literal_chars = literal_buffer_.EndLiteral(); |
+ // Does nothing in the current implementation: next_.literal_chars is |
+ // already assigned by StartLiteral(), so nothing is left to hand over. |
} |
// Stops scanning of a literal and drop the collected characters, |
// e.g., due to an encountered error. |
inline void DropLiteral() { |
- literal_buffer_.DropLiteral(); |
+ // Only detaches the buffer from next_. The characters already collected |
+ // stay in the buffer until a later StartLiteral() resets that buffer. |
+ next_.literal_chars = NULL; |
} |
inline void AddLiteralCharAdvance() { |
@@ -348,15 +397,16 @@ class Scanner { |
return source_->pos() - kCharacterLookaheadBufferSize; |
} |
+ // Buffers collecting literal strings, numbers, etc. |
+ LiteralBuffer literal_buffer1_; |
+ LiteralBuffer literal_buffer2_; |
+ |
TokenDesc current_; // desc for current token (as returned by Next()) |
TokenDesc next_; // desc for next token (one token look-ahead) |
// Input stream. Must be initialized to an UC16CharacterStream. |
UC16CharacterStream* source_; |
- // Buffer to hold literal values (identifiers, strings, numbers) |
- // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. |
- LiteralCollector literal_buffer_; |
// One Unicode character look-ahead; c0_ < 0 at the end of the input. |
uc32 c0_; |
@@ -367,28 +417,14 @@ class Scanner { |
class JavaScriptScanner : public Scanner { |
public: |
- |
- // Bit vector representing set of types of literals. |
- enum LiteralType { |
- kNoLiterals = 0, |
- kLiteralNumber = 1, |
- kLiteralIdentifier = 2, |
- kLiteralString = 4, |
- kLiteralRegExp = 8, |
- kLiteralRegExpFlags = 16, |
- kAllLiterals = 31 |
- }; |
- |
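- // A LiteralScope that disables recording of some types of JavaScript |
- // literals. If the scanner is configured to not record the specific |
- // type of literal, the scope will not call StartLiteral. |
+ // A LiteralScope calls StartLiteral() on the scanner when it is created |
+ // and, unless the scope has been marked complete, calls DropLiteral() on |
+ // the scanner when it is destroyed. |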
class LiteralScope { |
public: |
- LiteralScope(JavaScriptScanner* self, LiteralType type) |
+ explicit LiteralScope(JavaScriptScanner* self) |
: scanner_(self), complete_(false) { |
- if (scanner_->RecordsLiteral(type)) { |
- scanner_->StartLiteral(); |
- } |
+ scanner_->StartLiteral(); |
} |
~LiteralScope() { |
if (!complete_) scanner_->DropLiteral(); |
@@ -430,11 +466,6 @@ class JavaScriptScanner : public Scanner { |
// tokens, which is what it is used for. |
void SeekForward(int pos); |
- // Whether this scanner records the given literal type or not. |
- bool RecordsLiteral(LiteralType type) { |
- return (literal_flags_ & type) != 0; |
- } |
- |
protected: |
bool SkipWhiteSpace(); |
Token::Value SkipSingleLineComment(); |
@@ -458,7 +489,6 @@ class JavaScriptScanner : public Scanner { |
// If the escape sequence cannot be decoded the result is kBadChar. |
uc32 ScanIdentifierUnicodeEscape(); |
- int literal_flags_; |
bool has_line_terminator_before_next_; |
}; |