| Index: src/scanner.h
|
| diff --git a/src/scanner.h b/src/scanner.h
|
| index 201803da5e0f4d02654df44dac66d232ae1f2b79..dc903cd6afc7181334bc4afebcc3d1923fb57c60 100644
|
| --- a/src/scanner.h
|
| +++ b/src/scanner.h
|
| @@ -41,6 +41,7 @@ class UTF8Buffer {
|
| ~UTF8Buffer();
|
|
|
| void AddChar(uc32 c) {
|
| + ASSERT_NOT_NULL(data_);
|
| if (cursor_ <= limit_ &&
|
| static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
|
| *cursor_++ = static_cast<char>(c);
|
| @@ -49,16 +50,29 @@ class UTF8Buffer {
|
| }
|
| }
|
|
|
| - void Reset() { cursor_ = data_; }
|
| - int pos() const { return cursor_ - data_; }
|
| + void Reset() {  // Rewinds cursor_ to the start of the buffer.
|
| + if (data_ == NULL) {  // No backing store yet: allocate it on first use.
|
| + data_ = NewArray<char>(kInitialCapacity);
|
| + limit_ = ComputeLimit(data_, kInitialCapacity);
|
| + }
|
| + cursor_ = data_;
|
| + }
|
| +
|
| + int pos() const {  // Offset of cursor_ from data_, in chars.
|
| + ASSERT_NOT_NULL(data_);  // Requires an allocated buffer (see Reset()).
|
| + return cursor_ - data_;
|
| + }
|
| +
|
| char* data() const { return data_; }
|
|
|
| private:
|
| + static const int kInitialCapacity = 256;
|
| char* data_;
|
| char* cursor_;
|
| char* limit_;
|
|
|
| int Capacity() const {
|
| + ASSERT_NOT_NULL(data_);  // limit_ - data_ below needs an allocated buffer.
|
| return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
|
| }
|
|
|
| @@ -278,26 +292,30 @@ class Scanner {
|
| // token returned by Next()). The string is 0-terminated and in
|
| // UTF-8 format; they may contain 0-characters. Literal strings are
|
| // collected for identifiers, strings, and numbers.
|
| + // These functions only give the correct result if the literal
|
| + // was scanned between calls to StartLiteral() and TerminateLiteral().
|
| const char* literal_string() const {
|
| - return &literals_.data()[current_.literal_pos];
|
| + return current_.literal_buffer->data();
|
| }
|
| int literal_length() const {
|
| - return current_.literal_end - current_.literal_pos;
|
| - }
|
| -
|
| - Vector<const char> next_literal() const {
|
| - return Vector<const char>(next_literal_string(), next_literal_length());
|
| + // Excluding terminal '\0' added by TerminateLiteral().
|
| + return current_.literal_buffer->pos() - 1;
|
| }
|
|
|
| // Returns the literal string for the next token (the token that
|
| // would be returned if Next() were called).
|
| const char* next_literal_string() const {
|
| - return &literals_.data()[next_.literal_pos];
|
| + return next_.literal_buffer->data();
|
| }
|
| // Returns the length of the next token (that would be returned if
|
| // Next() were called).
|
| int next_literal_length() const {
|
| - return next_.literal_end - next_.literal_pos;
|
| + return next_.literal_buffer->pos() - 1;  // As in literal_length(): presumably drops the terminal '\0' -- confirm.
|
| + }
|
| +
|
| + Vector<const char> next_literal() const {
|
| + return Vector<const char>(next_literal_string(),
|
| + next_literal_length());
|
| }
|
|
|
| // Scans the input as a regular expression pattern, previous
|
| @@ -339,7 +357,8 @@ class Scanner {
|
|
|
| // Buffer to hold literal values (identifiers, strings, numbers)
|
| // using 0-terminated UTF-8 encoding.
|
| - UTF8Buffer literals_;
|
| + UTF8Buffer literal_buffer_1_;
|
| + UTF8Buffer literal_buffer_2_;
|
|
|
| bool stack_overflow_;
|
| static StaticResource<Utf8Decoder> utf8_decoder_;
|
| @@ -351,7 +370,7 @@ class Scanner {
|
| struct TokenDesc {
|
| Token::Value token;
|
| Location location;
|
| - int literal_pos, literal_end;
|
| + UTF8Buffer* literal_buffer;  // Read by literal_string()/literal_length().
|
| };
|
|
|
| TokenDesc current_; // desc for current token (as returned by Next())
|
|
|