| Index: src/scanner-base.h
 | 
| diff --git a/src/scanner-base.h b/src/scanner-base.h
 | 
| index c50b8f3ef62dee77f0daba35aa61225de62a9d04..e773586d044ef047e5a0a88dba2f0754e3a6a412 100644
 | 
| --- a/src/scanner-base.h
 | 
| +++ b/src/scanner-base.h
 | 
| @@ -141,61 +141,103 @@ class ScannerConstants : AllStatic {
 | 
|  };
 | 
|  
 | 
|  // ----------------------------------------------------------------------------
 | 
| -// LiteralCollector -  Collector of chars of literals.
 | 
| +// LiteralBuffer -  Collector of chars of literals.
 | 
|  
 | 
| -class LiteralCollector {
 | 
| +class LiteralBuffer {
 | 
|   public:
 | 
| -  LiteralCollector();
 | 
| -  ~LiteralCollector();
 | 
| -
 | 
| -  inline void AddChar(uc32 c) {
 | 
| -    if (recording_) {
 | 
| -      if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
 | 
| -        buffer_.Add(static_cast<char>(c));
 | 
| -      } else {
 | 
| -        AddCharSlow(c);
 | 
| +  LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
 | 
| +
 | 
| +  ~LiteralBuffer() {
 | 
| +    if (backing_store_.length() > 0) {
 | 
| +      backing_store_.Dispose();
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  inline void AddChar(uc16 character) {  // Appends one character.
 | 
| +    if (position_ >= backing_store_.length()) ExpandBuffer();  // Store full.
 | 
| +    if (is_ascii_) {  // Still storing one byte per character.
 | 
| +      if (character < kMaxAsciiCharCodeU) {
 | 
| +        backing_store_[position_] = static_cast<byte>(character);
 | 
| +        position_ += kASCIISize;
 | 
| +        return;
 | 
|        }
 | 
| +      ConvertToUC16();  // First character outside the byte range: widen.
 | 
|      }
 | 
| +    *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
 | 
| +    position_ += kUC16Size;  // position_ is a byte offset into the store.
 | 
|    }
 | 
|  
 | 
| -  void StartLiteral() {
 | 
| -    buffer_.StartSequence();
 | 
| -    recording_ = true;
 | 
| +  bool is_ascii() { return is_ascii_; }
 | 
| +
 | 
| +  Vector<const uc16> uc16_literal() {
 | 
| +    ASSERT(!is_ascii_);
 | 
| +    ASSERT((position_ & 0x1) == 0);
 | 
| +    return Vector<const uc16>(
 | 
| +        reinterpret_cast<const uc16*>(backing_store_.start()),
 | 
| +        position_ >> 1);
 | 
|    }
 | 
|  
 | 
| -  Vector<const char> EndLiteral() {
 | 
| -    if (recording_) {
 | 
| -      recording_ = false;
 | 
| -      buffer_.Add(kEndMarker);
 | 
| -      Vector<char> sequence = buffer_.EndSequence();
 | 
| -      return Vector<const char>(sequence.start(), sequence.length());
 | 
| -    }
 | 
| -    return Vector<const char>();
 | 
| +  Vector<const char> ascii_literal() {
 | 
| +    ASSERT(is_ascii_);
 | 
| +    return Vector<const char>(
 | 
| +        reinterpret_cast<const char*>(backing_store_.start()),
 | 
| +        position_);
 | 
|    }
 | 
|  
 | 
| -  void DropLiteral() {
 | 
| -    if (recording_) {
 | 
| -      recording_ = false;
 | 
| -      buffer_.DropSequence();
 | 
| -    }
 | 
| +  int length() {
 | 
| +    return is_ascii_ ? position_ : (position_ >> 1);
 | 
|    }
 | 
|  
 | 
|    void Reset() {
 | 
| -    buffer_.Reset();
 | 
| +    position_ = 0;
 | 
| +    is_ascii_ = true;
 | 
|    }
 | 
| -
 | 
| -  // The end marker added after a parsed literal.
 | 
| -  // Using zero allows the usage of strlen and similar functions on
 | 
| -  // identifiers and numbers (but not strings, since they may contain zero
 | 
| -  // bytes).
 | 
| -  static const char kEndMarker = '\x00';
 | 
|   private:
 | 
| -  static const int kInitialCapacity = 256;
 | 
| -  SequenceCollector<char, 4> buffer_;
 | 
| -  bool recording_;
 | 
| -  void AddCharSlow(uc32 c);
 | 
| +  static const int kInitialCapacity = 16;
 | 
| +  static const int kGrowthFactory = 4;
 | 
| +  static const int kMinConversionSlack = 256;
 | 
| +  static const int kMaxGrowth = 1 * MB;
 | 
| +  inline int NewCapacity(int min_capacity) {  // Size for a regrown store.
 | 
| +    int capacity = Max(min_capacity, backing_store_.length());  // Never shrink.
 | 
| +    int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
 | 
| +    return new_capacity;  // 4x growth, capped at +kMaxGrowth bytes.
 | 
| +  }
 | 
| +
 | 
| +  void ExpandBuffer() {  // Moves the contents into a larger store.
 | 
| +    Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
 | 
| +    memcpy(new_store.start(), backing_store_.start(), position_);
 | 
| +    backing_store_.Dispose();  // Free the old store after copying.
 | 
| +    backing_store_ = new_store;
 | 
| +  }
 | 
| +
 | 
| +  void ConvertToUC16() {  // Widens collected ASCII bytes to uc16.
 | 
| +    ASSERT(is_ascii_);
 | 
| +    Vector<byte> new_store;
 | 
| +    int new_content_size = position_ * kUC16Size;
 | 
| +    if (new_content_size >= backing_store_.length()) {  // Keep room to append.
 | 
| +      new_store = Vector<byte>::New(NewCapacity(new_content_size));
 | 
| +    } else {
 | 
| +      new_store = backing_store_;  // Fits: widen within the same store.
 | 
| +    }
 | 
| +    char* src = reinterpret_cast<char*>(backing_store_.start());
 | 
| +    uc16* dst = reinterpret_cast<uc16*>(new_store.start());
 | 
| +    for (int i = position_ - 1; i >= 0; i--) {  // Backwards: may alias src.
 | 
| +      dst[i] = src[i];
 | 
| +    }
 | 
| +    if (new_store.start() != backing_store_.start()) {  // Reallocated.
 | 
| +      backing_store_.Dispose();
 | 
| +      backing_store_ = new_store;
 | 
| +    }
 | 
| +    position_ = new_content_size;  // position_ counts bytes, not chars.
 | 
| +    is_ascii_ = false;
 | 
| +  }
 | 
| +
 | 
| +  bool is_ascii_;
 | 
| +  int position_;
 | 
| +  Vector<byte> backing_store_;
 | 
|  };
 | 
|  
 | 
| +
 | 
|  // ----------------------------------------------------------------------------
 | 
|  // Scanner base-class.
 | 
|  
 | 
| @@ -241,35 +283,40 @@ class Scanner {
 | 
|    // collected for identifiers, strings, and numbers.
 | 
|    // These functions only give the correct result if the literal
 | 
|    // was scanned between calls to StartLiteral() and TerminateLiteral().
 | 
| -  const char* literal_string() const {
 | 
| -    return current_.literal_chars.start();
 | 
| +  bool is_literal_ascii() {
 | 
| +    ASSERT_NOT_NULL(current_.literal_chars);
 | 
| +    return current_.literal_chars->is_ascii();
 | 
|    }
 | 
| -
 | 
| -  int literal_length() const {
 | 
| -    // Excluding terminal '\x00' added by TerminateLiteral().
 | 
| -    return current_.literal_chars.length() - 1;
 | 
| +  Vector<const char> literal_ascii_string() {
 | 
| +    ASSERT_NOT_NULL(current_.literal_chars);
 | 
| +    return current_.literal_chars->ascii_literal();
 | 
|    }
 | 
| -
 | 
| -  Vector<const char> literal() const {
 | 
| -    return Vector<const char>(literal_string(), literal_length());
 | 
| +  Vector<const uc16> literal_uc16_string() {
 | 
| +    ASSERT_NOT_NULL(current_.literal_chars);
 | 
| +    return current_.literal_chars->uc16_literal();
 | 
| +  }
 | 
| +  int literal_length() const {
 | 
| +    ASSERT_NOT_NULL(current_.literal_chars);
 | 
| +    return current_.literal_chars->length();
 | 
|    }
 | 
|  
 | 
|    // Returns the literal string for the next token (the token that
 | 
|    // would be returned if Next() were called).
 | 
| -  const char* next_literal_string() const {
 | 
| -    return next_.literal_chars.start();
 | 
| +  bool is_next_literal_ascii() {
 | 
| +    ASSERT_NOT_NULL(next_.literal_chars);
 | 
| +    return next_.literal_chars->is_ascii();
 | 
|    }
 | 
| -
 | 
| -
 | 
| -  // Returns the length of the next token (that would be returned if
 | 
| -  // Next() were called).
 | 
| -  int next_literal_length() const {
 | 
| -    // Excluding terminal '\x00' added by TerminateLiteral().
 | 
| -    return next_.literal_chars.length() - 1;
 | 
| +  Vector<const char> next_literal_ascii_string() {
 | 
| +    ASSERT_NOT_NULL(next_.literal_chars);
 | 
| +    return next_.literal_chars->ascii_literal();
 | 
|    }
 | 
| -
 | 
| -  Vector<const char> next_literal() const {
 | 
| -    return Vector<const char>(next_literal_string(), next_literal_length());
 | 
| +  Vector<const uc16> next_literal_uc16_string() {
 | 
| +    ASSERT_NOT_NULL(next_.literal_chars);
 | 
| +    return next_.literal_chars->uc16_literal();
 | 
| +  }
 | 
| +  int next_literal_length() const {
 | 
| +    ASSERT_NOT_NULL(next_.literal_chars);
 | 
| +    return next_.literal_chars->length();
 | 
|    }
 | 
|  
 | 
|    static const int kCharacterLookaheadBufferSize = 1;
 | 
| @@ -279,7 +326,7 @@ class Scanner {
 | 
|    struct TokenDesc {
 | 
|      Token::Value token;
 | 
|      Location location;
 | 
| -    Vector<const char> literal_chars;
 | 
| +    LiteralBuffer* literal_chars;
 | 
|    };
 | 
|  
 | 
|    // Call this after setting source_ to the input.
 | 
| @@ -288,29 +335,31 @@ class Scanner {
 | 
|      ASSERT(kCharacterLookaheadBufferSize == 1);
 | 
|      Advance();
 | 
|      // Initialize current_ to not refer to a literal.
 | 
| -    current_.literal_chars = Vector<const char>();
 | 
| -    // Reset literal buffer.
 | 
| -    literal_buffer_.Reset();
 | 
| +    current_.literal_chars = NULL;
 | 
|    }
 | 
|  
 | 
|    // Literal buffer support
 | 
|    inline void StartLiteral() {
 | 
| -    literal_buffer_.StartLiteral();
 | 
| +    LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
 | 
| +            &literal_buffer2_ : &literal_buffer1_;  // Not current_'s buffer.
 | 
| +    free_buffer->Reset();  // Drop whatever it held before.
 | 
| +    next_.literal_chars = free_buffer;  // next_ now collects into it.
 | 
|    }
 | 
|  
 | 
|    inline void AddLiteralChar(uc32 c) {
 | 
| -    literal_buffer_.AddChar(c);
 | 
| +    ASSERT_NOT_NULL(next_.literal_chars);
 | 
| +    next_.literal_chars->AddChar(c);
 | 
|    }
 | 
|  
 | 
|    // Complete scanning of a literal.
 | 
|    inline void TerminateLiteral() {
 | 
| -    next_.literal_chars = literal_buffer_.EndLiteral();
 | 
| +    // Does nothing in the current implementation.
 | 
|    }
 | 
|  
 | 
|    // Stops scanning of a literal and drop the collected characters,
 | 
|    // e.g., due to an encountered error.
 | 
|    inline void DropLiteral() {
 | 
| -    literal_buffer_.DropLiteral();
 | 
| +    next_.literal_chars = NULL;
 | 
|    }
 | 
|  
 | 
|    inline void AddLiteralCharAdvance() {
 | 
| @@ -348,15 +397,16 @@ class Scanner {
 | 
|      return source_->pos() - kCharacterLookaheadBufferSize;
 | 
|    }
 | 
|  
 | 
| +  // Buffers collecting literal strings, numbers, etc.
 | 
| +  LiteralBuffer literal_buffer1_;
 | 
| +  LiteralBuffer literal_buffer2_;
 | 
| +
 | 
|    TokenDesc current_;  // desc for current token (as returned by Next())
 | 
|    TokenDesc next_;     // desc for next token (one token look-ahead)
 | 
|  
 | 
|    // Input stream. Must be initialized to an UC16CharacterStream.
 | 
|    UC16CharacterStream* source_;
 | 
|  
 | 
| -  // Buffer to hold literal values (identifiers, strings, numbers)
 | 
| -  // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
 | 
| -  LiteralCollector literal_buffer_;
 | 
|  
 | 
|    // One Unicode character look-ahead; c0_ < 0 at the end of the input.
 | 
|    uc32 c0_;
 | 
| @@ -367,28 +417,14 @@ class Scanner {
 | 
|  
 | 
|  class JavaScriptScanner : public Scanner {
 | 
|   public:
 | 
| -
 | 
| -  // Bit vector representing set of types of literals.
 | 
| -  enum LiteralType {
 | 
| -    kNoLiterals = 0,
 | 
| -    kLiteralNumber = 1,
 | 
| -    kLiteralIdentifier = 2,
 | 
| -    kLiteralString = 4,
 | 
| -    kLiteralRegExp = 8,
 | 
| -    kLiteralRegExpFlags = 16,
 | 
| -    kAllLiterals = 31
 | 
| -  };
 | 
| -
 | 
|    // A LiteralScope that disables recording of some types of JavaScript
 | 
|    // literals. If the scanner is configured to not record the specific
 | 
|    // type of literal, the scope will not call StartLiteral.
 | 
|    class LiteralScope {
 | 
|     public:
 | 
| -    LiteralScope(JavaScriptScanner* self, LiteralType type)
 | 
| +    explicit LiteralScope(JavaScriptScanner* self)
 | 
|          : scanner_(self), complete_(false) {
 | 
| -      if (scanner_->RecordsLiteral(type)) {
 | 
| -        scanner_->StartLiteral();
 | 
| -      }
 | 
| +      scanner_->StartLiteral();
 | 
|      }
 | 
|       ~LiteralScope() {
 | 
|         if (!complete_) scanner_->DropLiteral();
 | 
| @@ -430,11 +466,6 @@ class JavaScriptScanner : public Scanner {
 | 
|    // tokens, which is what it is used for.
 | 
|    void SeekForward(int pos);
 | 
|  
 | 
| -  // Whether this scanner records the given literal type or not.
 | 
| -  bool RecordsLiteral(LiteralType type) {
 | 
| -    return (literal_flags_ & type) != 0;
 | 
| -  }
 | 
| -
 | 
|   protected:
 | 
|    bool SkipWhiteSpace();
 | 
|    Token::Value SkipSingleLineComment();
 | 
| @@ -458,7 +489,6 @@ class JavaScriptScanner : public Scanner {
 | 
|    // If the escape sequence cannot be decoded the result is kBadChar.
 | 
|    uc32 ScanIdentifierUnicodeEscape();
 | 
|  
 | 
| -  int literal_flags_;
 | 
|    bool has_line_terminator_before_next_;
 | 
|  };
 | 
|  
 | 
| 
 |