Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(232)

Unified Diff: src/scanner-base.h

Issue 6075005: Change scanner buffers to not use utf-8. (Closed)
Patch Set: Fixed linto. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/scanner-base.h
diff --git a/src/scanner-base.h b/src/scanner-base.h
index c50b8f3ef62dee77f0daba35aa61225de62a9d04..e773586d044ef047e5a0a88dba2f0754e3a6a412 100644
--- a/src/scanner-base.h
+++ b/src/scanner-base.h
@@ -141,61 +141,103 @@ class ScannerConstants : AllStatic {
};
// ----------------------------------------------------------------------------
-// LiteralCollector - Collector of chars of literals.
+// LiteralBuffer - Collector of chars of literals.
-class LiteralCollector {
+class LiteralBuffer {
public:
- LiteralCollector();
- ~LiteralCollector();
-
- inline void AddChar(uc32 c) {
- if (recording_) {
- if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
- buffer_.Add(static_cast<char>(c));
- } else {
- AddCharSlow(c);
+ LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
+
+ ~LiteralBuffer() {
+ if (backing_store_.length() > 0) {
+ backing_store_.Dispose();
+ }
+ }
+
+ inline void AddChar(uc16 character) {
+ if (position_ >= backing_store_.length()) ExpandBuffer();
+ if (is_ascii_) {
+ if (character < kMaxAsciiCharCodeU) {
+ backing_store_[position_] = static_cast<byte>(character);
+ position_ += kASCIISize;
+ return;
}
+ ConvertToUC16();
}
+ *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
+ position_ += kUC16Size;
}
- void StartLiteral() {
- buffer_.StartSequence();
- recording_ = true;
+ bool is_ascii() { return is_ascii_; }
+
+ Vector<const uc16> uc16_literal() {
+ ASSERT(!is_ascii_);
+ ASSERT((position_ & 0x1) == 0);
+ return Vector<const uc16>(
+ reinterpret_cast<const uc16*>(backing_store_.start()),
+ position_ >> 1);
}
- Vector<const char> EndLiteral() {
- if (recording_) {
- recording_ = false;
- buffer_.Add(kEndMarker);
- Vector<char> sequence = buffer_.EndSequence();
- return Vector<const char>(sequence.start(), sequence.length());
- }
- return Vector<const char>();
+ Vector<const char> ascii_literal() {
+ ASSERT(is_ascii_);
+ return Vector<const char>(
+ reinterpret_cast<const char*>(backing_store_.start()),
+ position_);
}
- void DropLiteral() {
- if (recording_) {
- recording_ = false;
- buffer_.DropSequence();
- }
+ int length() {
+ return is_ascii_ ? position_ : (position_ >> 1);
}
void Reset() {
- buffer_.Reset();
+ position_ = 0;
+ is_ascii_ = true;
}
-
- // The end marker added after a parsed literal.
- // Using zero allows the usage of strlen and similar functions on
- // identifiers and numbers (but not strings, since they may contain zero
- // bytes).
- static const char kEndMarker = '\x00';
private:
- static const int kInitialCapacity = 256;
- SequenceCollector<char, 4> buffer_;
- bool recording_;
- void AddCharSlow(uc32 c);
+ static const int kInitialCapacity = 16;
+ static const int kGrowthFactory = 4;
+ static const int kMinConversionSlack = 256;
+ static const int kMaxGrowth = 1 * MB;
+ inline int NewCapacity(int min_capacity) {
+ int capacity = Max(min_capacity, backing_store_.length());
+ int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
+ return new_capacity;
+ }
+
+ void ExpandBuffer() {
+ Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
+ memcpy(new_store.start(), backing_store_.start(), position_);
+ backing_store_.Dispose();
+ backing_store_ = new_store;
+ }
+
+ void ConvertToUC16() {
+ ASSERT(is_ascii_);
+ Vector<byte> new_store;
+ int new_content_size = position_ * kUC16Size;
+ if (new_content_size > backing_store_.length()) {
+ new_store = Vector<byte>::New(NewCapacity(new_content_size));
+ } else {
+ new_store = backing_store_;
+ }
+ char* src = reinterpret_cast<char*>(backing_store_.start());
+ uc16* dst = reinterpret_cast<uc16*>(new_store.start());
+ for (int i = position_ - 1; i >= 0; i--) {
+ dst[i] = src[i];
+ }
+ if (new_store.start() != backing_store_.start()) {
+ backing_store_.Dispose();
+ backing_store_ = new_store;
+ }
+ position_ = new_content_size;
+ is_ascii_ = false;
+ }
+
+ bool is_ascii_;
+ int position_;
+ Vector<byte> backing_store_;
};
+
// ----------------------------------------------------------------------------
// Scanner base-class.
@@ -241,35 +283,40 @@ class Scanner {
// collected for identifiers, strings, and numbers.
// These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral().
- const char* literal_string() const {
- return current_.literal_chars.start();
+ bool is_literal_ascii() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->is_ascii();
}
-
- int literal_length() const {
- // Excluding terminal '\x00' added by TerminateLiteral().
- return current_.literal_chars.length() - 1;
+ Vector<const char> literal_ascii_string() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->ascii_literal();
}
-
- Vector<const char> literal() const {
- return Vector<const char>(literal_string(), literal_length());
+ Vector<const uc16> literal_uc16_string() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->uc16_literal();
+ }
+ int literal_length() const {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->length();
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
- const char* next_literal_string() const {
- return next_.literal_chars.start();
+ bool is_next_literal_ascii() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->is_ascii();
}
-
-
- // Returns the length of the next token (that would be returned if
- // Next() were called).
- int next_literal_length() const {
- // Excluding terminal '\x00' added by TerminateLiteral().
- return next_.literal_chars.length() - 1;
+ Vector<const char> next_literal_ascii_string() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->ascii_literal();
}
-
- Vector<const char> next_literal() const {
- return Vector<const char>(next_literal_string(), next_literal_length());
+ Vector<const uc16> next_literal_uc16_string() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->uc16_literal();
+ }
+ int next_literal_length() const {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->length();
}
static const int kCharacterLookaheadBufferSize = 1;
@@ -279,7 +326,7 @@ class Scanner {
struct TokenDesc {
Token::Value token;
Location location;
- Vector<const char> literal_chars;
+ LiteralBuffer* literal_chars;
};
// Call this after setting source_ to the input.
@@ -288,29 +335,31 @@ class Scanner {
ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
// Initialize current_ to not refer to a literal.
- current_.literal_chars = Vector<const char>();
- // Reset literal buffer.
- literal_buffer_.Reset();
+ current_.literal_chars = NULL;
}
// Literal buffer support
inline void StartLiteral() {
- literal_buffer_.StartLiteral();
+ LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
+ &literal_buffer2_ : &literal_buffer1_;
+ free_buffer->Reset();
+ next_.literal_chars = free_buffer;
}
inline void AddLiteralChar(uc32 c) {
- literal_buffer_.AddChar(c);
+ ASSERT_NOT_NULL(next_.literal_chars);
+ next_.literal_chars->AddChar(c);
}
// Complete scanning of a literal.
inline void TerminateLiteral() {
- next_.literal_chars = literal_buffer_.EndLiteral();
+ // Does nothing in the current implementation.
}
// Stops scanning of a literal and drop the collected characters,
// e.g., due to an encountered error.
inline void DropLiteral() {
- literal_buffer_.DropLiteral();
+ next_.literal_chars = NULL;
}
inline void AddLiteralCharAdvance() {
@@ -348,15 +397,16 @@ class Scanner {
return source_->pos() - kCharacterLookaheadBufferSize;
}
+ // Buffers collecting literal strings, numbers, etc.
+ LiteralBuffer literal_buffer1_;
+ LiteralBuffer literal_buffer2_;
+
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)
// Input stream. Must be initialized to a UC16CharacterStream.
UC16CharacterStream* source_;
- // Buffer to hold literal values (identifiers, strings, numbers)
- // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
- LiteralCollector literal_buffer_;
// One Unicode character look-ahead; c0_ < 0 at the end of the input.
uc32 c0_;
@@ -367,28 +417,14 @@ class Scanner {
class JavaScriptScanner : public Scanner {
public:
-
- // Bit vector representing set of types of literals.
- enum LiteralType {
- kNoLiterals = 0,
- kLiteralNumber = 1,
- kLiteralIdentifier = 2,
- kLiteralString = 4,
- kLiteralRegExp = 8,
- kLiteralRegExpFlags = 16,
- kAllLiterals = 31
- };
-
// A LiteralScope that calls StartLiteral() on the scanner when it is
// constructed. If the scope is destroyed while complete_ is still false,
// the literal being collected is dropped via DropLiteral().
class LiteralScope {
public:
- LiteralScope(JavaScriptScanner* self, LiteralType type)
+ explicit LiteralScope(JavaScriptScanner* self)
: scanner_(self), complete_(false) {
- if (scanner_->RecordsLiteral(type)) {
- scanner_->StartLiteral();
- }
+ scanner_->StartLiteral();
}
~LiteralScope() {
if (!complete_) scanner_->DropLiteral();
@@ -430,11 +466,6 @@ class JavaScriptScanner : public Scanner {
// tokens, which is what it is used for.
void SeekForward(int pos);
- // Whether this scanner records the given literal type or not.
- bool RecordsLiteral(LiteralType type) {
- return (literal_flags_ & type) != 0;
- }
-
protected:
bool SkipWhiteSpace();
Token::Value SkipSingleLineComment();
@@ -458,7 +489,6 @@ class JavaScriptScanner : public Scanner {
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
- int literal_flags_;
bool has_line_terminator_before_next_;
};
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698