Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Unified Diff: src/scanner.cc

Issue 113336: Optimize the lexical scanner by selective inlining, and... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/scanner.cc
===================================================================
--- src/scanner.cc (revision 1926)
+++ src/scanner.cc (working copy)
@@ -48,8 +48,12 @@
// ----------------------------------------------------------------------------
// UTF8Buffer
-UTF8Buffer::UTF8Buffer() : data_(NULL) {
- Initialize(NULL, 0);
+UTF8Buffer::UTF8Buffer() {
+ static const int kInitialCapacity = 1 * KB;
+ data_ = NewArray<char>(kInitialCapacity);
+ limit_ = ComputeLimit(data_, kInitialCapacity);
+ Reset();
+ ASSERT(Capacity() == kInitialCapacity && pos() == 0);
}
@@ -58,33 +62,27 @@
}
-void UTF8Buffer::Initialize(char* src, int length) {
- DeleteArray(data_);
- data_ = src;
- size_ = length;
- Reset();
-}
-
-
-void UTF8Buffer::AddChar(uc32 c) {
- const int min_size = 1024;
- if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) {
- int new_size = size_ * 2;
- if (new_size < min_size) {
- new_size = min_size;
- }
- char* new_data = NewArray<char>(new_size);
- memcpy(new_data, data_, pos_);
+void UTF8Buffer::AddCharSlow(uc32 c) {
+ static const int kCapacityGrowthLimit = 1 * MB;
+ if (cursor_ > limit_) {
+ int old_capacity = Capacity();
+ int old_position = pos();
+ int new_capacity =
+ Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit);
+ char* new_data = NewArray<char>(new_capacity);
+ memcpy(new_data, data_, old_position);
DeleteArray(data_);
data_ = new_data;
- size_ = new_size;
+ cursor_ = new_data + old_position;
+ limit_ = ComputeLimit(new_data, new_capacity);
+ ASSERT(Capacity() == new_capacity && pos() == old_position);
}
- if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) {
- data_[pos_++] = c; // common case: 7bit ASCII
+ if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
+ *cursor_++ = c; // Common case: 7-bit ASCII.
} else {
- pos_ += unibrow::Utf8::Encode(&data_[pos_], c);
+ cursor_ += unibrow::Utf8::Encode(cursor_, c);
}
- ASSERT(pos_ <= size_);
+ ASSERT(pos() <= Capacity());
}
@@ -172,9 +170,10 @@
ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
- // Skip initial whitespace (allowing HTML comment ends) and scan
- // first token.
- SkipWhiteSpace(true);
+ // Skip initial whitespace allowing HTML comment ends just like
+ // after a newline and scan first token.
+ has_line_terminator_before_next_ = true;
+ SkipWhiteSpace();
Scan();
}
@@ -246,18 +245,19 @@
}
-void Scanner::SkipWhiteSpace(bool initial) {
- has_line_terminator_before_next_ = initial;
+bool Scanner::SkipWhiteSpace() {
+ int start_position = source_pos();
while (true) {
// We treat byte-order marks (BOMs) as whitespace for better
// compatibility with Spidermonkey and other JavaScript engines.
while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
// IsWhiteSpace() includes line terminators!
- if (kIsLineTerminator.get(c0_))
+ if (kIsLineTerminator.get(c0_)) {
// Ignore line terminators, but remember them. This is necessary
// for automatic semicolon insertion.
has_line_terminator_before_next_ = true;
+ }
Advance();
}
@@ -279,7 +279,8 @@
}
PushBack('-'); // undo Advance()
}
- return;
+ // Return whether or not we skipped any characters.
+ return source_pos() != start_position;
}
}
@@ -296,7 +297,7 @@
Advance();
}
- return Token::COMMENT;
+ return Token::WHITESPACE;
}
@@ -316,7 +317,7 @@
// matches the behaviour of SpiderMonkey and KJS.
if (ch == '*' && c0_ == '/') {
c0_ = ' ';
- return Token::COMMENT;
+ return Token::WHITESPACE;
}
}
@@ -342,19 +343,239 @@
void Scanner::Scan() {
Token::Value token;
- bool has_line_terminator = false;
+ has_line_terminator_before_next_ = false;
do {
- SkipWhiteSpace(has_line_terminator);
-
- // Remember the line terminator in previous loop
- has_line_terminator = has_line_terminator_before_next();
-
// Remember the position of the next token
next_.location.beg_pos = source_pos();
- token = ScanToken();
- } while (token == Token::COMMENT);
+ switch (c0_) {
+ case ' ':
+ case '\t':
+ Advance();
+ token = Token::WHITESPACE;
+ break;
+ case '\n':
+ Advance();
+ has_line_terminator_before_next_ = true;
+ token = Token::WHITESPACE;
+ break;
+
+ case '"': case '\'':
+ token = ScanString();
+ break;
+
+ case '<':
+ // < <= << <<= <!--
+ Advance();
+ if (c0_ == '=') {
+ token = Select(Token::LTE);
+ } else if (c0_ == '<') {
+ token = Select('=', Token::ASSIGN_SHL, Token::SHL);
+ } else if (c0_ == '!') {
+ token = ScanHtmlComment();
+ } else {
+ token = Token::LT;
+ }
+ break;
+
+ case '>':
+ // > >= >> >>= >>> >>>=
+ Advance();
+ if (c0_ == '=') {
+ token = Select(Token::GTE);
+ } else if (c0_ == '>') {
+ // >> >>= >>> >>>=
+ Advance();
+ if (c0_ == '=') {
+ token = Select(Token::ASSIGN_SAR);
+ } else if (c0_ == '>') {
+ token = Select('=', Token::ASSIGN_SHR, Token::SHR);
+ } else {
+ token = Token::SAR;
+ }
+ } else {
+ token = Token::GT;
+ }
+ break;
+
+ case '=':
+ // = == ===
+ Advance();
+ if (c0_ == '=') {
+ token = Select('=', Token::EQ_STRICT, Token::EQ);
+ } else {
+ token = Token::ASSIGN;
+ }
+ break;
+
+ case '!':
+ // ! != !==
+ Advance();
+ if (c0_ == '=') {
+ token = Select('=', Token::NE_STRICT, Token::NE);
+ } else {
+ token = Token::NOT;
+ }
+ break;
+
+ case '+':
+ // + ++ +=
+ Advance();
+ if (c0_ == '+') {
+ token = Select(Token::INC);
+ } else if (c0_ == '=') {
+ token = Select(Token::ASSIGN_ADD);
+ } else {
+ token = Token::ADD;
+ }
+ break;
+
+ case '-':
+ // - -- --> -=
+ Advance();
+ if (c0_ == '-') {
+ Advance();
+ if (c0_ == '>' && has_line_terminator_before_next_) {
+ // For compatibility with SpiderMonkey, we skip lines that
+ // start with an HTML comment end '-->'.
+ token = SkipSingleLineComment();
+ } else {
+ token = Token::DEC;
+ }
+ } else if (c0_ == '=') {
+ token = Select(Token::ASSIGN_SUB);
+ } else {
+ token = Token::SUB;
+ }
+ break;
+
+ case '*':
+ // * *=
+ token = Select('=', Token::ASSIGN_MUL, Token::MUL);
+ break;
+
+ case '%':
+ // % %=
+ token = Select('=', Token::ASSIGN_MOD, Token::MOD);
+ break;
+
+ case '/':
+ // / // /* /=
+ Advance();
+ if (c0_ == '/') {
+ token = SkipSingleLineComment();
+ } else if (c0_ == '*') {
+ token = SkipMultiLineComment();
+ } else if (c0_ == '=') {
+ token = Select(Token::ASSIGN_DIV);
+ } else {
+ token = Token::DIV;
+ }
+ break;
+
+ case '&':
+ // & && &=
+ Advance();
+ if (c0_ == '&') {
+ token = Select(Token::AND);
+ } else if (c0_ == '=') {
+ token = Select(Token::ASSIGN_BIT_AND);
+ } else {
+ token = Token::BIT_AND;
+ }
+ break;
+
+ case '|':
+ // | || |=
+ Advance();
+ if (c0_ == '|') {
+ token = Select(Token::OR);
+ } else if (c0_ == '=') {
+ token = Select(Token::ASSIGN_BIT_OR);
+ } else {
+ token = Token::BIT_OR;
+ }
+ break;
+
+ case '^':
+ // ^ ^=
+ token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
+ break;
+
+ case '.':
+ // . Number
+ Advance();
+ if (IsDecimalDigit(c0_)) {
+ token = ScanNumber(true);
+ } else {
+ token = Token::PERIOD;
+ }
+ break;
+
+ case ':':
+ token = Select(Token::COLON);
+ break;
+
+ case ';':
+ token = Select(Token::SEMICOLON);
+ break;
+
+ case ',':
+ token = Select(Token::COMMA);
+ break;
+
+ case '(':
+ token = Select(Token::LPAREN);
+ break;
+
+ case ')':
+ token = Select(Token::RPAREN);
+ break;
+
+ case '[':
+ token = Select(Token::LBRACK);
+ break;
+
+ case ']':
+ token = Select(Token::RBRACK);
+ break;
+
+ case '{':
+ token = Select(Token::LBRACE);
+ break;
+
+ case '}':
+ token = Select(Token::RBRACE);
+ break;
+
+ case '?':
+ token = Select(Token::CONDITIONAL);
+ break;
+
+ case '~':
+ token = Select(Token::BIT_NOT);
+ break;
+
+ default:
+ if (kIsIdentifierStart.get(c0_)) {
+ token = ScanIdentifier();
+ } else if (IsDecimalDigit(c0_)) {
+ token = ScanNumber(false);
+ } else if (SkipWhiteSpace()) {
+ token = Token::WHITESPACE;
+ } else if (c0_ < 0) {
+ token = Token::EOS;
+ } else {
+ token = Select(Token::ILLEGAL);
+ }
+ break;
+ }
+
+ // Continue scanning for tokens as long as we're just skipping
+ // whitespace.
+ } while (token == Token::WHITESPACE);
+
next_.location.end_pos = source_pos();
next_.token = token;
}
@@ -495,147 +716,6 @@
}
-Token::Value Scanner::ScanToken() {
- switch (c0_) {
- // strings
- case '"': case '\'':
- return ScanString();
-
- case '<':
- // < <= << <<= <!--
- Advance();
- if (c0_ == '=') return Select(Token::LTE);
- if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL);
- if (c0_ == '!') return ScanHtmlComment();
- return Token::LT;
-
- case '>':
- // > >= >> >>= >>> >>>=
- Advance();
- if (c0_ == '=') return Select(Token::GTE);
- if (c0_ == '>') {
- // >> >>= >>> >>>=
- Advance();
- if (c0_ == '=') return Select(Token::ASSIGN_SAR);
- if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR);
- return Token::SAR;
- }
- return Token::GT;
-
- case '=':
- // = == ===
- Advance();
- if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ);
- return Token::ASSIGN;
-
- case '!':
- // ! != !==
- Advance();
- if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE);
- return Token::NOT;
-
- case '+':
- // + ++ +=
- Advance();
- if (c0_ == '+') return Select(Token::INC);
- if (c0_ == '=') return Select(Token::ASSIGN_ADD);
- return Token::ADD;
-
- case '-':
- // - -- -=
- Advance();
- if (c0_ == '-') return Select(Token::DEC);
- if (c0_ == '=') return Select(Token::ASSIGN_SUB);
- return Token::SUB;
-
- case '*':
- // * *=
- return Select('=', Token::ASSIGN_MUL, Token::MUL);
-
- case '%':
- // % %=
- return Select('=', Token::ASSIGN_MOD, Token::MOD);
-
- case '/':
- // / // /* /=
- Advance();
- if (c0_ == '/') return SkipSingleLineComment();
- if (c0_ == '*') return SkipMultiLineComment();
- if (c0_ == '=') return Select(Token::ASSIGN_DIV);
- return Token::DIV;
-
- case '&':
- // & && &=
- Advance();
- if (c0_ == '&') return Select(Token::AND);
- if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND);
- return Token::BIT_AND;
-
- case '|':
- // | || |=
- Advance();
- if (c0_ == '|') return Select(Token::OR);
- if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR);
- return Token::BIT_OR;
-
- case '^':
- // ^ ^=
- return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
-
- case '.':
- // . Number
- Advance();
- if (IsDecimalDigit(c0_)) return ScanNumber(true);
- return Token::PERIOD;
-
- case ':':
- return Select(Token::COLON);
-
- case ';':
- return Select(Token::SEMICOLON);
-
- case ',':
- return Select(Token::COMMA);
-
- case '(':
- return Select(Token::LPAREN);
-
- case ')':
- return Select(Token::RPAREN);
-
- case '[':
- return Select(Token::LBRACK);
-
- case ']':
- return Select(Token::RBRACK);
-
- case '{':
- return Select(Token::LBRACE);
-
- case '}':
- return Select(Token::RBRACE);
-
- case '?':
- return Select(Token::CONDITIONAL);
-
- case '~':
- return Select(Token::BIT_NOT);
-
- default:
- if (kIsIdentifierStart.get(c0_))
- return ScanIdentifier();
- if (IsDecimalDigit(c0_))
- return ScanNumber(false);
- if (c0_ < 0)
- return Token::EOS;
- return Select(Token::ILLEGAL);
- }
-
- UNREACHABLE();
- return Token::ILLEGAL;
-}
-
-
// Returns true if any decimal digits were scanned, returns false otherwise.
void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_))
@@ -734,7 +814,6 @@
Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_));
-
bool has_escapes = false;
StartLiteral();
@@ -746,8 +825,10 @@
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
AddChar(c);
} else {
- AddCharAdvance();
+ AddChar(c0_);
+ Advance();
}
+
// Scan the rest of the identifier characters.
while (kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
@@ -757,19 +838,22 @@
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
AddChar(c);
} else {
- AddCharAdvance();
+ AddChar(c0_);
+ Advance();
}
}
TerminateLiteral();
// We don't have any 1-letter keywords (this is probably a common case).
- if ((next_.literal_end - next_.literal_pos) == 1)
+ if ((next_.literal_end - next_.literal_pos) == 1) {
return Token::IDENTIFIER;
+ }
// If the identifier contains unicode escapes, it must not be
// resolved to a keyword.
- if (has_escapes)
+ if (has_escapes) {
return Token::IDENTIFIER;
+ }
return Token::Lookup(&literals_.data()[next_.literal_pos]);
}
« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698