Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(505)

Unified Diff: src/scanner.h

Issue 5545006: Optimized scanner to avoid virtual calls for every character read. (Closed)
Patch Set: Addressed review comments. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/preparser-api.cc ('k') | src/scanner.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/scanner.h
diff --git a/src/scanner.h b/src/scanner.h
index adeea9b23a00d50d7f07a9203dcc311fd50cebe4..572778f8ac4688cdc43832a7b2423d378e97cbe0 100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -35,67 +35,97 @@
namespace v8 {
namespace internal {
-// UTF16 buffer to read characters from a character stream.
-class CharacterStreamUTF16Buffer: public UTF16Buffer {
+// A buffered character stream based on a random access character
+// source (ReadBlock can be called with pos_ pointing to any position,
+// even positions before the current).
+class BufferedUC16CharacterStream: public UC16CharacterStream {
public:
- CharacterStreamUTF16Buffer();
- virtual ~CharacterStreamUTF16Buffer() {}
- void Initialize(Handle<String> data,
- unibrow::CharacterStream* stream,
- int start_position,
- int end_position);
- virtual void PushBack(uc32 ch);
- virtual uc32 Advance();
- virtual void SeekForward(int pos);
-
- private:
- List<uc32> pushback_buffer_;
- uc32 last_;
- unibrow::CharacterStream* stream_;
-
- List<uc32>* pushback_buffer() { return &pushback_buffer_; }
+ BufferedUC16CharacterStream();
+ virtual ~BufferedUC16CharacterStream();
+
+ virtual void PushBack(uc16 character);
+
+ protected:
+ static const unsigned kBufferSize = 512;
+ static const unsigned kPushBackStepSize = 16;
+
+ virtual unsigned SlowSeekForward(unsigned delta);
+ virtual bool ReadBlock();
+ virtual void SlowPushBack(uc16 character);
+
+ virtual unsigned BufferSeekForward(unsigned delta) = 0;
+ virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
+
+ const uc16* pushback_limit_;
+ uc16 buffer_[kBufferSize];
};
-// UTF16 buffer to read characters from an external string.
-template <typename StringType, typename CharType>
-class ExternalStringUTF16Buffer: public UTF16Buffer {
+// Generic string stream.
+class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
public:
- ExternalStringUTF16Buffer();
- virtual ~ExternalStringUTF16Buffer() {}
- void Initialize(Handle<StringType> data,
- int start_position,
- int end_position);
- virtual void PushBack(uc32 ch);
- virtual uc32 Advance();
- virtual void SeekForward(int pos);
-
- private:
- const CharType* raw_data_; // Pointer to the actual array of characters.
+ GenericStringUC16CharacterStream(Handle<String> data,
+ unsigned start_position,
+ unsigned end_position);
+ virtual ~GenericStringUC16CharacterStream();
+
+ protected:
+ virtual unsigned BufferSeekForward(unsigned delta);
+ virtual unsigned FillBuffer(unsigned position, unsigned length);
+
+ Handle<String> string_;
+ unsigned start_position_;
+ unsigned length_;
};
-// Initializes a UTF16Buffer as input stream, using one of a number
-// of strategies depending on the available character sources.
-class StreamInitializer {
+// UC16 stream based on a literal UTF-8 string.
+class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
public:
- UTF16Buffer* Init(Handle<String> source,
- unibrow::CharacterStream* stream,
- int start_position,
- int end_position);
- private:
- // Different UTF16 buffers used to pull characters from. Based on input one of
- // these will be initialized as the actual data source.
- CharacterStreamUTF16Buffer char_stream_buffer_;
- ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
- two_byte_string_buffer_;
- ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
-
- // Used to convert the source string into a character stream when a stream
- // is not passed to the scanner.
- SafeStringInputBuffer safe_string_input_buffer_;
+ Utf8ToUC16CharacterStream(const byte* data, unsigned length);
+ virtual ~Utf8ToUC16CharacterStream();
+
+ protected:
+ virtual unsigned BufferSeekForward(unsigned delta);
+ virtual unsigned FillBuffer(unsigned char_position, unsigned length);
+ void SetRawPosition(unsigned char_position);
+
+ const byte* raw_data_;
+ unsigned raw_data_length_; // Measured in bytes, not characters.
+ unsigned raw_data_pos_;
+ // The character position of the character at raw_data[raw_data_pos_].
+ // Not necessarily the same as pos_.
+ unsigned raw_character_position_;
+};
+
+
+// UTF16 buffer to read characters from an external string.
+class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
+ public:
+ ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
+ int start_position,
+ int end_position);
+ virtual ~ExternalTwoByteStringUC16CharacterStream();
+
+ virtual void PushBack(uc16 character) {
+ ASSERT(buffer_cursor_ > raw_data_);
+ buffer_cursor_--;
+ pos_--;
+ }
+ protected:
+ virtual unsigned SlowSeekForward(unsigned delta) {
+ // Fast case always handles seeking.
+ return 0;
+ }
+ virtual bool ReadBlock() {
+ // Entire string is read at start.
+ return false;
+ }
+ Handle<ExternalTwoByteString> source_;
+ const uc16* raw_data_; // Pointer to the actual array of characters.
};
+
// ----------------------------------------------------------------------------
// V8JavaScriptScanner
// JavaScript scanner getting its input from either a V8 String or a unicode
@@ -103,19 +133,9 @@ class StreamInitializer {
class V8JavaScriptScanner : public JavaScriptScanner {
public:
- V8JavaScriptScanner() {}
-
- // Initialize the Scanner to scan source.
- void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
- void Initialize(Handle<String> source,
- unibrow::CharacterStream* stream,
- int literal_flags = kAllLiterals);
- void Initialize(Handle<String> source,
- int start_position, int end_position,
+ V8JavaScriptScanner();
+ void Initialize(UC16CharacterStream* source,
int literal_flags = kAllLiterals);
-
- protected:
- StreamInitializer stream_initializer_;
};
@@ -123,8 +143,7 @@ class JsonScanner : public Scanner {
public:
JsonScanner();
- // Initialize the Scanner to scan source.
- void Initialize(Handle<String> source);
+ void Initialize(UC16CharacterStream* source);
// Returns the next token.
Token::Value Next();
@@ -138,7 +157,7 @@ class JsonScanner : public Scanner {
// Recognizes all of the single-character tokens directly, or calls a function
// to scan a number, string or identifier literal.
// The only allowed whitespace characters between tokens are tab,
- // carrige-return, newline and space.
+ // carriage-return, newline and space.
void ScanJson();
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
@@ -159,60 +178,8 @@ class JsonScanner : public Scanner {
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
// JSONNullLiteral).
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
-
- StreamInitializer stream_initializer_;
};
-
-// ExternalStringUTF16Buffer
-template <typename StringType, typename CharType>
-ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
- : raw_data_(NULL) { }
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
- Handle<StringType> data,
- int start_position,
- int end_position) {
- ASSERT(!data.is_null());
- raw_data_ = data->resource()->data();
-
- ASSERT(end_position <= data->length());
- if (start_position > 0) {
- SeekForward(start_position);
- }
- end_ =
- end_position != kNoEndPosition ? end_position : data->length();
-}
-
-
-template <typename StringType, typename CharType>
-uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
- if (pos_ < end_) {
- return raw_data_[pos_++];
- } else {
- // note: currently the following increment is necessary to avoid a
- // test-parser problem!
- pos_++;
- return static_cast<uc32>(-1);
- }
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
- pos_--;
- ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
- ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
- pos_ = pos;
-}
-
} } // namespace v8::internal
#endif // V8_SCANNER_H_
« no previous file with comments | « src/preparser-api.cc ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698