Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(454)

Unified Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Some fixes, and marching down the very long road to make all compilers happy. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/parsing/scanner.h
diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h
index cb37f7cc1a5dd7025579d03902a789c6c1e530bb..54f819da4dd2eae4d8703511060c1f63a3349b9a 100644
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@@ -23,77 +23,108 @@ namespace internal {
class AstRawString;
class AstValueFactory;
class DuplicateFinder;
+class ExternalOneByteString;
+class ExternalTwoByteString;
class ParserRecorder;
class UnicodeCache;
-
// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one part of a surrogate pair that make a single 21 bit code point.
-
class Utf16CharacterStream {
public:
- Utf16CharacterStream() : pos_(0) { }
+ static const uc32 kEndOfInput = -1;
+
virtual ~Utf16CharacterStream() { }
// Returns and advances past the next UTF-16 code unit in the input
- // stream. If there are no more code units, it returns a negative
- // value.
+ // stream. If there are no more code units it returns kEndOfInput.
inline uc32 Advance() {
- if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
- pos_++;
+ if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
+ return static_cast<uc32>(*(buffer_cursor_++));
+ } else if (ReadBlock()) {
nickie 2016/09/07 13:28:28 I suppose you did not like how it was before: if (
vogelheim 2016/09/08 13:09:02 I liked it, but: V8_LIKELY supplies information ab
return static_cast<uc32>(*(buffer_cursor_++));
+ } else {
+ // Note: currently the following increment is necessary to avoid a
+ // parser problem! The scanner treats the final kEndOfInput as
+ // a code unit with a position, and does math relative to that
+ // position.
+ buffer_cursor_++;
+ return kEndOfInput;
}
- // Note: currently the following increment is necessary to avoid a
- // parser problem! The scanner treats the final kEndOfInput as
- // a code unit with a position, and does math relative to that
- // position.
- pos_++;
-
- return kEndOfInput;
- }
-
- // Return the current position in the code unit stream.
- // Starts at zero.
- inline size_t pos() const { return pos_; }
-
- // Skips forward past the next code_unit_count UTF-16 code units
- // in the input, or until the end of input if that comes sooner.
- // Returns the number of code units actually skipped. If less
- // than code_unit_count,
- inline size_t SeekForward(size_t code_unit_count) {
- size_t buffered_chars = buffer_end_ - buffer_cursor_;
- if (code_unit_count <= buffered_chars) {
- buffer_cursor_ += code_unit_count;
- pos_ += code_unit_count;
- return code_unit_count;
+ }
+
+ inline void Back() {
nickie 2016/09/07 13:28:28 A comment explaining what this should do would be
vogelheim 2016/09/08 13:09:03 Done.
+ if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
+ buffer_cursor_--;
marja 2016/09/07 09:17:57 Nit: --buffer_cursor_;
vogelheim 2016/09/08 13:09:02 Why? [Here and elsewhere.] ------ I find the pre
+ } else {
+ ReadBlockAt(pos() - 1);
nickie 2016/09/07 13:28:28 We discussed this offline. This can make a series
vogelheim 2016/09/08 13:09:02 That's almost the same as Seek. :) The Scanner on
}
- return SlowSeekForward(code_unit_count);
}
- // Pushes back the most recently read UTF-16 code unit (or negative
- // value if at end of input), i.e., the value returned by the most recent
- // call to Advance.
- // Must not be used right after calling SeekForward.
- virtual void PushBack(int32_t code_unit) = 0;
+ inline size_t pos() const {
+ return buffer_pos_ + (buffer_cursor_ - buffer_start_);
nickie 2016/09/07 13:28:28 This calculation is quite expensive and the pos()
vogelheim 2016/09/08 13:09:02 Hmm. I think I should try it both ways. (My thin
+ }
+
+ inline void Seek(size_t pos) {
+ if (V8_LIKELY(pos >= buffer_pos_ &&
+ pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
+ buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
+ } else {
+ ReadBlockAt(pos);
+ }
+ }
- virtual bool SetBookmark();
- virtual void ResetToBookmark();
+ // Legacy API:
+ void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
+ void PushBack(int32_t code_unit) {
+ Back();
+#ifdef DEBUG
+ uc32 t = Advance();
+ DCHECK_EQ(t, code_unit);
+ Back();
nickie 2016/09/07 13:28:28 Why not simply this? DCHECK_EQ(code_unit, static_c
+#endif // DEBUG
+ }
+ bool SetBookmark() {
nickie 2016/09/07 13:28:28 I don't understand why the bookmark should be stor
vogelheim 2016/09/08 13:09:03 You're exactly right: The bookmark doesn't belong
+ bookmark_ = pos();
+ return true;
+ }
+ void ResetToBookmark() {
+ DCHECK(bookmark_ != (size_t)-1);
nickie 2016/09/07 13:28:28 How about something like this? (with a better nam
vogelheim 2016/09/08 13:09:03 Done.
+ Seek(bookmark_);
+ }
protected:
- static const uc32 kEndOfInput = -1;
+ Utf16CharacterStream(const uint16_t* buffer_start,
+ const uint16_t* buffer_cursor,
+ const uint16_t* buffer_end, size_t buffer_pos)
+ : buffer_start_(buffer_start),
+ buffer_cursor_(buffer_cursor),
+ buffer_end_(buffer_end),
+ buffer_pos_(buffer_pos),
+ bookmark_((size_t)-1) {}
nickie 2016/09/07 13:28:28 Again, NO_BOOKMARK here.
vogelheim 2016/09/08 13:09:03 Done.
+ Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
+
+ void ReadBlockAt(size_t new_pos) {
+ // This shouldn't ever be called if new_pos is inside the current buffer.
+ DCHECK(new_pos < buffer_pos_ ||
+ new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
+ buffer_pos_ = new_pos;
+ buffer_cursor_ = buffer_start_;
nickie 2016/09/07 13:28:28 I don't understand this. Maybe related to my next
vogelheim 2016/09/08 13:09:02 See below. This wants to set the position for Rea
+ ReadBlock();
+ }
// Ensures that the buffer_cursor_ points to the code_unit at
- // position pos_ of the input, if possible. If the position
- // is at or after the end of the input, return false. If there
- // are more code_units available, return true.
+ // position pos() of the input. Returns true if data is available; false if
nickie 2016/09/07 13:28:29 How can this not be true? pos() is calculated in s
vogelheim 2016/09/08 13:09:02 It's meant to explain the post-condition for ReadB
+ // pos() is at (or after) the end of input.
virtual bool ReadBlock() = 0;
- virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
+ const uint16_t* buffer_start_;
const uint16_t* buffer_cursor_;
const uint16_t* buffer_end_;
- size_t pos_;
+ size_t buffer_pos_;
+ size_t bookmark_;
};
@@ -138,6 +169,7 @@ class Scanner {
// -1 is outside of the range of any real source code.
static const int kNoOctalLocation = -1;
+ static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
explicit Scanner(UnicodeCache* scanner_contants);

Powered by Google App Engine
This is Rietveld 408576698