Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Some fixes, and marching down the very long road to make all compilers happy. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #ifndef V8_PARSING_SCANNER_H_ 7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_
9 9
10 #include "src/allocation.h" 10 #include "src/allocation.h"
11 #include "src/base/logging.h" 11 #include "src/base/logging.h"
12 #include "src/char-predicates.h" 12 #include "src/char-predicates.h"
13 #include "src/globals.h" 13 #include "src/globals.h"
14 #include "src/messages.h" 14 #include "src/messages.h"
15 #include "src/parsing/token.h" 15 #include "src/parsing/token.h"
16 #include "src/unicode-decoder.h" 16 #include "src/unicode-decoder.h"
17 #include "src/unicode.h" 17 #include "src/unicode.h"
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 22
23 class AstRawString; 23 class AstRawString;
24 class AstValueFactory; 24 class AstValueFactory;
25 class DuplicateFinder; 25 class DuplicateFinder;
26 class ExternalOneByteString;
27 class ExternalTwoByteString;
26 class ParserRecorder; 28 class ParserRecorder;
27 class UnicodeCache; 29 class UnicodeCache;
28 30
29
30 // --------------------------------------------------------------------- 31 // ---------------------------------------------------------------------
31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
32 // A code unit is a 16 bit value representing either a 16 bit code point 33 // A code unit is a 16 bit value representing either a 16 bit code point
33 // or one part of a surrogate pair that make a single 21 bit code point. 34 // or one part of a surrogate pair that make a single 21 bit code point.
34
35 class Utf16CharacterStream { 35 class Utf16CharacterStream {
36 public: 36 public:
37 Utf16CharacterStream() : pos_(0) { } 37 static const uc32 kEndOfInput = -1;
38
38 virtual ~Utf16CharacterStream() { } 39 virtual ~Utf16CharacterStream() { }
39 40
40 // Returns and advances past the next UTF-16 code unit in the input 41 // Returns and advances past the next UTF-16 code unit in the input
41 // stream. If there are no more code units, it returns a negative 42 // stream. If there are no more code units, it returns kEndOfInput.
42 // value.
43 inline uc32 Advance() { 43 inline uc32 Advance() {
44 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
45 pos_++;
46 return static_cast<uc32>(*(buffer_cursor_++)); 45 return static_cast<uc32>(*(buffer_cursor_++));
46 } else if (ReadBlock()) {
nickie 2016/09/07 13:28:28 I suppose you did not like how it was before: if (
vogelheim 2016/09/08 13:09:02 I liked it, but: V8_LIKELY supplies information ab
47 return static_cast<uc32>(*(buffer_cursor_++));
48 } else {
49 // Note: currently the following increment is necessary to avoid a
50 // parser problem! The scanner treats the final kEndOfInput as
51 // a code unit with a position, and does math relative to that
52 // position.
53 buffer_cursor_++;
54 return kEndOfInput;
47 } 55 }
48 // Note: currently the following increment is necessary to avoid a
49 // parser problem! The scanner treats the final kEndOfInput as
50 // a code unit with a position, and does math relative to that
51 // position.
52 pos_++;
53
54 return kEndOfInput;
55 } 56 }
56 57
57 // Return the current position in the code unit stream. 58 inline void Back() {
nickie 2016/09/07 13:28:28 A comment explaining what this should do would be
vogelheim 2016/09/08 13:09:03 Done.
58 // Starts at zero. 59 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
59 inline size_t pos() const { return pos_; } 60 buffer_cursor_--;
marja 2016/09/07 09:17:57 Nit: --buffer_cursor_;
vogelheim 2016/09/08 13:09:02 Why? [Here and elsewhere.] ------ I find the pre
60 61 } else {
61 // Skips forward past the next code_unit_count UTF-16 code units 62 ReadBlockAt(pos() - 1);
nickie 2016/09/07 13:28:28 We discussed this offline. This can make a series
vogelheim 2016/09/08 13:09:02 That's almost the same as Seek. :) The Scanner on
62 // in the input, or until the end of input if that comes sooner.
63 // Returns the number of code units actually skipped. If less
64 // than code_unit_count,
65 inline size_t SeekForward(size_t code_unit_count) {
66 size_t buffered_chars = buffer_end_ - buffer_cursor_;
67 if (code_unit_count <= buffered_chars) {
68 buffer_cursor_ += code_unit_count;
69 pos_ += code_unit_count;
70 return code_unit_count;
71 } 63 }
72 return SlowSeekForward(code_unit_count);
73 } 64 }
74 65
75 // Pushes back the most recently read UTF-16 code unit (or negative 66 inline size_t pos() const {
76 // value if at end of input), i.e., the value returned by the most recent 67 return buffer_pos_ + (buffer_cursor_ - buffer_start_);
nickie 2016/09/07 13:28:28 This calculation is quite expensive and the pos()
vogelheim 2016/09/08 13:09:02 Hmm. I think I should try it both ways. (My thin
77 // call to Advance. 68 }
78 // Must not be used right after calling SeekForward.
79 virtual void PushBack(int32_t code_unit) = 0;
80 69
81 virtual bool SetBookmark(); 70 inline void Seek(size_t pos) {
82 virtual void ResetToBookmark(); 71 if (V8_LIKELY(pos >= buffer_pos_ &&
72 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
73 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
74 } else {
75 ReadBlockAt(pos);
76 }
77 }
78
79 // Legacy API:
80 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }
81 void PushBack(int32_t code_unit) {
82 Back();
83 #ifdef DEBUG
84 uc32 t = Advance();
85 DCHECK_EQ(t, code_unit);
86 Back();
nickie 2016/09/07 13:28:28 Why not simply this? DCHECK_EQ(code_unit, static_c
87 #endif // DEBUG
88 }
89 bool SetBookmark() {
nickie 2016/09/07 13:28:28 I don't understand why the bookmark should be stor
vogelheim 2016/09/08 13:09:03 You're exactly right: The bookmark doesn't belong
90 bookmark_ = pos();
91 return true;
92 }
93 void ResetToBookmark() {
94 DCHECK(bookmark_ != (size_t)-1);
nickie 2016/09/07 13:28:28 How about something like this? (with a better nam
vogelheim 2016/09/08 13:09:03 Done.
95 Seek(bookmark_);
96 }
83 97
84 protected: 98 protected:
85 static const uc32 kEndOfInput = -1; 99 Utf16CharacterStream(const uint16_t* buffer_start,
100 const uint16_t* buffer_cursor,
101 const uint16_t* buffer_end, size_t buffer_pos)
102 : buffer_start_(buffer_start),
103 buffer_cursor_(buffer_cursor),
104 buffer_end_(buffer_end),
105 buffer_pos_(buffer_pos),
106 bookmark_((size_t)-1) {}
nickie 2016/09/07 13:28:28 Again, NO_BOOKMARK here.
vogelheim 2016/09/08 13:09:03 Done.
107 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
108
109 void ReadBlockAt(size_t new_pos) {
110 // This shouldn't ever be called if new_pos is inside the current buffer.
111 DCHECK(new_pos < buffer_pos_ ||
112 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
113 buffer_pos_ = new_pos;
114 buffer_cursor_ = buffer_start_;
nickie 2016/09/07 13:28:28 I don't understand this. Maybe related to my next
vogelheim 2016/09/08 13:09:02 See below. This wants to set the position for Rea
115 ReadBlock();
116 }
86 117
87 // Ensures that the buffer_cursor_ points to the code_unit at 118 // Ensures that the buffer_cursor_ points to the code_unit at
88 // position pos_ of the input, if possible. If the position 119 // position pos() of the input. Returns true if data is available; false if
nickie 2016/09/07 13:28:29 How can this not be true? pos() is calculated in s
vogelheim 2016/09/08 13:09:02 It's meant to explain the post-condition for ReadB
89 // is at or after the end of the input, return false. If there 120 // pos() is at (or after) the end of input.
90 // are more code_units available, return true.
91 virtual bool ReadBlock() = 0; 121 virtual bool ReadBlock() = 0;
92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
93 122
123 const uint16_t* buffer_start_;
94 const uint16_t* buffer_cursor_; 124 const uint16_t* buffer_cursor_;
95 const uint16_t* buffer_end_; 125 const uint16_t* buffer_end_;
96 size_t pos_; 126 size_t buffer_pos_;
127 size_t bookmark_;
97 }; 128 };
98 129
99 130
100 // ---------------------------------------------------------------------------- 131 // ----------------------------------------------------------------------------
101 // JavaScript Scanner. 132 // JavaScript Scanner.
102 133
103 class Scanner { 134 class Scanner {
104 public: 135 public:
105 // Scoped helper for a re-settable bookmark. 136 // Scoped helper for a re-settable bookmark.
106 class BookmarkScope { 137 class BookmarkScope {
(...skipping 24 matching lines...) Expand all
131 } 162 }
132 163
133 static Location invalid() { return Location(-1, -1); } 164 static Location invalid() { return Location(-1, -1); }
134 165
135 int beg_pos; 166 int beg_pos;
136 int end_pos; 167 int end_pos;
137 }; 168 };
138 169
139 // -1 is outside of the range of any real source code. 170 // -1 is outside of the range of any real source code.
140 static const int kNoOctalLocation = -1; 171 static const int kNoOctalLocation = -1;
172 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
141 173
142 explicit Scanner(UnicodeCache* scanner_contants); 174 explicit Scanner(UnicodeCache* scanner_contants);
143 175
144 void Initialize(Utf16CharacterStream* source); 176 void Initialize(Utf16CharacterStream* source);
145 177
146 // Returns the next token and advances input. 178 // Returns the next token and advances input.
147 Token::Value Next(); 179 Token::Value Next();
148 // Returns the token following peek() 180 // Returns the token following peek()
149 Token::Value PeekAhead(); 181 Token::Value PeekAhead();
150 // Returns the current token again. 182 // Returns the current token again.
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 bool found_html_comment_; 824 bool found_html_comment_;
793 825
794 MessageTemplate::Template scanner_error_; 826 MessageTemplate::Template scanner_error_;
795 Location scanner_error_location_; 827 Location scanner_error_location_;
796 }; 828 };
797 829
798 } // namespace internal 830 } // namespace internal
799 } // namespace v8 831 } // namespace v8
800 832
801 #endif // V8_PARSING_SCANNER_H_ 833 #endif // V8_PARSING_SCANNER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698