src/parsing/scanner.h - Issue 2314663002: Rework scanner-character-streams.

Side by Side Diff: src/parsing/scanner.h

Issue 2314663002: Rework scanner-character-streams. (Closed)

Patch Set: Niko's feedback and fix compile even harder Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #ifndef V8_PARSING_SCANNER_H_	7 #ifndef V8_PARSING_SCANNER_H_

8 #define V8_PARSING_SCANNER_H_	8 #define V8_PARSING_SCANNER_H_

9	9

10 #include "src/allocation.h"	10 #include "src/allocation.h"

11 #include "src/base/logging.h"	11 #include "src/base/logging.h"

12 #include "src/char-predicates.h"	12 #include "src/char-predicates.h"

13 #include "src/globals.h"	13 #include "src/globals.h"

14 #include "src/messages.h"	14 #include "src/messages.h"

15 #include "src/parsing/token.h"	15 #include "src/parsing/token.h"

16 #include "src/unicode-decoder.h"	16 #include "src/unicode-decoder.h"

17 #include "src/unicode.h"	17 #include "src/unicode.h"

18	18

19 namespace v8 {	19 namespace v8 {

20 namespace internal {	20 namespace internal {

21	21

22	22

23 class AstRawString;	23 class AstRawString;

24 class AstValueFactory;	24 class AstValueFactory;

25 class DuplicateFinder;	25 class DuplicateFinder;

	26 class ExternalOneByteString;

	27 class ExternalTwoByteString;

26 class ParserRecorder;	28 class ParserRecorder;

27 class UnicodeCache;	29 class UnicodeCache;

28	30

29

30 // ---------------------------------------------------------------------	31 // ---------------------------------------------------------------------

31 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.	32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.

32 // A code unit is a 16 bit value representing either a 16 bit code point	33 // A code unit is a 16 bit value representing either a 16 bit code point

33 // or one part of a surrogate pair that make a single 21 bit code point.	34 // or one part of a surrogate pair that make a single 21 bit code point.

34

35 class Utf16CharacterStream {	35 class Utf16CharacterStream {

36 public:	36 public:

37 Utf16CharacterStream() : pos_(0) { }	37 static const uc32 kEndOfInput = -1;

	38

38 virtual ~Utf16CharacterStream() { }	39 virtual ~Utf16CharacterStream() { }

39	40

40 // Returns and advances past the next UTF-16 code unit in the input	41 // Returns and advances past the next UTF-16 code unit in the input

41 // stream. If there are no more code units, it returns a negative	42 // stream. If there are no more code units it returns kEndOfInput.

42 // value.

43 inline uc32 Advance() {	43 inline uc32 Advance() {

44 if (buffer_cursor_ < buffer_end_ \|\| ReadBlock()) {	44 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {

45 pos_++;

46 return static_cast<uc32>(*(buffer_cursor_++));	45 return static_cast<uc32>(*(buffer_cursor_++));

	46 } else if (ReadBlock()) {

	47 return static_cast<uc32>(*(buffer_cursor_++));

	48 } else {

	49 // Note: currently the following increment is necessary to avoid a

	50 // parser problem! The scanner treats the final kEndOfInput as

	51 // a code unit with a position, and does math relative to that

	52 // position.

	53 buffer_cursor_++;

	54 return kEndOfInput;

47 }	55 }

48 // Note: currently the following increment is necessary to avoid a

49 // parser problem! The scanner treats the final kEndOfInput as

50 // a code unit with a position, and does math relative to that

51 // position.

52 pos_++;

53

54 return kEndOfInput;

55 }	56 }

56	57

57 // Return the current position in the code unit stream.	58 // Go back by one character in the input stream.

58 // Starts at zero.	59 // This undoes the most recent Advance().

59 inline size_t pos() const { return pos_; }	60 inline void Back() {

60	61 // The common case - if the previous character is within

61 // Skips forward past the next code_unit_count UTF-16 code units	62 // buffer_start_ .. buffer_end_ will be handled locally.

62 // in the input, or until the end of input if that comes sooner.	63 // Otherwise, a new block is requested.

63 // Returns the number of code units actually skipped. If less	64 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {

64 // than code_unit_count,	65 buffer_cursor_--;

65 inline size_t SeekForward(size_t code_unit_count) {	66 } else {

66 size_t buffered_chars = buffer_end_ - buffer_cursor_;	67 ReadBlockAt(pos() - 1);

67 if (code_unit_count <= buffered_chars) {

68 buffer_cursor_ += code_unit_count;

69 pos_ += code_unit_count;

70 return code_unit_count;

71 }	68 }

72 return SlowSeekForward(code_unit_count);

73 }	69 }

74	70

75 // Pushes back the most recently read UTF-16 code unit (or negative	71 // Go back by two characters in the input stream. (This is the same as

76 // value if at end of input), i.e., the value returned by the most recent	72 // calling Back() twice. But Back() may - in some instances - do substantial

77 // call to Advance.	73 // work. Back2() guarantees this work will be done only once.)

78 // Must not be used right after calling SeekForward.	74 inline void Back2() {

79 virtual void PushBack(int32_t code_unit) = 0;	75 if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {

	76 buffer_cursor_ -= 2;

	77 } else {

	78 ReadBlockAt(pos() - 2);

	79 }

	80 }

80	81

81 virtual bool SetBookmark();	82 inline size_t pos() const {

82 virtual void ResetToBookmark();	83 return buffer_pos_ + (buffer_cursor_ - buffer_start_);

	84 }

	85

	86 inline void Seek(size_t pos) {

	87 if (V8_LIKELY(pos >= buffer_pos_ &&

	88 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {

	89 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);

	90 } else {

	91 ReadBlockAt(pos);

	92 }

	93 }

	94

	95 // Legacy API:

	96 void SeekForward(size_t code_unit_count) { Seek(pos() + code_unit_count); }

	97 void PushBack(int32_t code_unit) {

	98 Back();

	99 #ifdef DEBUG

	100 uc32 t = Advance();

	101 DCHECK_EQ(t, code_unit);

	102 Back();

	103 #endif // DEBUG

	104 }

	105 void PushBack2(int32_t code_unit_back_1, int32_t code_unit_back_2) {

	106 Back2();

	107 #ifdef DEBUG

	108 DCHECK_EQ(Advance(), code_unit_back_2);

	109 DCHECK_EQ(Advance(), code_unit_back_1);

	110 Back2();

	111 #endif // DEBUG

	112 }

	113 bool SetBookmark() {

	114 bookmark_ = pos();

	115 return true;

	116 }

	117 void ResetToBookmark() {

	118 DCHECK_NE(bookmark_, kNoBookmark);

	119 Seek(bookmark_);

	120 }

83	121

84 protected:	122 protected:

85 static const uc32 kEndOfInput = -1;	123 static const size_t kNoBookmark;

86	124

87 // Ensures that the buffer_cursor_ points to the code_unit at	125 Utf16CharacterStream(const uint16_t* buffer_start,

88 // position pos_ of the input, if possible. If the position	126 const uint16_t* buffer_cursor,

89 // is at or after the end of the input, return false. If there	127 const uint16_t* buffer_end, size_t buffer_pos)

90 // are more code_units available, return true.	128 : buffer_start_(buffer_start),

	129 buffer_cursor_(buffer_cursor),

	130 buffer_end_(buffer_end),

	131 buffer_pos_(buffer_pos),

	132 bookmark_(kNoBookmark) {}

	133 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}

	134

	135 void ReadBlockAt(size_t new_pos) {

	136 // The callers of this method (Back/Back2/Seek) should handle the easy

	137 // case (seeking within the current buffer), and we should only get here

	138 // if we actually require new data.

	139 // (This is really an efficiency check, not a correctness invariant.)

	140 DCHECK(new_pos < buffer_pos_ \|\|

	141 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));

	142

	143 // Change pos() to point to new_pos.

	144 buffer_pos_ = new_pos;

	145 buffer_cursor_ = buffer_start_;

	146 bool success = ReadBlock();

	147 USE(success);

	148

	149 // Post-conditions: 1, on success, we should be at the right position.

	150 // 2, success == we should have more characters available.

	151 DCHECK_IMPLIES(success, pos() == new_pos);

	152 DCHECK_EQ(success, buffer_cursor_ < buffer_end_);

	153 DCHECK_EQ(success, buffer_start_ < buffer_end_);

	154 }

	155

	156 // Read more data, and update buffer_*_ to point to it.

	157 // Returns true if more data was available.

	158 //

	159 // ReadBlock() may modify any of the buffer_*_ members, but must ensure that

	160 // the result of pos() remains unaffected.

	161 //

	162 // Examples:

	163 // - a stream could fill a separate buffer. Then buffer_start_ and

	164 // buffer_cursor_ would point to the beginning of the buffer, and

	165 // buffer_pos_ would be the old pos().

	166 // - a stream with existing buffer chunks would set buffer_start_ and

	167 // buffer_end_ to cover the full chunk, and then buffer_cursor_ would

	168 // point into the middle of the buffer, while buffer_pos_ would describe

	169 // the start of the buffer.

91 virtual bool ReadBlock() = 0;	170 virtual bool ReadBlock() = 0;

92 virtual size_t SlowSeekForward(size_t code_unit_count) = 0;

93	171

	172 const uint16_t* buffer_start_;

94 const uint16_t* buffer_cursor_;	173 const uint16_t* buffer_cursor_;

95 const uint16_t* buffer_end_;	174 const uint16_t* buffer_end_;

96 size_t pos_;	175 size_t buffer_pos_;

	176 size_t bookmark_;

97 };	177 };

98	178

99	179

100 // ----------------------------------------------------------------------------	180 // ----------------------------------------------------------------------------

101 // JavaScript Scanner.	181 // JavaScript Scanner.

102	182

103 class Scanner {	183 class Scanner {

104 public:	184 public:

105 // Scoped helper for a re-settable bookmark.	185 // Scoped helper for a re-settable bookmark.

106 class BookmarkScope {	186 class BookmarkScope {

(...skipping 24 matching lines...) Expand all Loading...
131 }	211 }

132	212

133 static Location invalid() { return Location(-1, -1); }	213 static Location invalid() { return Location(-1, -1); }

134	214

135 int beg_pos;	215 int beg_pos;

136 int end_pos;	216 int end_pos;

137 };	217 };

138	218

139 // -1 is outside of the range of any real source code.	219 // -1 is outside of the range of any real source code.

140 static const int kNoOctalLocation = -1;	220 static const int kNoOctalLocation = -1;

	221 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;

141	222

142 explicit Scanner(UnicodeCache* scanner_contants);	223 explicit Scanner(UnicodeCache* scanner_contants);

143	224

144 void Initialize(Utf16CharacterStream* source);	225 void Initialize(Utf16CharacterStream* source);

145	226

146 // Returns the next token and advances input.	227 // Returns the next token and advances input.

147 Token::Value Next();	228 Token::Value Next();

148 // Returns the token following peek()	229 // Returns the token following peek()

149 Token::Value PeekAhead();	230 Token::Value PeekAhead();

150 // Returns the current token again.	231 // Returns the current token again.

(...skipping 641 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
792 bool found_html_comment_;	873 bool found_html_comment_;

793	874

794 MessageTemplate::Template scanner_error_;	875 MessageTemplate::Template scanner_error_;

795 Location scanner_error_location_;	876 Location scanner_error_location_;

796 };	877 };

797	878

798 } // namespace internal	879 } // namespace internal

799 } // namespace v8	880 } // namespace v8

800	881

801 #endif // V8_PARSING_SCANNER_H_	882 #endif // V8_PARSING_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/parsing/parser.cc ('k') | src/parsing/scanner.cc » ('j') | src/parsing/scanner-character-streams.cc » ('J')