src/scanner.cc - Issue 661367: Refactor the scanner interface...

Side by Side Diff: src/scanner.cc

Issue 661367: Refactor the scanner interface... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 10 matching lines...) Expand all Loading...
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "ast.h"	30 #include "ast.h"

	31 #include "handles.h"

31 #include "scanner.h"	32 #include "scanner.h"

32	33

33 namespace v8 {	34 namespace v8 {

34 namespace internal {	35 namespace internal {

35	36

36 // ----------------------------------------------------------------------------	37 // ----------------------------------------------------------------------------

37 // Character predicates	38 // Character predicates

38	39

39	40

40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;	41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
79 }	80 }

80 ASSERT(pos() <= Capacity());	81 ASSERT(pos() <= Capacity());

81 }	82 }

82	83

83	84

84 // ----------------------------------------------------------------------------	85 // ----------------------------------------------------------------------------

85 // UTF16Buffer	86 // UTF16Buffer

86	87

87	88

88 UTF16Buffer::UTF16Buffer()	89 UTF16Buffer::UTF16Buffer()

89 : pos_(0), size_(0) { }	90 : pos_(0), end_(Scanner::kNoEndPosition) { }

90

91

92 Handle<String> UTF16Buffer::SubString(int start, int end) {

93 return internal::SubString(data_, start, end);

94 }

95	91

96	92

97 // CharacterStreamUTF16Buffer	93 // CharacterStreamUTF16Buffer

98 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()	94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()

99 : pushback_buffer_(0), last_(0), stream_(NULL) { }	95 : pushback_buffer_(0), last_(0), stream_(NULL) { }

100	96

101	97

102 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,	98 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,

103 unibrow::CharacterStream* input) {	99 unibrow::CharacterStream* input,

104 data_ = data;	100 int start_position,

105 pos_ = 0;	101 int end_position) {

106 stream_ = input;	102 stream_ = input;

	103 if (start_position > 0) {

	104 SeekForward(start_position);

	105 }

	106 end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt;

107 }	107 }

108	108

109	109

110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {	110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {

111 pushback_buffer()->Add(last_);	111 pushback_buffer()->Add(last_);

112 last_ = ch;	112 last_ = ch;

113 pos_--;	113 pos_--;

114 }	114 }

115	115

116	116

117 uc32 CharacterStreamUTF16Buffer::Advance() {	117 uc32 CharacterStreamUTF16Buffer::Advance() {

	118 ASSERT(end_ != Scanner::kNoEndPosition);

	119 ASSERT(end_ >= 0);

118 // NOTE: It is of importance to Persian / Farsi resources that we do	120 // NOTE: It is of importance to Persian / Farsi resources that we do

119 // not strip format control characters in the scanner; see	121 // not strip format control characters in the scanner; see

120 //	122 //

121 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152	123 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152

122 //	124 //

123 // So, even though ECMA-262, section 7.1, page 11, dictates that we	125 // So, even though ECMA-262, section 7.1, page 11, dictates that we

124 // must remove Unicode format-control characters, we do not. This is	126 // must remove Unicode format-control characters, we do not. This is

125 // in line with how IE and SpiderMonkey handle it.	127 // in line with how IE and SpiderMonkey handle it.

126 if (!pushback_buffer()->is_empty()) {	128 if (!pushback_buffer()->is_empty()) {

127 pos_++;	129 pos_++;

128 return last_ = pushback_buffer()->RemoveLast();	130 return last_ = pushback_buffer()->RemoveLast();

129 } else if (stream_->has_more()) {	131 } else if (stream_->has_more() && pos_ < end_) {

130 pos_++;	132 pos_++;

131 uc32 next = stream_->GetNext();	133 uc32 next = stream_->GetNext();

132 return last_ = next;	134 return last_ = next;

133 } else {	135 } else {

134 // Note: currently the following increment is necessary to avoid a	136 // Note: currently the following increment is necessary to avoid a

135 // test-parser problem!	137 // test-parser problem!

136 pos_++;	138 pos_++;

137 return last_ = static_cast<uc32>(-1);	139 return last_ = static_cast<uc32>(-1);

138 }	140 }

139 }	141 }

140	142

141	143

142 void CharacterStreamUTF16Buffer::SeekForward(int pos) {	144 void CharacterStreamUTF16Buffer::SeekForward(int pos) {

143 pos_ = pos;	145 pos_ = pos;

144 ASSERT(pushback_buffer()->is_empty());	146 ASSERT(pushback_buffer()->is_empty());

145 stream_->Seek(pos);	147 stream_->Seek(pos);

146 }	148 }

147	149

148	150

149 // TwoByteStringUTF16Buffer	151 // ExternalStringUTF16Buffer

150 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()	152 template <typename StringType, typename CharType>

	153 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()

151 : raw_data_(NULL) { }	154 : raw_data_(NULL) { }

152	155

153	156

154 void TwoByteStringUTF16Buffer::Initialize(	157 template <typename StringType, typename CharType>

155 Handle<ExternalTwoByteString> data) {	158 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(

	159 Handle<StringType> data,

	160 int start_position,

	161 int end_position) {

156 ASSERT(!data.is_null());	162 ASSERT(!data.is_null());

	163 raw_data_ = data->resource()->data();

157	164

158 data_ = data;	165 ASSERT(end_position <= data->length());

159 pos_ = 0;	166 if (start_position > 0) {

160	167 SeekForward(start_position);

161 raw_data_ = data->resource()->data();	168 }

162 size_ = data->length();	169 end_ =

	170 end_position != Scanner::kNoEndPosition ? end_position : data->length();

163 }	171 }

164	172

165	173

166 uc32 TwoByteStringUTF16Buffer::Advance() {	174 template <typename StringType, typename CharType>

167 if (pos_ < size_) {	175 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {

	176 if (pos_ < end_) {

168 return raw_data_[pos_++];	177 return raw_data_[pos_++];

169 } else {	178 } else {

170 // note: currently the following increment is necessary to avoid a	179 // note: currently the following increment is necessary to avoid a

171 // test-parser problem!	180 // test-parser problem!

172 pos_++;	181 pos_++;

173 return static_cast<uc32>(-1);	182 return static_cast<uc32>(-1);

174 }	183 }

175 }	184 }

176	185

177	186

178 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {	187 template <typename StringType, typename CharType>

	188 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {

179 pos_--;	189 pos_--;

180 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);	190 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);

181 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);	191 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);

182 }	192 }

183	193

184	194

185 void TwoByteStringUTF16Buffer::SeekForward(int pos) {	195 template <typename StringType, typename CharType>

	196 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {

186 pos_ = pos;	197 pos_ = pos;

187 }	198 }

188	199

189	200

190 // ----------------------------------------------------------------------------	201 // ----------------------------------------------------------------------------

191 // Keyword Matcher	202 // Keyword Matcher

192 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {	203 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {

193 { "break", KEYWORD_PREFIX, Token::BREAK },	204 { "break", KEYWORD_PREFIX, Token::BREAK },

194 { NULL, C, Token::ILLEGAL },	205 { NULL, C, Token::ILLEGAL },

195 { NULL, D, Token::ILLEGAL },	206 { NULL, D, Token::ILLEGAL },

(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
320 }	331 }

321	332

322	333

323 // ----------------------------------------------------------------------------	334 // ----------------------------------------------------------------------------

324 // Scanner	335 // Scanner

325	336

326 Scanner::Scanner(ParserMode pre)	337 Scanner::Scanner(ParserMode pre)

327 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }	338 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }

328	339

329	340

	341 void Scanner::Initialize(Handle<String> source,

	342 ParserLanguage language) {

	343 safe_string_input_buffer_.Reset(source.location());

	344 Init(source, &safe_string_input_buffer_, 0, source->length(), language);

	345 }

	346

	347

	348 void Scanner::Initialize(Handle<String> source,

	349 unibrow::CharacterStream* stream,

	350 ParserLanguage language) {

	351 Init(source, stream, 0, kNoEndPosition, language);

	352 }

	353

	354

	355 void Scanner::Initialize(Handle<String> source,

	356 int start_position,

	357 int end_position,

	358 ParserLanguage language) {

	359 safe_string_input_buffer_.Reset(source.location());

	360 Init(source, &safe_string_input_buffer_,

	361 start_position, end_position, language);

	362 }

	363

	364

330 void Scanner::Init(Handle<String> source,	365 void Scanner::Init(Handle<String> source,

331 unibrow::CharacterStream* stream,	366 unibrow::CharacterStream* stream,

332 int position,	367 int start_position,

	368 int end_position,

333 ParserLanguage language) {	369 ParserLanguage language) {

334 // Initialize the source buffer.	370 // Initialize the source buffer.

335 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {	371 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {

336 two_byte_string_buffer_.Initialize(	372 two_byte_string_buffer_.Initialize(

337 Handle<ExternalTwoByteString>::cast(source));	373 Handle<ExternalTwoByteString>::cast(source),

	374 start_position,

	375 end_position);

338 source_ = &two_byte_string_buffer_;	376 source_ = &two_byte_string_buffer_;

	377 } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {

	378 ascii_string_buffer_.Initialize(

	379 Handle<ExternalAsciiString>::cast(source),

	380 start_position,

	381 end_position);

	382 source_ = &ascii_string_buffer_;

339 } else {	383 } else {

340 char_stream_buffer_.Initialize(source, stream);	384 char_stream_buffer_.Initialize(source,

	385 stream,

	386 start_position,

	387 end_position);

341 source_ = &char_stream_buffer_;	388 source_ = &char_stream_buffer_;

342 }	389 }

343	390

344 position_ = position;

345 is_parsing_json_ = (language == JSON);	391 is_parsing_json_ = (language == JSON);

346	392

347 // Set c0_ (one character ahead)	393 // Set c0_ (one character ahead)

348 ASSERT(kCharacterLookaheadBufferSize == 1);	394 ASSERT(kCharacterLookaheadBufferSize == 1);

349 Advance();	395 Advance();

350 // Initialize current_ to not refer to a literal buffer.	396 // Initialize current_ to not refer to a literal buffer.

351 current_.literal_buffer = NULL;	397 current_.literal_buffer = NULL;

352	398

353 // Skip initial whitespace allowing HTML comment ends just like	399 // Skip initial whitespace allowing HTML comment ends just like

354 // after a newline and scan first token.	400 // after a newline and scan first token.

355 has_line_terminator_before_next_ = true;	401 has_line_terminator_before_next_ = true;

356 SkipWhiteSpace();	402 SkipWhiteSpace();

357 Scan();	403 Scan();

358 }	404 }

359	405

360	406

361 Handle<String> Scanner::SubString(int start, int end) {

362 return source_->SubString(start - position_, end - position_);

363 }

364

365

366 Token::Value Scanner::Next() {	407 Token::Value Scanner::Next() {

367 // BUG 1215673: Find a thread safe way to set a stack limit in	408 // BUG 1215673: Find a thread safe way to set a stack limit in

368 // pre-parse mode. Otherwise, we cannot safely pre-parse from other	409 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

369 // threads.	410 // threads.

370 current_ = next_;	411 current_ = next_;

371 // Check for stack-overflow before returning any tokens.	412 // Check for stack-overflow before returning any tokens.

372 StackLimitCheck check;	413 StackLimitCheck check;

373 if (check.HasOverflowed()) {	414 if (check.HasOverflowed()) {

374 stack_overflow_ = true;	415 stack_overflow_ = true;

375 next_.token = Token::ILLEGAL;	416 next_.token = Token::ILLEGAL;

(...skipping 914 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1290 }	1331 }

1291 AddCharAdvance();	1332 AddCharAdvance();

1292 }	1333 }

1293 TerminateLiteral();	1334 TerminateLiteral();

1294	1335

1295 next_.location.end_pos = source_pos() - 1;	1336 next_.location.end_pos = source_pos() - 1;

1296 return true;	1337 return true;

1297 }	1338 }

1298	1339

1299 } } // namespace v8::internal	1340 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »