src/scanner.cc - Issue 379005: Fix lint issue.

Side by Side Diff: src/scanner.cc

Issue 379005: Fix lint issue. (Closed)

Patch Set: Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 24 matching lines...) Expand all Loading...
35	35

36 // ----------------------------------------------------------------------------	36 // ----------------------------------------------------------------------------

37 // Character predicates	37 // Character predicates

38	38

39	39

40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;	40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;	41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;	42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;	43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

44	44

	45

45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;	46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

46	47

	48

47 // ----------------------------------------------------------------------------	49 // ----------------------------------------------------------------------------

48 // UTF8Buffer	50 // UTF8Buffer

49	51

50 UTF8Buffer::UTF8Buffer() :	52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }

51 data_(NULL), limit_(NULL) {	53

52 }

53	54

54 UTF8Buffer::~UTF8Buffer() {	55 UTF8Buffer::~UTF8Buffer() {

55 DeleteArray(data_);	56 DeleteArray(data_);

56 }	57 }

57	58

	59

58 void UTF8Buffer::AddCharSlow(uc32 c) {	60 void UTF8Buffer::AddCharSlow(uc32 c) {

59 static const int kCapacityGrowthLimit = 1 * MB;	61 static const int kCapacityGrowthLimit = 1 * MB;

60 if (cursor_ > limit_) {	62 if (cursor_ > limit_) {

61 int old_capacity = Capacity();	63 int old_capacity = Capacity();

62 int old_position = pos();	64 int old_position = pos();

63 int new_capacity = Min(old_capacity * 3, old_capacity	65 int new_capacity =

64 + kCapacityGrowthLimit);	66 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit);

65 char* new_data = NewArray<char> (new_capacity);	67 char* new_data = NewArray<char>(new_capacity);

66 memcpy(new_data, data_, old_position);	68 memcpy(new_data, data_, old_position);

67 DeleteArray(data_);	69 DeleteArray(data_);

68 data_ = new_data;	70 data_ = new_data;

69 cursor_ = new_data + old_position;	71 cursor_ = new_data + old_position;

70 limit_ = ComputeLimit(new_data, new_capacity);	72 limit_ = ComputeLimit(new_data, new_capacity);

71 ASSERT(Capacity() == new_capacity && pos() == old_position);	73 ASSERT(Capacity() == new_capacity && pos() == old_position);

72 }	74 }

73 if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) {	75 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

74 *cursor_++ = c; // Common case: 7-bit ASCII.	76 *cursor_++ = c; // Common case: 7-bit ASCII.

75 } else {	77 } else {

76 cursor_ += unibrow::Utf8::Encode(cursor_, c);	78 cursor_ += unibrow::Utf8::Encode(cursor_, c);

77 }	79 }

78 ASSERT(pos() <= Capacity());	80 ASSERT(pos() <= Capacity());

79 }	81 }

80	82

	83

81 // ----------------------------------------------------------------------------	84 // ----------------------------------------------------------------------------

82 // UTF16Buffer	85 // UTF16Buffer

83	86

84	87

85 UTF16Buffer::UTF16Buffer() :	88 UTF16Buffer::UTF16Buffer()

86 pos_(0), size_(0) {	89 : pos_(0), size_(0) { }

87 }	90

88	91

89 Handle<String> UTF16Buffer::SubString(int start, int end) {	92 Handle<String> UTF16Buffer::SubString(int start, int end) {

90 return internal::SubString(data_, start, end);	93 return internal::SubString(data_, start, end);

91 }	94 }

92	95

	96

93 // CharacterStreamUTF16Buffer	97 // CharacterStreamUTF16Buffer

94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() :	98 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()

95 pushback_buffer_(0), last_(0), stream_(NULL) {	99 : pushback_buffer_(0), last_(0), stream_(NULL) { }

96 }	100

97	101

98 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,	102 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,

99 unibrow::CharacterStream* input) {	103 unibrow::CharacterStream* input) {

100 data_ = data;	104 data_ = data;

101 pos_ = 0;	105 pos_ = 0;

102 stream_ = input;	106 stream_ = input;

103 }	107 }

104	108

	109

105 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {	110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {

106 pushback_buffer()->Add(last_);	111 pushback_buffer()->Add(last_);

107 last_ = ch;	112 last_ = ch;

108 pos_--;	113 pos_--;

109 }	114 }

110	115

	116

111 uc32 CharacterStreamUTF16Buffer::Advance() {	117 uc32 CharacterStreamUTF16Buffer::Advance() {

112 // NOTE: It is of importance to Persian / Farsi resources that we do	118 // NOTE: It is of importance to Persian / Farsi resources that we do

113 // not strip format control characters in the scanner; see	119 // not strip format control characters in the scanner; see

114 //	120 //

115 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152	121 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152

116 //	122 //

117 // So, even though ECMA-262, section 7.1, page 11, dictates that we	123 // So, even though ECMA-262, section 7.1, page 11, dictates that we

118 // must remove Unicode format-control characters, we do not. This is	124 // must remove Unicode format-control characters, we do not. This is

119 // in line with how IE and SpiderMonkey handles it.	125 // in line with how IE and SpiderMonkey handles it.

120 if (!pushback_buffer()->is_empty()) {	126 if (!pushback_buffer()->is_empty()) {

121 pos_++;	127 pos_++;

122 return last_ = pushback_buffer()->RemoveLast();	128 return last_ = pushback_buffer()->RemoveLast();

123 } else if (stream_->has_more()) {	129 } else if (stream_->has_more()) {

124 pos_++;	130 pos_++;

125 uc32 next = stream_->GetNext();	131 uc32 next = stream_->GetNext();

126 return last_ = next;	132 return last_ = next;

127 } else {	133 } else {

128 // Note: currently the following increment is necessary to avoid a	134 // Note: currently the following increment is necessary to avoid a

129 // test-parser problem!	135 // test-parser problem!

130 pos_++;	136 pos_++;

131 return last_ = static_cast<uc32> (-1);	137 return last_ = static_cast<uc32>(-1);

132 }	138 }

133 }	139 }

134	140

	141

135 void CharacterStreamUTF16Buffer::SeekForward(int pos) {	142 void CharacterStreamUTF16Buffer::SeekForward(int pos) {

136 pos_ = pos;	143 pos_ = pos;

137 ASSERT(pushback_buffer()->is_empty());	144 ASSERT(pushback_buffer()->is_empty());

138 stream_->Seek(pos);	145 stream_->Seek(pos);

139 }	146 }

140	147

	148

141 // TwoByteStringUTF16Buffer	149 // TwoByteStringUTF16Buffer

142 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() :	150 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()

143 raw_data_(NULL) {	151 : raw_data_(NULL) { }

144 }

145	152

146 void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) {	153

	154 void TwoByteStringUTF16Buffer::Initialize(

	155 Handle<ExternalTwoByteString> data) {

147 ASSERT(!data.is_null());	156 ASSERT(!data.is_null());

148	157

149 data_ = data;	158 data_ = data;

150 pos_ = 0;	159 pos_ = 0;

151	160

152 raw_data_ = data->resource()->data();	161 raw_data_ = data->resource()->data();

153 size_ = data->length();	162 size_ = data->length();

154 }	163 }

155	164

	165

156 uc32 TwoByteStringUTF16Buffer::Advance() {	166 uc32 TwoByteStringUTF16Buffer::Advance() {

157 if (pos_ < size_) {	167 if (pos_ < size_) {

158 return raw_data_[pos_++];	168 return raw_data_[pos_++];

159 } else {	169 } else {

160 // note: currently the following increment is necessary to avoid a	170 // note: currently the following increment is necessary to avoid a

161 // test-parser problem!	171 // test-parser problem!

162 pos_++;	172 pos_++;

163 return static_cast<uc32> (-1);	173 return static_cast<uc32>(-1);

164 }	174 }

165 }	175 }

166	176

	177

167 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {	178 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {

168 pos_--;	179 pos_--;

169 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);	180 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);

170 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);	181 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);

171 }	182 }

172	183

	184

173 void TwoByteStringUTF16Buffer::SeekForward(int pos) {	185 void TwoByteStringUTF16Buffer::SeekForward(int pos) {

174 pos_ = pos;	186 pos_ = pos;

175 }	187 }

176	188

	189

177 // ----------------------------------------------------------------------------	190 // ----------------------------------------------------------------------------

178 // Keyword Matcher	191 // Keyword Matcher

179 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break",	192 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {

180 KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D,	193 { "break", KEYWORD_PREFIX, Token::BREAK },

181 Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F,	194 { NULL, C, Token::ILLEGAL },

182 Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL,	195 { NULL, D, Token::ILLEGAL },

183 UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL,	196 { "else", KEYWORD_PREFIX, Token::ELSE },

184 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, {	197 { NULL, F, Token::ILLEGAL },

185 NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL },	198 { NULL, UNMATCHABLE, Token::ILLEGAL },

186 { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL,	199 { NULL, UNMATCHABLE, Token::ILLEGAL },

187 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, {	200 { NULL, I, Token::ILLEGAL },

188 "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX,	201 { NULL, UNMATCHABLE, Token::ILLEGAL },

189 Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE,	202 { NULL, UNMATCHABLE, Token::ILLEGAL },

190 Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W,	203 { NULL, UNMATCHABLE, Token::ILLEGAL },

191 Token::ILLEGAL } };	204 { NULL, UNMATCHABLE, Token::ILLEGAL },

	205 { NULL, N, Token::ILLEGAL },

	206 { NULL, UNMATCHABLE, Token::ILLEGAL },

	207 { NULL, UNMATCHABLE, Token::ILLEGAL },

	208 { NULL, UNMATCHABLE, Token::ILLEGAL },

	209 { "return", KEYWORD_PREFIX, Token::RETURN },

	210 { "switch", KEYWORD_PREFIX, Token::SWITCH },

	211 { NULL, T, Token::ILLEGAL },

	212 { NULL, UNMATCHABLE, Token::ILLEGAL },

	213 { NULL, V, Token::ILLEGAL },

	214 { NULL, W, Token::ILLEGAL }

	215 };

	216

192	217

193 void KeywordMatcher::Step(uc32 input) {	218 void KeywordMatcher::Step(uc32 input) {

194 switch (state_) {	219 switch (state_) {

195 case INITIAL: {	220 case INITIAL: {

196 // matching the first character is the only state with significant fanout.	221 // matching the first character is the only state with significant fanout.

197 // Match only lower-case letters in range 'b'..'w'.	222 // Match only lower-case letters in range 'b'..'w'.

198 unsigned int offset = input - kFirstCharRangeMin;	223 unsigned int offset = input - kFirstCharRangeMin;

199 if (offset < kFirstCharRangeLength) {	224 if (offset < kFirstCharRangeLength) {

200 state_ = first_states_[offset].state;	225 state_ = first_states_[offset].state;

201 if (state_ == KEYWORD_PREFIX) {	226 if (state_ == KEYWORD_PREFIX) {

(...skipping 13 matching lines...) Expand all Loading...
215 state_ = KEYWORD_MATCHED;	240 state_ = KEYWORD_MATCHED;

216 token_ = keyword_token_;	241 token_ = keyword_token_;

217 }	242 }

218 return;	243 return;

219 }	244 }

220 break;	245 break;

221 case KEYWORD_MATCHED:	246 case KEYWORD_MATCHED:

222 token_ = Token::IDENTIFIER;	247 token_ = Token::IDENTIFIER;

223 break;	248 break;

224 case C:	249 case C:

225 if (MatchState(input, 'a', CA))	250 if (MatchState(input, 'a', CA)) return;

226 return;	251 if (MatchState(input, 'o', CO)) return;

227 if (MatchState(input, 'o', CO))

228 return;

229 break;	252 break;

230 case CA:	253 case CA:

231 if (MatchKeywordStart(input, "case", 2, Token::CASE))	254 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;

232 return;	255 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;

233 if (MatchKeywordStart(input, "catch", 2, Token::CATCH))

234 return;

235 break;	256 break;

236 case CO:	257 case CO:

237 if (MatchState(input, 'n', CON))	258 if (MatchState(input, 'n', CON)) return;

238 return;

239 break;	259 break;

240 case CON:	260 case CON:

241 if (MatchKeywordStart(input, "const", 3, Token::CONST))	261 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;

242 return;	262 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;

243 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE))

244 return;

245 break;	263 break;

246 case D:	264 case D:

247 if (MatchState(input, 'e', DE))	265 if (MatchState(input, 'e', DE)) return;

248 return;	266 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;

249 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO))

250 return;

251 break;	267 break;

252 case DE:	268 case DE:

253 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER))	269 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;

254 return;	270 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;

255 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT))	271 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;

256 return;

257 if (MatchKeywordStart(input, "delete", 2, Token::DELETE))

258 return;

259 break;	272 break;

260 case F:	273 case F:

261 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL))	274 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;

262 return;	275 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;

263 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY))	276 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;

264 return;	277 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;

265 if (MatchKeywordStart(input, "for", 1, Token::FOR))

266 return;

267 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION))

268 return;

269 break;	278 break;

270 case I:	279 case I:

271 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF))	280 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;

272 return;	281 if (MatchKeyword(input, 'n', IN, Token::IN)) return;

273 if (MatchKeyword(input, 'n', IN, Token::IN))

274 return;

275 break;	282 break;

276 case IN:	283 case IN:

277 token_ = Token::IDENTIFIER;	284 token_ = Token::IDENTIFIER;

278 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {	285 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {

279 return;	286 return;

280 }	287 }

281 break;	288 break;

282 case N:	289 case N:

283 if (MatchKeywordStart(input, "native", 1, Token::NATIVE))	290 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;

284 return;	291 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;

285 if (MatchKeywordStart(input, "new", 1, Token::NEW))	292 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;

286 return;

287 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL))

288 return;

289 break;	293 break;

290 case T:	294 case T:

291 if (MatchState(input, 'h', TH))	295 if (MatchState(input, 'h', TH)) return;

292 return;	296 if (MatchState(input, 'r', TR)) return;

293 if (MatchState(input, 'r', TR))	297 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;

294 return;

295 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF))

296 return;

297 break;	298 break;

298 case TH:	299 case TH:

299 if (MatchKeywordStart(input, "this", 2, Token::THIS))	300 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;

300 return;	301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;

301 if (MatchKeywordStart(input, "throw", 2, Token::THROW))

302 return;

303 break;	302 break;

304 case TR:	303 case TR:

305 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL))	304 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;

306 return;	305 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;

307 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY))

308 return;

309 break;	306 break;

310 case V:	307 case V:

311 if (MatchKeywordStart(input, "var", 1, Token::VAR))	308 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;

312 return;	309 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;

313 if (MatchKeywordStart(input, "void", 1, Token::VOID))

314 return;

315 break;	310 break;

316 case W:	311 case W:

317 if (MatchKeywordStart(input, "while", 1, Token::WHILE))	312 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;

318 return;	313 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

319 if (MatchKeywordStart(input, "with", 1, Token::WITH))

320 return;

321 break;	314 break;

322 default:	315 default:

323 UNREACHABLE();	316 UNREACHABLE();

324 }	317 }

325 // On fallthrough, it's a failure.	318 // On fallthrough, it's a failure.

326 state_ = UNMATCHABLE;	319 state_ = UNMATCHABLE;

327 }	320 }

328	321

	322

329 // ----------------------------------------------------------------------------	323 // ----------------------------------------------------------------------------

330 // Scanner	324 // Scanner

331	325

332 Scanner::Scanner(bool pre) :	326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { }

333 stack_overflow_(false), is_pre_parsing_(pre) {	327

334 }

335	328

336 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,	329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,

337 int position) {	330 int position) {

338 // Initialize the source buffer.	331 // Initialize the source buffer.

339 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {	332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {

340 two_byte_string_buffer_.Initialize(	333 two_byte_string_buffer_.Initialize(

341 Handle<ExternalTwoByteString>::cast(	334 Handle<ExternalTwoByteString>::cast(source));

342 source));

343 source_ = &two_byte_string_buffer_;	335 source_ = &two_byte_string_buffer_;

344 } else {	336 } else {

345 char_stream_buffer_.Initialize(source, stream);	337 char_stream_buffer_.Initialize(source, stream);

346 source_ = &char_stream_buffer_;	338 source_ = &char_stream_buffer_;

347 }	339 }

348	340

349 position_ = position;	341 position_ = position;

350	342

351 // Set c0_ (one character ahead)	343 // Set c0_ (one character ahead)

352 ASSERT(kCharacterLookaheadBufferSize == 1);	344 ASSERT(kCharacterLookaheadBufferSize == 1);

353 Advance();	345 Advance();

354	346

355 // Skip initial whitespace allowing HTML comment ends just like	347 // Skip initial whitespace allowing HTML comment ends just like

356 // after a newline and scan first token.	348 // after a newline and scan first token.

357 has_line_terminator_before_next_ = true;	349 has_line_terminator_before_next_ = true;

358 SkipWhiteSpace();	350 SkipWhiteSpace();

359 Scan();	351 Scan();

360 }	352 }

361	353

	354

362 Handle<String> Scanner::SubString(int start, int end) {	355 Handle<String> Scanner::SubString(int start, int end) {

363 return source_->SubString(start - position_, end - position_);	356 return source_->SubString(start - position_, end - position_);

364 }	357 }

365	358

	359

366 Token::Value Scanner::Next() {	360 Token::Value Scanner::Next() {

367 // BUG 1215673: Find a thread safe way to set a stack limit in	361 // BUG 1215673: Find a thread safe way to set a stack limit in

368 // pre-parse mode. Otherwise, we cannot safely pre-parse from other	362 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

369 // threads.	363 // threads.

370 current_ = next_;	364 current_ = next_;

371 // Check for stack-overflow before returning any tokens.	365 // Check for stack-overflow before returning any tokens.

372 StackLimitCheck check;	366 StackLimitCheck check;

373 if (check.HasOverflowed()) {	367 if (check.HasOverflowed()) {

374 stack_overflow_ = true;	368 stack_overflow_ = true;

375 next_.token = Token::ILLEGAL;	369 next_.token = Token::ILLEGAL;

376 next_.literal_buffer = NULL;	370 next_.literal_buffer = NULL;

377 } else {	371 } else {

378 Scan();	372 Scan();

379 }	373 }

380 return current_.token;	374 return current_.token;

381 }	375 }

382	376

	377

383 void Scanner::StartLiteral() {	378 void Scanner::StartLiteral() {

384 // Use the first buffer unless it's currently in use by the current_ token.	379 // Use the first buffer unless it's currently in use by the current_ token.

385 // In most cases we won't have two literals/identifiers in a row, so	380 // In most cases, we won't have two literals/identifiers in a row so

386 // the second buffer won't be used very often and is unlikely to grow much.	381 // the second buffer won't be used very often and is unlikely to grow much.

387 UTF8Buffer* free_buffer =	382 UTF8Buffer* free_buffer =

388 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_	383 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_

389 : &literal_buffer_2_;	384 : &literal_buffer_2_;

390 next_.literal_buffer = free_buffer;	385 next_.literal_buffer = free_buffer;

391 free_buffer->Reset();	386 free_buffer->Reset();

392 }	387 }

393	388

	389

394 void Scanner::AddChar(uc32 c) {	390 void Scanner::AddChar(uc32 c) {

395 next_.literal_buffer->AddChar(c);	391 next_.literal_buffer->AddChar(c);

396 }	392 }

397	393

	394

398 void Scanner::TerminateLiteral() {	395 void Scanner::TerminateLiteral() {

399 AddChar(0);	396 AddChar(0);

400 }	397 }

401	398

	399

402 void Scanner::AddCharAdvance() {	400 void Scanner::AddCharAdvance() {

403 AddChar(c0_);	401 AddChar(c0_);

404 Advance();	402 Advance();

405 }	403 }

406	404

	405

407 static inline bool IsByteOrderMark(uc32 c) {	406 static inline bool IsByteOrderMark(uc32 c) {

408 // The Unicode value U+FFFE is guaranteed never to be assigned as a	407 // The Unicode value U+FFFE is guaranteed never to be assigned as a

409 // Unicode character; this implies that in a Unicode context the	408 // Unicode character; this implies that in a Unicode context the

410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	409 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

411 // character expressed in little-endian byte order (since it could	410 // character expressed in little-endian byte order (since it could

412 // not be a U+FFFE character expressed in big-endian byte	411 // not be a U+FFFE character expressed in big-endian byte

413 // order). Nevertheless, we check for it to be compatible with	412 // order). Nevertheless, we check for it to be compatible with

414 // Spidermonkey.	413 // Spidermonkey.

415 return c == 0xFEFF || c == 0xFFFE;	414 return c == 0xFEFF || c == 0xFFFE;

416 }	415 }

417	416

	417

418 bool Scanner::SkipWhiteSpace() {	418 bool Scanner::SkipWhiteSpace() {

419 int start_position = source_pos();	419 int start_position = source_pos();

420	420

421 while (true) {	421 while (true) {

422 // We treat byte-order marks (BOMs) as whitespace for better	422 // We treat byte-order marks (BOMs) as whitespace for better

423 // compatibility with Spidermonkey and other JavaScript engines.	423 // compatibility with Spidermonkey and other JavaScript engines.

424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

425 // IsWhiteSpace() includes line terminators!	425 // IsWhiteSpace() includes line terminators!

426 if (kIsLineTerminator.get(c0_)) {	426 if (kIsLineTerminator.get(c0_)) {

427 // Ignore line terminators, but remember them. This is necessary	427 // Ignore line terminators, but remember them. This is necessary

(...skipping 10 matching lines...) Expand all Loading...
438 if (c0_ == '-' && has_line_terminator_before_next_) {	438 if (c0_ == '-' && has_line_terminator_before_next_) {

439 Advance();	439 Advance();

440 if (c0_ == '-') {	440 if (c0_ == '-') {

441 Advance();	441 Advance();

442 if (c0_ == '>') {	442 if (c0_ == '>') {

443 // Treat the rest of the line as a comment.	443 // Treat the rest of the line as a comment.

444 SkipSingleLineComment();	444 SkipSingleLineComment();

445 // Continue skipping white space after the comment.	445 // Continue skipping white space after the comment.

446 continue;	446 continue;

447 }	447 }

448 PushBack('-'); // undo Advance()	448 PushBack('-'); // undo Advance()

449 }	449 }

450 PushBack('-'); // undo Advance()	450 PushBack('-'); // undo Advance()

451 }	451 }

452 // Return whether or not we skipped any characters.	452 // Return whether or not we skipped any characters.

453 return source_pos() != start_position;	453 return source_pos() != start_position;

454 }	454 }

455 }	455 }

456	456

	457

457 Token::Value Scanner::SkipSingleLineComment() {	458 Token::Value Scanner::SkipSingleLineComment() {

458 Advance();	459 Advance();

459	460

460 // The line terminator at the end of the line is not considered	461 // The line terminator at the end of the line is not considered

461 // to be part of the single-line comment; it is recognized	462 // to be part of the single-line comment; it is recognized

462 // separately by the lexical grammar and becomes part of the	463 // separately by the lexical grammar and becomes part of the

463 // stream of input elements for the syntactic grammar (see	464 // stream of input elements for the syntactic grammar (see

464 // ECMA-262, section 7.4, page 12).	465 // ECMA-262, section 7.4, page 12).

465 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	466 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

466 Advance();	467 Advance();

467 }	468 }

468	469

469 return Token::WHITESPACE;	470 return Token::WHITESPACE;

470 }	471 }

471	472

	473

472 Token::Value Scanner::SkipMultiLineComment() {	474 Token::Value Scanner::SkipMultiLineComment() {

473 ASSERT(c0_ == '*');	475 ASSERT(c0_ == '*');

474 Advance();	476 Advance();

475	477

476 while (c0_ >= 0) {	478 while (c0_ >= 0) {

477 char ch = c0_;	479 char ch = c0_;

478 Advance();	480 Advance();

479 // If we have reached the end of the multi-line comment, we	481 // If we have reached the end of the multi-line comment, we

480 // consume the '/' and insert a whitespace. This way all	482 // consume the '/' and insert a whitespace. This way all

481 // multi-line comments are treated as whitespace - even the ones	483 // multi-line comments are treated as whitespace - even the ones

482 // containing line terminators. This contradicts ECMA-262, section	484 // containing line terminators. This contradicts ECMA-262, section

483 // 7.4, page 12, that says that multi-line comments containing	485 // 7.4, page 12, that says that multi-line comments containing

484 // line terminators should be treated as a line terminator, but it	486 // line terminators should be treated as a line terminator, but it

485 // matches the behaviour of SpiderMonkey and KJS.	487 // matches the behaviour of SpiderMonkey and KJS.

486 if (ch == '*' && c0_ == '/') {	488 if (ch == '*' && c0_ == '/') {

487 c0_ = ' ';	489 c0_ = ' ';

488 return Token::WHITESPACE;	490 return Token::WHITESPACE;

489 }	491 }

490 }	492 }

491	493

492 // Unterminated multi-line comment.	494 // Unterminated multi-line comment.

493 return Token::ILLEGAL;	495 return Token::ILLEGAL;

494 }	496 }

495	497

	498

496 Token::Value Scanner::ScanHtmlComment() {	499 Token::Value Scanner::ScanHtmlComment() {

497 // Check for <!-- comments.	500 // Check for <!-- comments.

498 ASSERT(c0_ == '!');	501 ASSERT(c0_ == '!');

499 Advance();	502 Advance();

500 if (c0_ == '-') {	503 if (c0_ == '-') {

501 Advance();	504 Advance();

502 if (c0_ == '-')	505 if (c0_ == '-') return SkipSingleLineComment();

503 return SkipSingleLineComment();	506 PushBack('-'); // undo Advance()

504 PushBack('-'); // undo Advance()

505 }	507 }

506 PushBack('!'); // undo Advance()	508 PushBack('!'); // undo Advance()

507 ASSERT(c0_ == '!');	509 ASSERT(c0_ == '!');

508 return Token::LT;	510 return Token::LT;

509 }	511 }

510	512

	513

511 void Scanner::Scan() {	514 void Scanner::Scan() {

512 Token::Value token;	515 Token::Value token;

513 has_line_terminator_before_next_ = false;	516 has_line_terminator_before_next_ = false;

514 do {	517 do {

515 // Remember the position of the next token	518 // Remember the position of the next token

516 next_.location.beg_pos = source_pos();	519 next_.location.beg_pos = source_pos();

517	520

518 switch (c0_) {	521 switch (c0_) {

519 case ' ':	522 case ' ':

520 case '\t':	523 case '\t':

521 Advance();	524 Advance();

522 token = Token::WHITESPACE;	525 token = Token::WHITESPACE;

523 break;	526 break;

524	527

525 case '\n':	528 case '\n':

526 Advance();	529 Advance();

527 has_line_terminator_before_next_ = true;	530 has_line_terminator_before_next_ = true;

528 token = Token::WHITESPACE;	531 token = Token::WHITESPACE;

529 break;	532 break;

530	533

531 case '"':	534 case '"': case '\'':

532 case '\'':

533 token = ScanString();	535 token = ScanString();

534 break;	536 break;

535	537

536 case '<':	538 case '<':

537 // < <= << <<= <!--	539 // < <= << <<= <!--

538 Advance();	540 Advance();

539 if (c0_ == '=') {	541 if (c0_ == '=') {

540 token = Select(Token::LTE);	542 token = Select(Token::LTE);

541 } else if (c0_ == '<') {	543 } else if (c0_ == '<') {

542 token = Select('=', Token::ASSIGN_SHL, Token::SHL);	544 token = Select('=', Token::ASSIGN_SHL, Token::SHL);

(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
741 }	743 }

742	744

743 // Continue scanning for tokens as long as we're just skipping	745 // Continue scanning for tokens as long as we're just skipping

744 // whitespace.	746 // whitespace.

745 } while (token == Token::WHITESPACE);	747 } while (token == Token::WHITESPACE);

746	748

747 next_.location.end_pos = source_pos();	749 next_.location.end_pos = source_pos();

748 next_.token = token;	750 next_.token = token;

749 }	751 }

750	752

	753

751 void Scanner::SeekForward(int pos) {	754 void Scanner::SeekForward(int pos) {

752 source_->SeekForward(pos - 1);	755 source_->SeekForward(pos - 1);

753 Advance();	756 Advance();

754 Scan();	757 Scan();

755 }	758 }

756	759

	760

757 uc32 Scanner::ScanHexEscape(uc32 c, int length) {	761 uc32 Scanner::ScanHexEscape(uc32 c, int length) {

758 ASSERT(length <= 4); // prevent overflow	762 ASSERT(length <= 4); // prevent overflow

759	763

760 uc32 digits[4];	764 uc32 digits[4];

761 uc32 x = 0;	765 uc32 x = 0;

762 for (int i = 0; i < length; i++) {	766 for (int i = 0; i < length; i++) {

763 digits[i] = c0_;	767 digits[i] = c0_;

764 int d = HexValue(c0_);	768 int d = HexValue(c0_);

765 if (d < 0) {	769 if (d < 0) {

766 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	770 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

767 // should be illegal, but other JS VMs just return the	771 // should be illegal, but other JS VMs just return the

768 // non-escaped version of the original character.	772 // non-escaped version of the original character.

769	773

770 // Push back digits read, except the last one (in c0_).	774 // Push back digits read, except the last one (in c0_).

771 for (int j = i - 1; j >= 0; j--) {	775 for (int j = i-1; j >= 0; j--) {

772 PushBack(digits[j]);	776 PushBack(digits[j]);

773 }	777 }

774 // Notice: No handling of error - treat it as "\u"->"u".	778 // Notice: No handling of error - treat it as "\u"->"u".

775 return c;	779 return c;

776 }	780 }

777 x = x * 16 + d;	781 x = x * 16 + d;

778 Advance();	782 Advance();

779 }	783 }

780	784

781 return x;	785 return x;

782 }	786 }

783	787

	788

784 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	789 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

785 // ECMA-262. Other JS VMs support them.	790 // ECMA-262. Other JS VMs support them.

786 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {	791 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

787 uc32 x = c - '0';	792 uc32 x = c - '0';

788 for (int i = 0; i < length; i++) {	793 for (int i = 0; i < length; i++) {

789 int d = c0_ - '0';	794 int d = c0_ - '0';

790 if (d < 0 || d > 7)	795 if (d < 0 || d > 7) break;

791 break;

792 int nx = x * 8 + d;	796 int nx = x * 8 + d;

793 if (nx >= 256)	797 if (nx >= 256) break;

794 break;

795 x = nx;	798 x = nx;

796 Advance();	799 Advance();

797 }	800 }

798 return x;	801 return x;

799 }	802 }

800	803

	804

801 void Scanner::ScanEscape() {	805 void Scanner::ScanEscape() {

802 uc32 c = c0_;	806 uc32 c = c0_;

803 Advance();	807 Advance();

804	808

805 // Skip escaped newlines.	809 // Skip escaped newlines.

806 if (kIsLineTerminator.get(c)) {	810 if (kIsLineTerminator.get(c)) {

807 // Allow CR+LF newlines in multiline string literals.	811 // Allow CR+LF newlines in multiline string literals.

808 if (IsCarriageReturn(c) && IsLineFeed(c0_))	812 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

809 Advance();

810 // Allow LF+CR newlines in multiline string literals.	813 // Allow LF+CR newlines in multiline string literals.

811 if (IsLineFeed(c) && IsCarriageReturn(c0_))	814 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

812 Advance();

813 return;	815 return;

814 }	816 }

815	817

816 switch (c) {	818 switch (c) {

817 case '\'': // fall through	819 case '\'': // fall through

818 case '"': // fall through	820 case '"' : // fall through

819 case '\\':	821 case '\\': break;

820 break;	822 case 'b' : c = '\b'; break;

821 case 'b':	823 case 'f' : c = '\f'; break;

822 c = '\b';	824 case 'n' : c = '\n'; break;

823 break;	825 case 'r' : c = '\r'; break;

824 case 'f':	826 case 't' : c = '\t'; break;

825 c = '\f';	827 case 'u' : c = ScanHexEscape(c, 4); break;

826 break;	828 case 'v' : c = '\v'; break;

827 case 'n':	829 case 'x' : c = ScanHexEscape(c, 2); break;

828 c = '\n';	830 case '0' : // fall through

829 break;	831 case '1' : // fall through

830 case 'r':	832 case '2' : // fall through

831 c = '\r';	833 case '3' : // fall through

832 break;	834 case '4' : // fall through

833 case 't':	835 case '5' : // fall through

834 c = '\t';	836 case '6' : // fall through

835 break;	837 case '7' : c = ScanOctalEscape(c, 2); break;

836 case 'u':

837 c = ScanHexEscape(c, 4);

838 break;

839 case 'v':

840 c = '\v';

841 break;

842 case 'x':

843 c = ScanHexEscape(c, 2);

844 break;

845 case '0': // fall through

846 case '1': // fall through

847 case '2': // fall through

848 case '3': // fall through

849 case '4': // fall through

850 case '5': // fall through

851 case '6': // fall through

852 case '7':

853 c = ScanOctalEscape(c, 2);

854 break;

855 }	838 }

856	839

857 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these	840 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

858 // should be illegal, but they are commonly handled	841 // should be illegal, but they are commonly handled

859 // as non-escaped characters by JS VMs.	842 // as non-escaped characters by JS VMs.

860 AddChar(c);	843 AddChar(c);

861 }	844 }

862	845

	846

863 Token::Value Scanner::ScanString() {	847 Token::Value Scanner::ScanString() {

864 uc32 quote = c0_;	848 uc32 quote = c0_;

865 Advance(); // consume quote	849 Advance(); // consume quote

866	850

867 StartLiteral();	851 StartLiteral();

868 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	852 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

869 uc32 c = c0_;	853 uc32 c = c0_;

870 Advance();	854 Advance();

871 if (c == '\\') {	855 if (c == '\\') {

872 if (c0_ < 0)	856 if (c0_ < 0) return Token::ILLEGAL;

873 return Token::ILLEGAL;

874 ScanEscape();	857 ScanEscape();

875 } else {	858 } else {

876 AddChar(c);	859 AddChar(c);

877 }	860 }

878 }	861 }

879 if (c0_ != quote) {	862 if (c0_ != quote) {

880 return Token::ILLEGAL;	863 return Token::ILLEGAL;

881 }	864 }

882 TerminateLiteral();	865 TerminateLiteral();

883	866

884 Advance(); // consume quote	867 Advance(); // consume quote

885 return Token::STRING;	868 return Token::STRING;

886 }	869 }

887	870

	871

888 Token::Value Scanner::Select(Token::Value tok) {	872 Token::Value Scanner::Select(Token::Value tok) {

889 Advance();	873 Advance();

890 return tok;	874 return tok;

891 }	875 }

892	876

	877

893 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {	878 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {

894 Advance();	879 Advance();

895 if (c0_ == next) {	880 if (c0_ == next) {

896 Advance();	881 Advance();

897 return then;	882 return then;

898 } else {	883 } else {

899 return else_;	884 return else_;

900 }	885 }

901 }	886 }

902	887

	888

903 // Returns true if any decimal digits were scanned, returns false otherwise.	889 // Returns true if any decimal digits were scanned, returns false otherwise.

904 void Scanner::ScanDecimalDigits() {	890 void Scanner::ScanDecimalDigits() {

905 while (IsDecimalDigit(c0_))	891 while (IsDecimalDigit(c0_))

906 AddCharAdvance();	892 AddCharAdvance();

907 }	893 }

908	894

	895

909 Token::Value Scanner::ScanNumber(bool seen_period) {	896 Token::Value Scanner::ScanNumber(bool seen_period) {

910 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	897 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

911	898

912 enum {	899 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

913 DECIMAL, HEX, OCTAL

914 } kind = DECIMAL;

915	900

916 StartLiteral();	901 StartLiteral();

917 if (seen_period) {	902 if (seen_period) {

918 // we have already seen a decimal point of the float	903 // we have already seen a decimal point of the float

919 AddChar('.');	904 AddChar('.');

920 ScanDecimalDigits(); // we know we have at least one digit	905 ScanDecimalDigits(); // we know we have at least one digit

921	906

922 } else {	907 } else {

923 // if the first character is '0' we must check for octals and hex	908 // if the first character is '0' we must check for octals and hex

924 if (c0_ == '0') {	909 if (c0_ == '0') {

925 AddCharAdvance();	910 AddCharAdvance();

926	911

927 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number	912 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number

928 if (c0_ == 'x' || c0_ == 'X') {	913 if (c0_ == 'x' || c0_ == 'X') {

929 // hex number	914 // hex number

930 kind = HEX;	915 kind = HEX;

931 AddCharAdvance();	916 AddCharAdvance();

932 if (!IsHexDigit(c0_))	917 if (!IsHexDigit(c0_))

933 // we must have at least one hex digit after 'x'/'X'	918 // we must have at least one hex digit after 'x'/'X'

934 return Token::ILLEGAL;	919 return Token::ILLEGAL;

935 while (IsHexDigit(c0_))	920 while (IsHexDigit(c0_))

936 AddCharAdvance();	921 AddCharAdvance();

937	922

938 } else if ('0' <= c0_ && c0_ <= '7') {	923 } else if ('0' <= c0_ && c0_ <= '7') {

939 // (possible) octal number	924 // (possible) octal number

940 kind = OCTAL;	925 kind = OCTAL;

941 while (true) {	926 while (true) {

942 if (c0_ == '8' || c0_ == '9') {	927 if (c0_ == '8' || c0_ == '9') {

943 kind = DECIMAL;	928 kind = DECIMAL;

944 break;	929 break;

945 }	930 }

946 if (c0_ < '0' || '7' < c0_)	931 if (c0_ < '0' || '7' < c0_) break;

947 break;

948 AddCharAdvance();	932 AddCharAdvance();

949 }	933 }

950 }	934 }

951 }	935 }

952	936

953 // Parse decimal digits and allow trailing fractional part.	937 // Parse decimal digits and allow trailing fractional part.

954 if (kind == DECIMAL) {	938 if (kind == DECIMAL) {

955 ScanDecimalDigits(); // optional	939 ScanDecimalDigits(); // optional

956 if (c0_ == '.') {	940 if (c0_ == '.') {

957 AddCharAdvance();	941 AddCharAdvance();

958 ScanDecimalDigits(); // optional	942 ScanDecimalDigits(); // optional

959 }	943 }

960 }	944 }

961 }	945 }

962	946

963 // scan exponent, if any	947 // scan exponent, if any

964 if (c0_ == 'e' || c0_ == 'E') {	948 if (c0_ == 'e' || c0_ == 'E') {

965 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number	949 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number

966 if (kind == OCTAL)	950 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed

967 return Token::ILLEGAL; // no exponent for octals allowed

968 // scan exponent	951 // scan exponent

969 AddCharAdvance();	952 AddCharAdvance();

970 if (c0_ == '+' || c0_ == '-')	953 if (c0_ == '+' || c0_ == '-')

971 AddCharAdvance();	954 AddCharAdvance();

972 if (!IsDecimalDigit(c0_))	955 if (!IsDecimalDigit(c0_))

973 // we must have at least one decimal digit after 'e'/'E'	956 // we must have at least one decimal digit after 'e'/'E'

974 return Token::ILLEGAL;	957 return Token::ILLEGAL;

975 ScanDecimalDigits();	958 ScanDecimalDigits();

976 }	959 }

977 TerminateLiteral();	960 TerminateLiteral();

978	961

979 // The source character immediately following a numeric literal must	962 // The source character immediately following a numeric literal must

980 // not be an identifier start or a decimal digit; see ECMA-262	963 // not be an identifier start or a decimal digit; see ECMA-262

981 // section 7.8.3, page 17 (note that we read only one decimal digit	964 // section 7.8.3, page 17 (note that we read only one decimal digit

982 // if the value is 0).	965 // if the value is 0).

983 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))	966 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))

984 return Token::ILLEGAL;	967 return Token::ILLEGAL;

985	968

986 return Token::NUMBER;	969 return Token::NUMBER;

987 }	970 }

988	971

	972

989 uc32 Scanner::ScanIdentifierUnicodeEscape() {	973 uc32 Scanner::ScanIdentifierUnicodeEscape() {

990 Advance();	974 Advance();

991 if (c0_ != 'u')	975 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

992 return unibrow::Utf8::kBadChar;

993 Advance();	976 Advance();

994 uc32 c = ScanHexEscape('u', 4);	977 uc32 c = ScanHexEscape('u', 4);

995 // We do not allow a unicode escape sequence to start another	978 // We do not allow a unicode escape sequence to start another

996 // unicode escape sequence.	979 // unicode escape sequence.

997 if (c == '\\')	980 if (c == '\\') return unibrow::Utf8::kBadChar;

998 return unibrow::Utf8::kBadChar;

999 return c;	981 return c;

1000 }	982 }

1001	983

	984

1002 Token::Value Scanner::ScanIdentifier() {	985 Token::Value Scanner::ScanIdentifier() {

1003 ASSERT(kIsIdentifierStart.get(c0_));	986 ASSERT(kIsIdentifierStart.get(c0_));

1004	987

1005 StartLiteral();	988 StartLiteral();

1006 KeywordMatcher keyword_match;	989 KeywordMatcher keyword_match;

1007	990

1008 // Scan identifier start character.	991 // Scan identifier start character.

1009 if (c0_ == '\\') {	992 if (c0_ == '\\') {

1010 uc32 c = ScanIdentifierUnicodeEscape();	993 uc32 c = ScanIdentifierUnicodeEscape();

1011 // Only allow legal identifier start characters.	994 // Only allow legal identifier start characters.

1012 if (!kIsIdentifierStart.get(c))	995 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;

1013 return Token::ILLEGAL;

1014 AddChar(c);	996 AddChar(c);

1015 keyword_match.Fail();	997 keyword_match.Fail();

1016 } else {	998 } else {

1017 AddChar(c0_);	999 AddChar(c0_);

1018 keyword_match.AddChar(c0_);	1000 keyword_match.AddChar(c0_);

1019 Advance();	1001 Advance();

1020 }	1002 }

1021	1003

1022 // Scan the rest of the identifier characters.	1004 // Scan the rest of the identifier characters.

1023 while (kIsIdentifierPart.get(c0_)) {	1005 while (kIsIdentifierPart.get(c0_)) {

1024 if (c0_ == '\\') {	1006 if (c0_ == '\\') {

1025 uc32 c = ScanIdentifierUnicodeEscape();	1007 uc32 c = ScanIdentifierUnicodeEscape();

1026 // Only allow legal identifier part characters.	1008 // Only allow legal identifier part characters.

1027 if (!kIsIdentifierPart.get(c))	1009 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;

1028 return Token::ILLEGAL;

1029 AddChar(c);	1010 AddChar(c);

1030 keyword_match.Fail();	1011 keyword_match.Fail();

1031 } else {	1012 } else {

1032 AddChar(c0_);	1013 AddChar(c0_);

1033 keyword_match.AddChar(c0_);	1014 keyword_match.AddChar(c0_);

1034 Advance();	1015 Advance();

1035 }	1016 }

1036 }	1017 }

1037 TerminateLiteral();	1018 TerminateLiteral();

1038	1019

1039 return keyword_match.token();	1020 return keyword_match.token();

1040 }	1021 }

1041	1022

	1023

	1024

1042 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {	1025 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

1043 // Checks whether the buffer contains an identifier (no escape).	1026 // Checks whether the buffer contains an identifier (no escape).

1044 if (!buffer->has_more())	1027 if (!buffer->has_more()) return false;

1045 return false;	1028 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

1046 if (!kIsIdentifierStart.get(buffer->GetNext()))

1047 return false;

1048 while (buffer->has_more()) {	1029 while (buffer->has_more()) {

1049 if (!kIsIdentifierPart.get(buffer->GetNext()))	1030 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

1050 return false;

1051 }	1031 }

1052 return true;	1032 return true;

1053 }	1033 }

1054	1034

	1035

1055 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1036 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1056 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	1037 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1057 bool in_character_class = false;	1038 bool in_character_class = false;

1058	1039

1059 // Previous token is either '/' or '/=', in the second case, the	1040 // Previous token is either '/' or '/=', in the second case, the

1060 // pattern starts at =.	1041 // pattern starts at =.

1061 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	1042 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1062 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	1043 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1063	1044

1064 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1045 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1065 // the scanner should pass uninterpreted bodies to the RegExp	1046 // the scanner should pass uninterpreted bodies to the RegExp

1066 // constructor.	1047 // constructor.

1067 StartLiteral();	1048 StartLiteral();

1068 if (seen_equal)	1049 if (seen_equal)

1069 AddChar('=');	1050 AddChar('=');

1070	1051

1071 while (c0_ != '/' || in_character_class) {	1052 while (c0_ != '/' || in_character_class) {

1072 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1053 if (kIsLineTerminator.get(c0_) || c0_ < 0)

1073 return false;	1054 return false;

1074 if (c0_ == '\\') { // escaped character	1055 if (c0_ == '\\') { // escaped character

1075 AddCharAdvance();	1056 AddCharAdvance();

1076 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1057 if (kIsLineTerminator.get(c0_) || c0_ < 0)

1077 return false;	1058 return false;

1078 AddCharAdvance();	1059 AddCharAdvance();

1079 } else { // unescaped character	1060 } else { // unescaped character

1080 if (c0_ == '[')	1061 if (c0_ == '[')

1081 in_character_class = true;	1062 in_character_class = true;

1082 if (c0_ == ']')	1063 if (c0_ == ']')

1083 in_character_class = false;	1064 in_character_class = false;

1084 AddCharAdvance();	1065 AddCharAdvance();

1085 }	1066 }

1086 }	1067 }

1087 Advance(); // consume '/'	1068 Advance(); // consume '/'

1088	1069

1089 TerminateLiteral();	1070 TerminateLiteral();

1090	1071

1091 return true;	1072 return true;

1092 }	1073 }

1093	1074

1094 bool Scanner::ScanRegExpFlags() {	1075 bool Scanner::ScanRegExpFlags() {

1095 // Scan regular expression flags.	1076 // Scan regular expression flags.

1096 StartLiteral();	1077 StartLiteral();

1097 while (kIsIdentifierPart.get(c0_)) {	1078 while (kIsIdentifierPart.get(c0_)) {

1098 if (c0_ == '\\') {	1079 if (c0_ == '\\') {

1099 uc32 c = ScanIdentifierUnicodeEscape();	1080 uc32 c = ScanIdentifierUnicodeEscape();

1100 if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) {	1081 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1101 // We allow any escaped character, unlike the restriction on	1082 // We allow any escaped character, unlike the restriction on

1102 // IdentifierPart when it is used to build an IdentifierName.	1083 // IdentifierPart when it is used to build an IdentifierName.

1103 AddChar(c);	1084 AddChar(c);

1104 continue;	1085 continue;

1105 }	1086 }

1106 }	1087 }

1107 AddCharAdvance();	1088 AddCharAdvance();

1108 }	1089 }

1109 TerminateLiteral();	1090 TerminateLiteral();

1110	1091

1111 next_.location.end_pos = source_pos() - 1;	1092 next_.location.end_pos = source_pos() - 1;

1112 return true;	1093 return true;

1113 }	1094 }

1114	1095

1115 }	1096 } } // namespace v8::internal

1116 } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »