src/scanner.h - Issue 2832018: Made scanner follow coding style.

Side by Side Diff: src/scanner.h

Issue 2832018: Made scanner follow coding style. (Closed)

Patch Set: Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 136 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
147 // Incrementally recognize keywords.	147 // Incrementally recognize keywords.

148 //	148 //

149 // Recognized keywords:	149 // Recognized keywords:

150 // break case catch const* continue debugger* default delete do else	150 // break case catch const* continue debugger* default delete do else

151 // finally false for function if in instanceof native* new null	151 // finally false for function if in instanceof native* new null

152 // return switch this throw true try typeof var void while with	152 // return switch this throw true try typeof var void while with

153 //	153 //

154 // *: Actually "future reserved keywords". These are the only ones we	154 // *: Actually "future reserved keywords". These are the only ones we

155 // recognize; the remaining are allowed as identifiers.	155 // recognize; the remaining are allowed as identifiers.

156 public:	156 public:

157 KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {}	157 KeywordMatcher()

	158 : state_(INITIAL),

	159 token_(Token::IDENTIFIER),

	160 keyword_(NULL),

	161 counter_(0),

	162 keyword_token_(Token::ILLEGAL) {}

158	163

159 Token::Value token() { return token_; }	164 Token::Value token() { return token_; }

160	165

161 inline void AddChar(uc32 input) {	166 inline void AddChar(uc32 input) {

162 if (state_ != UNMATCHABLE) {	167 if (state_ != UNMATCHABLE) {

163 Step(input);	168 Step(input);

164 }	169 }

165 }	170 }

166	171

167 void Fail() {	172 void Fail() {

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
199 };	204 };

200	205

201 // Range of possible first characters of a keyword.	206 // Range of possible first characters of a keyword.

202 static const unsigned int kFirstCharRangeMin = 'b';	207 static const unsigned int kFirstCharRangeMin = 'b';

203 static const unsigned int kFirstCharRangeMax = 'w';	208 static const unsigned int kFirstCharRangeMax = 'w';

204 static const unsigned int kFirstCharRangeLength =	209 static const unsigned int kFirstCharRangeLength =

205 kFirstCharRangeMax - kFirstCharRangeMin + 1;	210 kFirstCharRangeMax - kFirstCharRangeMin + 1;

206 // State map for first keyword character range.	211 // State map for first keyword character range.

207 static FirstState first_states_[kFirstCharRangeLength];	212 static FirstState first_states_[kFirstCharRangeLength];

208	213

209 // Current state.

210 State state_;

211 // Token for currently added characters.

212 Token::Value token_;

213

214 // Matching a specific keyword string (there is only one possible valid

215 // keyword with the current prefix).

216 const char* keyword_;

217 int counter_;

218 Token::Value keyword_token_;

219

220 // If input equals keyword's character at position, continue matching keyword	214 // If input equals keyword's character at position, continue matching keyword

221 // from that position.	215 // from that position.

222 inline bool MatchKeywordStart(uc32 input,	216 inline bool MatchKeywordStart(uc32 input,

223 const char* keyword,	217 const char* keyword,

224 int position,	218 int position,

225 Token::Value token_if_match) {	219 Token::Value token_if_match) {

226 if (input == keyword[position]) {	220 if (input == keyword[position]) {

227 state_ = KEYWORD_PREFIX;	221 state_ = KEYWORD_PREFIX;

228 this->keyword_ = keyword;	222 this->keyword_ = keyword;

229 this->counter_ = position + 1;	223 this->counter_ = position + 1;

230 this->keyword_token_ = token_if_match;	224 this->keyword_token_ = token_if_match;

231 return true;	225 return true;

232 }	226 }

233 return false;	227 return false;

234 }	228 }

235	229

236 // If input equals match character, transition to new state and return true.	230 // If input equals match character, transition to new state and return true.

237 inline bool MatchState(uc32 input, char match, State new_state) {	231 inline bool MatchState(uc32 input, char match, State new_state) {

238 if (input == match) {	232 if (input == match) {

239 state_ = new_state;	233 state_ = new_state;

240 return true;	234 return true;

241 }	235 }

242 return false;	236 return false;

243 }	237 }

244	238

245 inline bool MatchKeyword(uc32 input,	239 inline bool MatchKeyword(uc32 input,

246 char match,	240 char match,

247 State new_state,	241 State new_state,

248 Token::Value keyword_token) {	242 Token::Value keyword_token) {

249 if (input == match) { // Matched "do".	243 if (input != match) {

250 state_ = new_state;	244 return false;

251 token_ = keyword_token;

252 return true;

253 }	245 }

254 return false;	246 state_ = new_state;

	247 token_ = keyword_token;

	248 return true;

255 }	249 }

256	250

257 void Step(uc32 input);	251 void Step(uc32 input);

	252

	253 // Current state.

	254 State state_;

	255 // Token for currently added characters.

	256 Token::Value token_;

	257

	258 // Matching a specific keyword string (there is only one possible valid

	259 // keyword with the current prefix).

	260 const char* keyword_;

	261 int counter_;

	262 Token::Value keyword_token_;

258 };	263 };

259	264

260	265

261 enum ParserMode { PARSE, PREPARSE };	266 enum ParserMode { PARSE, PREPARSE };

262 enum ParserLanguage { JAVASCRIPT, JSON };	267 enum ParserLanguage { JAVASCRIPT, JSON };

263	268

264	269

265 class Scanner {	270 class Scanner {

266 public:	271 public:

267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;	272 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
355	360

356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;	361 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;

357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;	362 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;

358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;	363 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;

359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;	364 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

360	365

361 static const int kCharacterLookaheadBufferSize = 1;	366 static const int kCharacterLookaheadBufferSize = 1;

362 static const int kNoEndPosition = 1;	367 static const int kNoEndPosition = 1;

363	368

364 private:	369 private:

365 void Init(Handle<String> source,

366 unibrow::CharacterStream* stream,

367 int start_position, int end_position,

368 ParserLanguage language);

369

370

371 // Different UTF16 buffers used to pull characters from. Based on input one of

372 // these will be initialized as the actual data source.

373 CharacterStreamUTF16Buffer char_stream_buffer_;

374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>

375 two_byte_string_buffer_;

376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;

377

378 // Source. Will point to one of the buffers declared above.

379 UTF16Buffer* source_;

380

381 // Used to convert the source string into a character stream when a stream

382 // is not passed to the scanner.

383 SafeStringInputBuffer safe_string_input_buffer_;

384

385 // Buffer to hold literal values (identifiers, strings, numbers)

386 // using 0-terminated UTF-8 encoding.

387 UTF8Buffer literal_buffer_1_;

388 UTF8Buffer literal_buffer_2_;

389

390 bool stack_overflow_;

391 static StaticResource<Utf8Decoder> utf8_decoder_;

392

393 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

394 uc32 c0_;

395

396 // The current and look-ahead token.	370 // The current and look-ahead token.

397 struct TokenDesc {	371 struct TokenDesc {

398 Token::Value token;	372 Token::Value token;

399 Location location;	373 Location location;

400 UTF8Buffer* literal_buffer;	374 UTF8Buffer* literal_buffer;

401 };	375 };

402	376

403 TokenDesc current_; // desc for current token (as returned by Next())	377 void Init(Handle<String> source,

404 TokenDesc next_; // desc for next token (one token look-ahead)	378 unibrow::CharacterStream* stream,

405 bool has_line_terminator_before_next_;	379 int start_position, int end_position,

406 bool is_pre_parsing_;	380 ParserLanguage language);

407 bool is_parsing_json_;

408	381

409 // Literal buffer support	382 // Literal buffer support

410 void StartLiteral();	383 void StartLiteral();

411 void AddChar(uc32 ch);	384 void AddChar(uc32 ch);

412 void AddCharAdvance();	385 void AddCharAdvance();

413 void TerminateLiteral();	386 void TerminateLiteral();

414	387

415 // Low-level scanning support.	388 // Low-level scanning support.

416 void Advance() { c0_ = source_->Advance(); }	389 void Advance() { c0_ = source_->Advance(); }

417 void PushBack(uc32 ch) {	390 void PushBack(uc32 ch) {

418 source_->PushBack(ch);	391 source_->PushBack(ch);

419 c0_ = ch;	392 c0_ = ch;

420 }	393 }

421	394

422 bool SkipWhiteSpace() {	395 bool SkipWhiteSpace() {

423 if (is_parsing_json_) {	396 if (is_parsing_json_) {

424 return SkipJsonWhiteSpace();	397 return SkipJsonWhiteSpace();

425 } else {	398 } else {

426 return SkipJavaScriptWhiteSpace();	399 return SkipJavaScriptWhiteSpace();

427 }	400 }

428 }	401 }

	402

429 bool SkipJavaScriptWhiteSpace();	403 bool SkipJavaScriptWhiteSpace();

430 bool SkipJsonWhiteSpace();	404 bool SkipJsonWhiteSpace();

431 Token::Value SkipSingleLineComment();	405 Token::Value SkipSingleLineComment();

432 Token::Value SkipMultiLineComment();	406 Token::Value SkipMultiLineComment();

433	407

434 inline Token::Value Select(Token::Value tok);	408 inline Token::Value Select(Token::Value tok);

435 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);	409 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);

436	410

437 inline void Scan() {	411 inline void Scan() {

438 if (is_parsing_json_) {	412 if (is_parsing_json_) {

(...skipping 14 matching lines...) Expand all Loading...
453 // carriage-return, newline and space.	427 // carriage-return, newline and space.

454 void ScanJson();	428 void ScanJson();

455	429

456 // A JSON number (production JSONNumber) is a subset of the valid JavaScript	430 // A JSON number (production JSONNumber) is a subset of the valid JavaScript

457 // decimal number literals.	431 // decimal number literals.

458 // It includes an optional minus sign, must have at least one	432 // It includes an optional minus sign, must have at least one

459 // digit before and after a decimal point, may not have prefixed zeros (unless	433 // digit before and after a decimal point, may not have prefixed zeros (unless

460 // the integer part is zero), and may include an exponent part (e.g., "e-10").	434 // the integer part is zero), and may include an exponent part (e.g., "e-10").

461 // Hexadecimal and octal numbers are not allowed.	435 // Hexadecimal and octal numbers are not allowed.

462 Token::Value ScanJsonNumber();	436 Token::Value ScanJsonNumber();

	437

463 // A JSON string (production JSONString) is a subset of valid JavaScript string	438 // A JSON string (production JSONString) is a subset of valid JavaScript string

464 // literals. The string must only be double-quoted (not single-quoted), and	439 // literals. The string must only be double-quoted (not single-quoted), and

465 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and	440 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and

466 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.	441 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.

467 Token::Value ScanJsonString();	442 Token::Value ScanJsonString();

	443

468 // Used to recognize one of the literals "true", "false", or "null". These	444 // Used to recognize one of the literals "true", "false", or "null". These

469 // are the only valid JSON identifiers (productions JSONBooleanLiteral,	445 // are the only valid JSON identifiers (productions JSONBooleanLiteral,

470 // JSONNullLiteral).	446 // JSONNullLiteral).

471 Token::Value ScanJsonIdentifier(const char* text, Token::Value token);	447 Token::Value ScanJsonIdentifier(const char* text, Token::Value token);

472	448

473 void ScanDecimalDigits();	449 void ScanDecimalDigits();

474 Token::Value ScanNumber(bool seen_period);	450 Token::Value ScanNumber(bool seen_period);

475 Token::Value ScanIdentifier();	451 Token::Value ScanIdentifier();

476 uc32 ScanHexEscape(uc32 c, int length);	452 uc32 ScanHexEscape(uc32 c, int length);

477 uc32 ScanOctalEscape(uc32 c, int length);	453 uc32 ScanOctalEscape(uc32 c, int length);

478 void ScanEscape();	454 void ScanEscape();

479 Token::Value ScanString();	455 Token::Value ScanString();

480	456

481 // Scans a possible HTML comment -- begins with '<!'.	457 // Scans a possible HTML comment -- begins with '<!'.

482 Token::Value ScanHtmlComment();	458 Token::Value ScanHtmlComment();

483	459

484 // Return the current source position.	460 // Return the current source position.

485 int source_pos() {	461 int source_pos() {

486 return source_->pos() - kCharacterLookaheadBufferSize;	462 return source_->pos() - kCharacterLookaheadBufferSize;

487 }	463 }

488	464

489 // Decodes a unicode escape-sequence which is part of an identifier.	465 // Decodes a unicode escape-sequence which is part of an identifier.

490 // If the escape sequence cannot be decoded the result is kBadRune.	466 // If the escape sequence cannot be decoded the result is kBadRune.

491 uc32 ScanIdentifierUnicodeEscape();	467 uc32 ScanIdentifierUnicodeEscape();

	468

	469 TokenDesc current_; // desc for current token (as returned by Next())

	470 TokenDesc next_; // desc for next token (one token look-ahead)

	471 bool has_line_terminator_before_next_;

	472 bool is_pre_parsing_;

	473 bool is_parsing_json_;

	474

	475 // Different UTF16 buffers used to pull characters from. Based on input one of

	476 // these will be initialized as the actual data source.

	477 CharacterStreamUTF16Buffer char_stream_buffer_;

	478 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>

	479 two_byte_string_buffer_;

	480 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;

	481

	482 // Source. Will point to one of the buffers declared above.

	483 UTF16Buffer* source_;

	484

	485 // Used to convert the source string into a character stream when a stream

	486 // is not passed to the scanner.

	487 SafeStringInputBuffer safe_string_input_buffer_;

	488

	489 // Buffer to hold literal values (identifiers, strings, numbers)

	490 // using 0-terminated UTF-8 encoding.

	491 UTF8Buffer literal_buffer_1_;

	492 UTF8Buffer literal_buffer_2_;

	493

	494 bool stack_overflow_;

	495 static StaticResource<Utf8Decoder> utf8_decoder_;

	496

	497 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

	498 uc32 c0_;

492 };	499 };

493	500

494 } } // namespace v8::internal	501 } } // namespace v8::internal

495	502

496 #endif // V8_SCANNER_H_	503 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « no previous file | src/scanner.cc » ('j') | no next file with comments »