src/scanner-base.h - Issue 6577036: [Isolates] Merge from bleeding_edge to isolates, revisions 6100-6300.

Side by Side Diff: src/scanner-base.h

Issue 6577036: [Isolates] Merge from bleeding_edge to isolates, revisions 6100-6300. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/isolates/

Patch Set: '' Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
142 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;	142 unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;

143 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;	143 unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;

144 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;	144 unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

145 StaticResource<Utf8Decoder> utf8_decoder_;	145 StaticResource<Utf8Decoder> utf8_decoder_;

146	146

147 friend class Isolate;	147 friend class Isolate;

148 DISALLOW_COPY_AND_ASSIGN(ScannerConstants);	148 DISALLOW_COPY_AND_ASSIGN(ScannerConstants);

149 };	149 };

150	150

151 // ----------------------------------------------------------------------------	151 // ----------------------------------------------------------------------------

152 // LiteralCollector - Collector of chars of literals.	152 // LiteralBuffer - Collector of chars of literals.

153	153

154 class LiteralCollector {	154 class LiteralBuffer {

155 public:	155 public:

156 LiteralCollector();	156 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }

157 ~LiteralCollector();

158	157

159 inline void AddChar(uc32 c) {	158 ~LiteralBuffer() {

160 if (recording_) {	159 if (backing_store_.length() > 0) {

161 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {	160 backing_store_.Dispose();

162 buffer_.Add(static_cast<char>(c));

163 } else {

164 AddCharSlow(c);

165 }

166 }	161 }

167 }	162 }

168	163

169 void StartLiteral() {	164 inline void AddChar(uc16 character) {

170 buffer_.StartSequence();	165 if (position_ >= backing_store_.length()) ExpandBuffer();

171 recording_ = true;	166 if (is_ascii_) {

	167 if (character < kMaxAsciiCharCodeU) {

	168 backing_store_[position_] = static_cast<byte>(character);

	169 position_ += kASCIISize;

	170 return;

	171 }

	172 ConvertToUC16();

	173 }

	174 *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;

	175 position_ += kUC16Size;

172 }	176 }

173	177

174 Vector<const char> EndLiteral() {	178 bool is_ascii() { return is_ascii_; }

175 if (recording_) {	179

176 recording_ = false;	180 Vector<const uc16> uc16_literal() {

177 buffer_.Add(kEndMarker);	181 ASSERT(!is_ascii_);

178 Vector<char> sequence = buffer_.EndSequence();	182 ASSERT((position_ & 0x1) == 0);

179 return Vector<const char>(sequence.start(), sequence.length());	183 return Vector<const uc16>(

180 }	184 reinterpret_cast<const uc16*>(backing_store_.start()),

181 return Vector<const char>();	185 position_ >> 1);

182 }	186 }

183	187

184 void DropLiteral() {	188 Vector<const char> ascii_literal() {

185 if (recording_) {	189 ASSERT(is_ascii_);

186 recording_ = false;	190 return Vector<const char>(

187 buffer_.DropSequence();	191 reinterpret_cast<const char*>(backing_store_.start()),

188 }	192 position_);

	193 }

	194

	195 int length() {

	196 return is_ascii_ ? position_ : (position_ >> 1);

189 }	197 }

190	198

191 void Reset() {	199 void Reset() {

192 buffer_.Reset();	200 position_ = 0;

	201 is_ascii_ = true;

	202 }

	203 private:

	204 static const int kInitialCapacity = 16;

	205 static const int kGrowthFactory = 4;

	206 static const int kMinConversionSlack = 256;

	207 static const int kMaxGrowth = 1 * MB;

	208 inline int NewCapacity(int min_capacity) {

	209 int capacity = Max(min_capacity, backing_store_.length());

	210 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);

	211 return new_capacity;

193 }	212 }

194	213

195 // The end marker added after a parsed literal.	214 void ExpandBuffer() {

196 // Using zero allows the usage of strlen and similar functions on	215 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));

197 // identifiers and numbers (but not strings, since they may contain zero	216 memcpy(new_store.start(), backing_store_.start(), position_);

198 // bytes).	217 backing_store_.Dispose();

199 static const char kEndMarker = '\x00';	218 backing_store_ = new_store;

200 private:	219 }

201 static const int kInitialCapacity = 256;	220

202 SequenceCollector<char, 4> buffer_;	221 void ConvertToUC16() {

203 bool recording_;	222 ASSERT(is_ascii_);

204 void AddCharSlow(uc32 c);	223 Vector<byte> new_store;

	224 int new_content_size = position_ * kUC16Size;

	225 if (new_content_size >= backing_store_.length()) {

	226 // Ensure room for all currently read characters as UC16 as well

	227 // as the character about to be stored.

	228 new_store = Vector<byte>::New(NewCapacity(new_content_size));

	229 } else {

	230 new_store = backing_store_;

	231 }

	232 char* src = reinterpret_cast<char*>(backing_store_.start());

	233 uc16* dst = reinterpret_cast<uc16*>(new_store.start());

	234 for (int i = position_ - 1; i >= 0; i--) {

	235 dst[i] = src[i];

	236 }

	237 if (new_store.start() != backing_store_.start()) {

	238 backing_store_.Dispose();

	239 backing_store_ = new_store;

	240 }

	241 position_ = new_content_size;

	242 is_ascii_ = false;

	243 }

	244

	245 bool is_ascii_;

	246 int position_;

	247 Vector<byte> backing_store_;

205 };	248 };

206	249

	250

207 // ----------------------------------------------------------------------------	251 // ----------------------------------------------------------------------------

208 // Scanner base-class.	252 // Scanner base-class.

209	253

210 // Generic functionality used by both JSON and JavaScript scanners.	254 // Generic functionality used by both JSON and JavaScript scanners.

211 class Scanner {	255 class Scanner {

212 public:	256 public:

213 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;	257 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

214	258

215 class LiteralScope {	259 class LiteralScope {

216 public:	260 public:

(...skipping 25 matching lines...) Expand all Loading...
242 // (the token returned by Next()).	286 // (the token returned by Next()).

243 Location location() const { return current_.location; }	287 Location location() const { return current_.location; }

244 Location peek_location() const { return next_.location; }	288 Location peek_location() const { return next_.location; }

245	289

246 // Returns the literal string, if any, for the current token (the	290 // Returns the literal string, if any, for the current token (the

247 // token returned by Next()). The string is 0-terminated and in	291 // token returned by Next()). The string is 0-terminated and in

248 // UTF-8 format; they may contain 0-characters. Literal strings are	292 // UTF-8 format; they may contain 0-characters. Literal strings are

249 // collected for identifiers, strings, and numbers.	293 // collected for identifiers, strings, and numbers.

250 // These functions only give the correct result if the literal	294 // These functions only give the correct result if the literal

251 // was scanned between calls to StartLiteral() and TerminateLiteral().	295 // was scanned between calls to StartLiteral() and TerminateLiteral().

252 const char* literal_string() const {	296 bool is_literal_ascii() {

253 return current_.literal_chars.start();	297 ASSERT_NOT_NULL(current_.literal_chars);

	298 return current_.literal_chars->is_ascii();

254 }	299 }

255	300 Vector<const char> literal_ascii_string() {

	301 ASSERT_NOT_NULL(current_.literal_chars);

	302 return current_.literal_chars->ascii_literal();

	303 }

	304 Vector<const uc16> literal_uc16_string() {

	305 ASSERT_NOT_NULL(current_.literal_chars);

	306 return current_.literal_chars->uc16_literal();

	307 }

256 int literal_length() const {	308 int literal_length() const {

257 // Excluding terminal '\x00' added by TerminateLiteral().	309 ASSERT_NOT_NULL(current_.literal_chars);

258 return current_.literal_chars.length() - 1;	310 return current_.literal_chars->length();

259 }

260

261 Vector<const char> literal() const {

262 return Vector<const char>(literal_string(), literal_length());

263 }	311 }

264	312

265 // Returns the literal string for the next token (the token that	313 // Returns the literal string for the next token (the token that

266 // would be returned if Next() were called).	314 // would be returned if Next() were called).

267 const char* next_literal_string() const {	315 bool is_next_literal_ascii() {

268 return next_.literal_chars.start();	316 ASSERT_NOT_NULL(next_.literal_chars);

	317 return next_.literal_chars->is_ascii();

269 }	318 }

270	319 Vector<const char> next_literal_ascii_string() {

271	320 ASSERT_NOT_NULL(next_.literal_chars);

272 // Returns the length of the next token (that would be returned if	321 return next_.literal_chars->ascii_literal();

273 // Next() were called).	322 }

	323 Vector<const uc16> next_literal_uc16_string() {

	324 ASSERT_NOT_NULL(next_.literal_chars);

	325 return next_.literal_chars->uc16_literal();

	326 }

274 int next_literal_length() const {	327 int next_literal_length() const {

275 // Excluding terminal '\x00' added by TerminateLiteral().	328 ASSERT_NOT_NULL(next_.literal_chars);

276 return next_.literal_chars.length() - 1;	329 return next_.literal_chars->length();

277 }

278

279 Vector<const char> next_literal() const {

280 return Vector<const char>(next_literal_string(), next_literal_length());

281 }	330 }

282	331

283 static const int kCharacterLookaheadBufferSize = 1;	332 static const int kCharacterLookaheadBufferSize = 1;

284	333

285 protected:	334 protected:

286 // The current and look-ahead token.	335 // The current and look-ahead token.

287 struct TokenDesc {	336 struct TokenDesc {

288 Token::Value token;	337 Token::Value token;

289 Location location;	338 Location location;

290 Vector<const char> literal_chars;	339 LiteralBuffer* literal_chars;

291 };	340 };

292	341

293 // Call this after setting source_ to the input.	342 // Call this after setting source_ to the input.

294 void Init() {	343 void Init() {

295 // Set c0_ (one character ahead)	344 // Set c0_ (one character ahead)

296 ASSERT(kCharacterLookaheadBufferSize == 1);	345 ASSERT(kCharacterLookaheadBufferSize == 1);

297 Advance();	346 Advance();

298 // Initialize current_ to not refer to a literal.	347 // Initialize current_ to not refer to a literal.

299 current_.literal_chars = Vector<const char>();	348 current_.literal_chars = NULL;

300 // Reset literal buffer.

301 literal_buffer_.Reset();

302 }	349 }

303	350

304 // Literal buffer support	351 // Literal buffer support

305 inline void StartLiteral() {	352 inline void StartLiteral() {

306 literal_buffer_.StartLiteral();	353 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?

	354 &literal_buffer2_ : &literal_buffer1_;

	355 free_buffer->Reset();

	356 next_.literal_chars = free_buffer;

307 }	357 }

308	358

309 inline void AddLiteralChar(uc32 c) {	359 inline void AddLiteralChar(uc32 c) {

310 literal_buffer_.AddChar(c);	360 ASSERT_NOT_NULL(next_.literal_chars);

	361 next_.literal_chars->AddChar(c);

311 }	362 }

312	363

313 // Complete scanning of a literal.	364 // Complete scanning of a literal.

314 inline void TerminateLiteral() {	365 inline void TerminateLiteral() {

315 next_.literal_chars = literal_buffer_.EndLiteral();	366 // Does nothing in the current implementation.

316 }	367 }

317	368

318 // Stops scanning of a literal and drop the collected characters,	369 // Stops scanning of a literal and drop the collected characters,

319 // e.g., due to an encountered error.	370 // e.g., due to an encountered error.

320 inline void DropLiteral() {	371 inline void DropLiteral() {

321 literal_buffer_.DropLiteral();	372 next_.literal_chars = NULL;

322 }	373 }

323	374

324 inline void AddLiteralCharAdvance() {	375 inline void AddLiteralCharAdvance() {

325 AddLiteralChar(c0_);	376 AddLiteralChar(c0_);

326 Advance();	377 Advance();

327 }	378 }

328	379

329 // Low-level scanning support.	380 // Low-level scanning support.

330 void Advance() { c0_ = source_->Advance(); }	381 void Advance() { c0_ = source_->Advance(); }

331 void PushBack(uc32 ch) {	382 void PushBack(uc32 ch) {

(...skipping 19 matching lines...) Expand all Loading...
351 uc32 ScanHexEscape(uc32 c, int length);	402 uc32 ScanHexEscape(uc32 c, int length);

352 uc32 ScanOctalEscape(uc32 c, int length);	403 uc32 ScanOctalEscape(uc32 c, int length);

353	404

354 // Return the current source position.	405 // Return the current source position.

355 int source_pos() {	406 int source_pos() {

356 return source_->pos() - kCharacterLookaheadBufferSize;	407 return source_->pos() - kCharacterLookaheadBufferSize;

357 }	408 }

358	409

359 ScannerConstants* scanner_constants_;	410 ScannerConstants* scanner_constants_;

360	411

	412 // Buffers collecting literal strings, numbers, etc.

	413 LiteralBuffer literal_buffer1_;

	414 LiteralBuffer literal_buffer2_;

	415

361 TokenDesc current_; // desc for current token (as returned by Next())	416 TokenDesc current_; // desc for current token (as returned by Next())

362 TokenDesc next_; // desc for next token (one token look-ahead)	417 TokenDesc next_; // desc for next token (one token look-ahead)

363	418

364 // Input stream. Must be initialized to an UC16CharacterStream.	419 // Input stream. Must be initialized to an UC16CharacterStream.

365 UC16CharacterStream* source_;	420 UC16CharacterStream* source_;

366	421

367 // Buffer to hold literal values (identifiers, strings, numbers)

368 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.

369 LiteralCollector literal_buffer_;

370	422

371 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	423 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

372 uc32 c0_;	424 uc32 c0_;

373 };	425 };

374	426

375 // ----------------------------------------------------------------------------	427 // ----------------------------------------------------------------------------

376 // JavaScriptScanner - base logic for JavaScript scanning.	428 // JavaScriptScanner - base logic for JavaScript scanning.

377	429

378 class JavaScriptScanner : public Scanner {	430 class JavaScriptScanner : public Scanner {

379 public:	431 public:

380

381 // Bit vector representing set of types of literals.

382 enum LiteralType {

383 kNoLiterals = 0,

384 kLiteralNumber = 1,

385 kLiteralIdentifier = 2,

386 kLiteralString = 4,

387 kLiteralRegExp = 8,

388 kLiteralRegExpFlags = 16,

389 kAllLiterals = 31

390 };

391

392 // A LiteralScope that disables recording of some types of JavaScript	432 // A LiteralScope that disables recording of some types of JavaScript

393 // literals. If the scanner is configured to not record the specific	433 // literals. If the scanner is configured to not record the specific

394 // type of literal, the scope will not call StartLiteral.	434 // type of literal, the scope will not call StartLiteral.

395 class LiteralScope {	435 class LiteralScope {

396 public:	436 public:

397 LiteralScope(JavaScriptScanner* self, LiteralType type)	437 explicit LiteralScope(JavaScriptScanner* self)

398 : scanner_(self), complete_(false) {	438 : scanner_(self), complete_(false) {

399 if (scanner_->RecordsLiteral(type)) {	439 scanner_->StartLiteral();

400 scanner_->StartLiteral();

401 }

402 }	440 }

403 ~LiteralScope() {	441 ~LiteralScope() {

404 if (!complete_) scanner_->DropLiteral();	442 if (!complete_) scanner_->DropLiteral();

405 }	443 }

406 void Complete() {	444 void Complete() {

407 scanner_->TerminateLiteral();	445 scanner_->TerminateLiteral();

408 complete_ = true;	446 complete_ = true;

409 }	447 }

410	448

411 private:	449 private:

(...skipping 21 matching lines...) Expand all Loading...
433 // Tells whether the buffer contains an identifier (no escapes).	471 // Tells whether the buffer contains an identifier (no escapes).

434 // Used for checking if a property name is an identifier.	472 // Used for checking if a property name is an identifier.

435 static bool IsIdentifier(unibrow::CharacterStream* buffer);	473 static bool IsIdentifier(unibrow::CharacterStream* buffer);

436	474

437 // Seek forward to the given position. This operation does not	475 // Seek forward to the given position. This operation does not

438 // work in general, for instance when there are pushed back	476 // work in general, for instance when there are pushed back

439 // characters, but works for seeking forward until simple delimiter	477 // characters, but works for seeking forward until simple delimiter

440 // tokens, which is what it is used for.	478 // tokens, which is what it is used for.

441 void SeekForward(int pos);	479 void SeekForward(int pos);

442	480

443 // Whether this scanner records the given literal type or not.

444 bool RecordsLiteral(LiteralType type) {

445 return (literal_flags_ & type) != 0;

446 }

447

448 protected:	481 protected:

449 bool SkipWhiteSpace();	482 bool SkipWhiteSpace();

450 Token::Value SkipSingleLineComment();	483 Token::Value SkipSingleLineComment();

451 Token::Value SkipMultiLineComment();	484 Token::Value SkipMultiLineComment();

452	485

453 // Scans a single JavaScript token.	486 // Scans a single JavaScript token.

454 void Scan();	487 void Scan();

455	488

456 void ScanDecimalDigits();	489 void ScanDecimalDigits();

457 Token::Value ScanNumber(bool seen_period);	490 Token::Value ScanNumber(bool seen_period);

458 Token::Value ScanIdentifierOrKeyword();	491 Token::Value ScanIdentifierOrKeyword();

459 Token::Value ScanIdentifierSuffix(LiteralScope* literal);	492 Token::Value ScanIdentifierSuffix(LiteralScope* literal);

460	493

461 void ScanEscape();	494 void ScanEscape();

462 Token::Value ScanString();	495 Token::Value ScanString();

463	496

464 // Scans a possible HTML comment -- begins with '<!'.	497 // Scans a possible HTML comment -- begins with '<!'.

465 Token::Value ScanHtmlComment();	498 Token::Value ScanHtmlComment();

466	499

467 // Decodes a unicode escape-sequence which is part of an identifier.	500 // Decodes a unicode escape-sequence which is part of an identifier.

468 // If the escape sequence cannot be decoded the result is kBadChar.	501 // If the escape sequence cannot be decoded the result is kBadChar.

469 uc32 ScanIdentifierUnicodeEscape();	502 uc32 ScanIdentifierUnicodeEscape();

470	503

471 int literal_flags_;

472 bool has_line_terminator_before_next_;	504 bool has_line_terminator_before_next_;

473 };	505 };

474	506

475	507

476 // ----------------------------------------------------------------------------	508 // ----------------------------------------------------------------------------

477 // Keyword matching state machine.	509 // Keyword matching state machine.

478	510

479 class KeywordMatcher {	511 class KeywordMatcher {

480 // Incrementally recognize keywords.	512 // Incrementally recognize keywords.

481 //	513 //

(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
594 // keyword with the current prefix).	626 // keyword with the current prefix).

595 const char* keyword_;	627 const char* keyword_;

596 int counter_;	628 int counter_;

597 Token::Value keyword_token_;	629 Token::Value keyword_token_;

598 };	630 };

599	631

600	632

601 } } // namespace v8::internal	633 } } // namespace v8::internal

602	634

603 #endif // V8_SCANNER_BASE_H_	635 #endif // V8_SCANNER_BASE_H_

OLD	NEW

« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »