src/scanner.h - Issue 198583003: Convert scanner buffers to use standard character types.

Side by Side Diff: src/scanner.h

Issue 198583003: Convert scanner buffers to use standard character types. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 102 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
113 protected:	113 protected:

114 static const uc32 kEndOfInput = -1;	114 static const uc32 kEndOfInput = -1;

115	115

116 // Ensures that the buffer_cursor_ points to the code_unit at	116 // Ensures that the buffer_cursor_ points to the code_unit at

117 // position pos_ of the input, if possible. If the position	117 // position pos_ of the input, if possible. If the position

118 // is at or after the end of the input, return false. If there	118 // is at or after the end of the input, return false. If there

119 // are more code_units available, return true.	119 // are more code_units available, return true.

120 virtual bool ReadBlock() = 0;	120 virtual bool ReadBlock() = 0;

121 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;	121 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;

122	122

123 const uc16* buffer_cursor_;	123 const uint16_t* buffer_cursor_;

124 const uc16* buffer_end_;	124 const uint16_t* buffer_end_;

125 unsigned pos_;	125 unsigned pos_;

126 };	126 };

127	127

128	128

129 // ---------------------------------------------------------------------	129 // ---------------------------------------------------------------------

130 // Caching predicates used by scanners.	130 // Caching predicates used by scanners.

131	131

132 class UnicodeCache {	132 class UnicodeCache {

133 public:	133 public:

134 UnicodeCache() {}	134 UnicodeCache() {}

(...skipping 27 matching lines...) Expand all Loading...
162 // ---------------------------------------------------------------------	162 // ---------------------------------------------------------------------

163 // DuplicateFinder discovers duplicate symbols.	163 // DuplicateFinder discovers duplicate symbols.

164	164

165 class DuplicateFinder {	165 class DuplicateFinder {

166 public:	166 public:

167 explicit DuplicateFinder(UnicodeCache* constants)	167 explicit DuplicateFinder(UnicodeCache* constants)

168 : unicode_constants_(constants),	168 : unicode_constants_(constants),

169 backing_store_(16),	169 backing_store_(16),

170 map_(&Match) { }	170 map_(&Match) { }

171	171

172 int AddAsciiSymbol(Vector<const char> key, int value);	172 int AddOneByteSymbol(Vector<const uint8_t> key, int value);

173 int AddUtf16Symbol(Vector<const uint16_t> key, int value);	173 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);

174 // Add a a number literal by converting it (if necessary)	174 // Add a a number literal by converting it (if necessary)

175 // to the string that ToString(ToNumber(literal)) would generate.	175 // to the string that ToString(ToNumber(literal)) would generate.

176 // and then adding that string with AddAsciiSymbol.	176 // and then adding that string with AddAsciiSymbol.

177 // This string is the actual value used as key in an object literal,	177 // This string is the actual value used as key in an object literal,

178 // and the one that must be different from the other keys.	178 // and the one that must be different from the other keys.

179 int AddNumber(Vector<const char> key, int value);	179 int AddNumber(Vector<const uint8_t> key, int value);

180	180

181 private:	181 private:

182 int AddSymbol(Vector<const byte> key, bool is_one_byte, int value);	182 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);

183 // Backs up the key and its length in the backing store.	183 // Backs up the key and its length in the backing store.

184 // The backup is stored with a base 127 encoding of the	184 // The backup is stored with a base 127 encoding of the

185 // length (plus a bit saying whether the string is ASCII),	185 // length (plus a bit saying whether the string is one byte),

186 // followed by the bytes of the key.	186 // followed by the bytes of the key.

187 byte* BackupKey(Vector<const byte> key, bool is_one_byte);	187 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);

188	188

189 // Compare two encoded keys (both pointing into the backing store)	189 // Compare two encoded keys (both pointing into the backing store)

190 // for having the same base-127 encoded lengths and ASCII-ness,	190 // for having the same base-127 encoded lengths and ASCII-ness,

191 // and then having the same 'length' bytes following.	191 // and then having the same 'length' bytes following.

192 static bool Match(void* first, void* second);	192 static bool Match(void* first, void* second);

193 // Creates a hash from a sequence of bytes.	193 // Creates a hash from a sequence of bytes.

194 static uint32_t Hash(Vector<const byte> key, bool is_one_byte);	194 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);

195 // Checks whether a string containing a JS number is its canonical	195 // Checks whether a string containing a JS number is its canonical

196 // form.	196 // form.

197 static bool IsNumberCanonical(Vector<const char> key);	197 static bool IsNumberCanonical(Vector<const uint8_t> key);

198	198

199 // Size of buffer. Sufficient for using it to call DoubleToCString in	199 // Size of buffer. Sufficient for using it to call DoubleToCString in

200 // from conversions.h.	200 // from conversions.h.

201 static const int kBufferSize = 100;	201 static const int kBufferSize = 100;

202	202

203 UnicodeCache* unicode_constants_;	203 UnicodeCache* unicode_constants_;

204 // Backing store used to store strings used as hashmap keys.	204 // Backing store used to store strings used as hashmap keys.

205 SequenceCollector<unsigned char> backing_store_;	205 SequenceCollector<unsigned char> backing_store_;

206 HashMap map_;	206 HashMap map_;

207 // Buffer used for string->number->canonical string conversions.	207 // Buffer used for string->number->canonical string conversions.

(...skipping 15 matching lines...) Expand all Loading...
223 }	223 }

224	224

225 INLINE(void AddChar(uint32_t code_unit)) {	225 INLINE(void AddChar(uint32_t code_unit)) {

226 if (position_ >= backing_store_.length()) ExpandBuffer();	226 if (position_ >= backing_store_.length()) ExpandBuffer();

227 if (is_one_byte_) {	227 if (is_one_byte_) {

228 if (code_unit <= unibrow::Latin1::kMaxChar) {	228 if (code_unit <= unibrow::Latin1::kMaxChar) {

229 backing_store_[position_] = static_cast<byte>(code_unit);	229 backing_store_[position_] = static_cast<byte>(code_unit);

230 position_ += kOneByteSize;	230 position_ += kOneByteSize;

231 return;	231 return;

232 }	232 }

233 ConvertToUtf16();	233 ConvertToTwoByte();

234 }	234 }

235 ASSERT(code_unit < 0x10000u);	235 ASSERT(code_unit < 0x10000u);

236 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;	236 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;

237 position_ += kUC16Size;	237 position_ += kUC16Size;

238 }	238 }

239	239

240 bool is_one_byte() { return is_one_byte_; }	240 bool is_one_byte() { return is_one_byte_; }

241	241

242 bool is_contextual_keyword(Vector<const char> keyword) {	242 bool is_contextual_keyword(Vector<const char> keyword) {

243 return is_one_byte() && keyword.length() == position_ &&	243 return is_one_byte() && keyword.length() == position_ &&

244 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);	244 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);

245 }	245 }

246	246

247 Vector<const uc16> utf16_literal() {	247 Vector<const uint16_t> two_byte_literal() {

248 ASSERT(!is_one_byte_);	248 ASSERT(!is_one_byte_);

249 ASSERT((position_ & 0x1) == 0);	249 ASSERT((position_ & 0x1) == 0);

250 return Vector<const uc16>(	250 return Vector<const uint16_t>(

251 reinterpret_cast<const uc16*>(backing_store_.start()),	251 reinterpret_cast<const uint16_t*>(backing_store_.start()),

252 position_ >> 1);	252 position_ >> 1);

253 }	253 }

254	254

255 Vector<const char> one_byte_literal() {	255 Vector<const uint8_t> one_byte_literal() {

256 ASSERT(is_one_byte_);	256 ASSERT(is_one_byte_);

257 return Vector<const char>(	257 return Vector<const uint8_t>(

258 reinterpret_cast<const char*>(backing_store_.start()),	258 reinterpret_cast<const uint8_t*>(backing_store_.start()),

259 position_);	259 position_);

260 }	260 }

261	261

262 int length() {	262 int length() {

263 return is_one_byte_ ? position_ : (position_ >> 1);	263 return is_one_byte_ ? position_ : (position_ >> 1);

264 }	264 }

265	265

266 void Reset() {	266 void Reset() {

267 position_ = 0;	267 position_ = 0;

268 is_one_byte_ = true;	268 is_one_byte_ = true;

(...skipping 10 matching lines...) Expand all Loading...
279 return new_capacity;	279 return new_capacity;

280 }	280 }

281	281

282 void ExpandBuffer() {	282 void ExpandBuffer() {

283 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));	283 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));

284 OS::MemCopy(new_store.start(), backing_store_.start(), position_);	284 OS::MemCopy(new_store.start(), backing_store_.start(), position_);

285 backing_store_.Dispose();	285 backing_store_.Dispose();

286 backing_store_ = new_store;	286 backing_store_ = new_store;

287 }	287 }

288	288

289 void ConvertToUtf16() {	289 void ConvertToTwoByte() {

290 ASSERT(is_one_byte_);	290 ASSERT(is_one_byte_);

291 Vector<byte> new_store;	291 Vector<byte> new_store;

292 int new_content_size = position_ * kUC16Size;	292 int new_content_size = position_ * kUC16Size;

293 if (new_content_size >= backing_store_.length()) {	293 if (new_content_size >= backing_store_.length()) {

294 // Ensure room for all currently read code units as UC16 as well	294 // Ensure room for all currently read code units as UC16 as well

295 // as the code unit about to be stored.	295 // as the code unit about to be stored.

296 new_store = Vector<byte>::New(NewCapacity(new_content_size));	296 new_store = Vector<byte>::New(NewCapacity(new_content_size));

297 } else {	297 } else {

298 new_store = backing_store_;	298 new_store = backing_store_;

299 }	299 }

300 uint8_t* src = backing_store_.start();	300 uint8_t* src = backing_store_.start();

301 uc16* dst = reinterpret_cast<uc16*>(new_store.start());	301 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());

302 for (int i = position_ - 1; i >= 0; i--) {	302 for (int i = position_ - 1; i >= 0; i--) {

303 dst[i] = src[i];	303 dst[i] = src[i];

304 }	304 }

305 if (new_store.start() != backing_store_.start()) {	305 if (new_store.start() != backing_store_.start()) {

306 backing_store_.Dispose();	306 backing_store_.Dispose();

307 backing_store_ = new_store;	307 backing_store_ = new_store;

308 }	308 }

309 position_ = new_content_size;	309 position_ = new_content_size;

310 is_one_byte_ = false;	310 is_one_byte_ = false;

311 }	311 }

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
401	401

402 Handle<String> AllocateNextLiteralString(Isolate* isolate,	402 Handle<String> AllocateNextLiteralString(Isolate* isolate,

403 PretenureFlag tenured);	403 PretenureFlag tenured);

404 Handle<String> AllocateInternalizedString(Isolate* isolate);	404 Handle<String> AllocateInternalizedString(Isolate* isolate);

405	405

406 double DoubleValue();	406 double DoubleValue();

407 bool UnescapedLiteralMatches(const char* data, int length) {	407 bool UnescapedLiteralMatches(const char* data, int length) {

408 if (is_literal_one_byte() &&	408 if (is_literal_one_byte() &&

409 literal_length() == length &&	409 literal_length() == length &&

410 !literal_contains_escapes()) {	410 !literal_contains_escapes()) {

411 return !strncmp(literal_one_byte_string().start(), data, length);	411 const char* token =

	412 reinterpret_cast<const char*>(literal_one_byte_string().start());

	413 return !strncmp(token, data, length);

412 }	414 }

413 return false;	415 return false;

414 }	416 }

415 void IsGetOrSet(bool* is_get, bool* is_set) {	417 void IsGetOrSet(bool* is_get, bool* is_set) {

416 if (is_literal_one_byte() &&	418 if (is_literal_one_byte() &&

417 literal_length() == 3 &&	419 literal_length() == 3 &&

418 !literal_contains_escapes()) {	420 !literal_contains_escapes()) {

419 const char* token = literal_one_byte_string().start();	421 const char* token =

	422 reinterpret_cast<const char*>(literal_one_byte_string().start());

420 *is_get = strncmp(token, "get", 3) == 0;	423 *is_get = strncmp(token, "get", 3) == 0;

421 *is_set = !*is_get && strncmp(token, "set", 3) == 0;	424 *is_set = !*is_get && strncmp(token, "set", 3) == 0;

422 }	425 }

423 }	426 }

424	427

425 int FindNumber(DuplicateFinder* finder, int value);	428 int FindNumber(DuplicateFinder* finder, int value);

426 int FindSymbol(DuplicateFinder* finder, int value);	429 int FindSymbol(DuplicateFinder* finder, int value);

427	430

428 void LogSymbol(ParserRecorder* log, int position);	431 void LogSymbol(ParserRecorder* log, int position);

429	432

(...skipping 114 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
544 return else_;	547 return else_;

545 }	548 }

546 }	549 }

547	550

548 // Returns the literal string, if any, for the current token (the	551 // Returns the literal string, if any, for the current token (the

549 // token last returned by Next()). The string is 0-terminated.	552 // token last returned by Next()). The string is 0-terminated.

550 // Literal strings are collected for identifiers, strings, and	553 // Literal strings are collected for identifiers, strings, and

551 // numbers.	554 // numbers.

552 // These functions only give the correct result if the literal	555 // These functions only give the correct result if the literal

553 // was scanned between calls to StartLiteral() and TerminateLiteral().	556 // was scanned between calls to StartLiteral() and TerminateLiteral().

554 Vector<const char> literal_one_byte_string() {	557 Vector<const uint8_t> literal_one_byte_string() {

555 ASSERT_NOT_NULL(current_.literal_chars);	558 ASSERT_NOT_NULL(current_.literal_chars);

556 return current_.literal_chars->one_byte_literal();	559 return current_.literal_chars->one_byte_literal();

557 }	560 }

558 Vector<const uc16> literal_utf16_string() {	561 Vector<const uint16_t> literal_two_byte_string() {

559 ASSERT_NOT_NULL(current_.literal_chars);	562 ASSERT_NOT_NULL(current_.literal_chars);

560 return current_.literal_chars->utf16_literal();	563 return current_.literal_chars->two_byte_literal();

561 }	564 }

562 bool is_literal_one_byte() {	565 bool is_literal_one_byte() {

563 ASSERT_NOT_NULL(current_.literal_chars);	566 ASSERT_NOT_NULL(current_.literal_chars);

564 return current_.literal_chars->is_one_byte();	567 return current_.literal_chars->is_one_byte();

565 }	568 }

566 int literal_length() const {	569 int literal_length() const {

567 ASSERT_NOT_NULL(current_.literal_chars);	570 ASSERT_NOT_NULL(current_.literal_chars);

568 return current_.literal_chars->length();	571 return current_.literal_chars->length();

569 }	572 }

570 // Returns the literal string for the next token (the token that	573 // Returns the literal string for the next token (the token that

571 // would be returned if Next() were called).	574 // would be returned if Next() were called).

572 Vector<const char> next_literal_one_byte_string() {	575 Vector<const uint8_t> next_literal_one_byte_string() {

573 ASSERT_NOT_NULL(next_.literal_chars);	576 ASSERT_NOT_NULL(next_.literal_chars);

574 return next_.literal_chars->one_byte_literal();	577 return next_.literal_chars->one_byte_literal();

575 }	578 }

576 Vector<const uc16> next_literal_utf16_string() {	579 Vector<const uint16_t> next_literal_two_byte_string() {

577 ASSERT_NOT_NULL(next_.literal_chars);	580 ASSERT_NOT_NULL(next_.literal_chars);

578 return next_.literal_chars->utf16_literal();	581 return next_.literal_chars->two_byte_literal();

579 }	582 }

580 bool is_next_literal_one_byte() {	583 bool is_next_literal_one_byte() {

581 ASSERT_NOT_NULL(next_.literal_chars);	584 ASSERT_NOT_NULL(next_.literal_chars);

582 return next_.literal_chars->is_one_byte();	585 return next_.literal_chars->is_one_byte();

583 }	586 }

584 int next_literal_length() const {	587 int next_literal_length() const {

585 ASSERT_NOT_NULL(next_.literal_chars);	588 ASSERT_NOT_NULL(next_.literal_chars);

586 return next_.literal_chars->length();	589 return next_.literal_chars->length();

587 }	590 }

588	591

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
651 bool harmony_scoping_;	654 bool harmony_scoping_;

652 // Whether we scan 'module', 'import', 'export' as keywords.	655 // Whether we scan 'module', 'import', 'export' as keywords.

653 bool harmony_modules_;	656 bool harmony_modules_;

654 // Whether we scan 0o777 and 0b111 as numbers.	657 // Whether we scan 0o777 and 0b111 as numbers.

655 bool harmony_numeric_literals_;	658 bool harmony_numeric_literals_;

656 };	659 };

657	660

658 } } // namespace v8::internal	661 } } // namespace v8::internal

659	662

660 #endif // V8_SCANNER_H_	663 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/preparse-data.h ('k') | src/scanner.cc » ('j') | no next file with comments »