Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(463)

Side by Side Diff: src/scanner.h

Issue 198583003: Convert scanner buffers to use standard character types. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/preparse-data.h ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 protected: 113 protected:
114 static const uc32 kEndOfInput = -1; 114 static const uc32 kEndOfInput = -1;
115 115
116 // Ensures that the buffer_cursor_ points to the code_unit at 116 // Ensures that the buffer_cursor_ points to the code_unit at
117 // position pos_ of the input, if possible. If the position 117 // position pos_ of the input, if possible. If the position
118 // is at or after the end of the input, return false. If there 118 // is at or after the end of the input, return false. If there
119 // are more code_units available, return true. 119 // are more code_units available, return true.
120 virtual bool ReadBlock() = 0; 120 virtual bool ReadBlock() = 0;
121 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0; 121 virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
122 122
123 const uc16* buffer_cursor_; 123 const uint16_t* buffer_cursor_;
124 const uc16* buffer_end_; 124 const uint16_t* buffer_end_;
125 unsigned pos_; 125 unsigned pos_;
126 }; 126 };
127 127
128 128
129 // --------------------------------------------------------------------- 129 // ---------------------------------------------------------------------
130 // Caching predicates used by scanners. 130 // Caching predicates used by scanners.
131 131
132 class UnicodeCache { 132 class UnicodeCache {
133 public: 133 public:
134 UnicodeCache() {} 134 UnicodeCache() {}
(...skipping 27 matching lines...) Expand all
162 // --------------------------------------------------------------------- 162 // ---------------------------------------------------------------------
163 // DuplicateFinder discovers duplicate symbols. 163 // DuplicateFinder discovers duplicate symbols.
164 164
165 class DuplicateFinder { 165 class DuplicateFinder {
166 public: 166 public:
167 explicit DuplicateFinder(UnicodeCache* constants) 167 explicit DuplicateFinder(UnicodeCache* constants)
168 : unicode_constants_(constants), 168 : unicode_constants_(constants),
169 backing_store_(16), 169 backing_store_(16),
170 map_(&Match) { } 170 map_(&Match) { }
171 171
172 int AddAsciiSymbol(Vector<const char> key, int value); 172 int AddOneByteSymbol(Vector<const uint8_t> key, int value);
173 int AddUtf16Symbol(Vector<const uint16_t> key, int value); 173 int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
174 // Add a number literal by converting it (if necessary) 174 // Add a number literal by converting it (if necessary)
175 // to the string that ToString(ToNumber(literal)) would generate. 175 // to the string that ToString(ToNumber(literal)) would generate.
176 // and then adding that string with AddAsciiSymbol. 176 // and then adding that string with AddOneByteSymbol.
177 // This string is the actual value used as key in an object literal, 177 // This string is the actual value used as key in an object literal,
178 // and the one that must be different from the other keys. 178 // and the one that must be different from the other keys.
179 int AddNumber(Vector<const char> key, int value); 179 int AddNumber(Vector<const uint8_t> key, int value);
180 180
181 private: 181 private:
182 int AddSymbol(Vector<const byte> key, bool is_one_byte, int value); 182 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
183 // Backs up the key and its length in the backing store. 183 // Backs up the key and its length in the backing store.
184 // The backup is stored with a base 127 encoding of the 184 // The backup is stored with a base 127 encoding of the
185 // length (plus a bit saying whether the string is ASCII), 185 // length (plus a bit saying whether the string is one byte),
186 // followed by the bytes of the key. 186 // followed by the bytes of the key.
187 byte* BackupKey(Vector<const byte> key, bool is_one_byte); 187 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
188 188
189 // Compare two encoded keys (both pointing into the backing store) 189 // Compare two encoded keys (both pointing into the backing store)
190 // for having the same base-127 encoded lengths and ASCII-ness, 190 // for having the same base-127 encoded lengths and ASCII-ness,
191 // and then having the same 'length' bytes following. 191 // and then having the same 'length' bytes following.
192 static bool Match(void* first, void* second); 192 static bool Match(void* first, void* second);
193 // Creates a hash from a sequence of bytes. 193 // Creates a hash from a sequence of bytes.
194 static uint32_t Hash(Vector<const byte> key, bool is_one_byte); 194 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
195 // Checks whether a string containing a JS number is its canonical 195 // Checks whether a string containing a JS number is its canonical
196 // form. 196 // form.
197 static bool IsNumberCanonical(Vector<const char> key); 197 static bool IsNumberCanonical(Vector<const uint8_t> key);
198 198
199 // Size of buffer. Sufficient for using it to call DoubleToCString 199 // Size of buffer. Sufficient for using it to call DoubleToCString
200 // from conversions.h. 200 // from conversions.h.
201 static const int kBufferSize = 100; 201 static const int kBufferSize = 100;
202 202
203 UnicodeCache* unicode_constants_; 203 UnicodeCache* unicode_constants_;
204 // Backing store used to store strings used as hashmap keys. 204 // Backing store used to store strings used as hashmap keys.
205 SequenceCollector<unsigned char> backing_store_; 205 SequenceCollector<unsigned char> backing_store_;
206 HashMap map_; 206 HashMap map_;
207 // Buffer used for string->number->canonical string conversions. 207 // Buffer used for string->number->canonical string conversions.
(...skipping 15 matching lines...) Expand all
223 } 223 }
224 224
225 INLINE(void AddChar(uint32_t code_unit)) { 225 INLINE(void AddChar(uint32_t code_unit)) {
226 if (position_ >= backing_store_.length()) ExpandBuffer(); 226 if (position_ >= backing_store_.length()) ExpandBuffer();
227 if (is_one_byte_) { 227 if (is_one_byte_) {
228 if (code_unit <= unibrow::Latin1::kMaxChar) { 228 if (code_unit <= unibrow::Latin1::kMaxChar) {
229 backing_store_[position_] = static_cast<byte>(code_unit); 229 backing_store_[position_] = static_cast<byte>(code_unit);
230 position_ += kOneByteSize; 230 position_ += kOneByteSize;
231 return; 231 return;
232 } 232 }
233 ConvertToUtf16(); 233 ConvertToTwoByte();
234 } 234 }
235 ASSERT(code_unit < 0x10000u); 235 ASSERT(code_unit < 0x10000u);
236 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; 236 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
237 position_ += kUC16Size; 237 position_ += kUC16Size;
238 } 238 }
239 239
240 bool is_one_byte() { return is_one_byte_; } 240 bool is_one_byte() { return is_one_byte_; }
241 241
242 bool is_contextual_keyword(Vector<const char> keyword) { 242 bool is_contextual_keyword(Vector<const char> keyword) {
243 return is_one_byte() && keyword.length() == position_ && 243 return is_one_byte() && keyword.length() == position_ &&
244 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); 244 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
245 } 245 }
246 246
247 Vector<const uc16> utf16_literal() { 247 Vector<const uint16_t> two_byte_literal() {
248 ASSERT(!is_one_byte_); 248 ASSERT(!is_one_byte_);
249 ASSERT((position_ & 0x1) == 0); 249 ASSERT((position_ & 0x1) == 0);
250 return Vector<const uc16>( 250 return Vector<const uint16_t>(
251 reinterpret_cast<const uc16*>(backing_store_.start()), 251 reinterpret_cast<const uint16_t*>(backing_store_.start()),
252 position_ >> 1); 252 position_ >> 1);
253 } 253 }
254 254
255 Vector<const char> one_byte_literal() { 255 Vector<const uint8_t> one_byte_literal() {
256 ASSERT(is_one_byte_); 256 ASSERT(is_one_byte_);
257 return Vector<const char>( 257 return Vector<const uint8_t>(
258 reinterpret_cast<const char*>(backing_store_.start()), 258 reinterpret_cast<const uint8_t*>(backing_store_.start()),
259 position_); 259 position_);
260 } 260 }
261 261
262 int length() { 262 int length() {
263 return is_one_byte_ ? position_ : (position_ >> 1); 263 return is_one_byte_ ? position_ : (position_ >> 1);
264 } 264 }
265 265
266 void Reset() { 266 void Reset() {
267 position_ = 0; 267 position_ = 0;
268 is_one_byte_ = true; 268 is_one_byte_ = true;
(...skipping 10 matching lines...) Expand all
279 return new_capacity; 279 return new_capacity;
280 } 280 }
281 281
282 void ExpandBuffer() { 282 void ExpandBuffer() {
283 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); 283 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
284 OS::MemCopy(new_store.start(), backing_store_.start(), position_); 284 OS::MemCopy(new_store.start(), backing_store_.start(), position_);
285 backing_store_.Dispose(); 285 backing_store_.Dispose();
286 backing_store_ = new_store; 286 backing_store_ = new_store;
287 } 287 }
288 288
289 void ConvertToUtf16() { 289 void ConvertToTwoByte() {
290 ASSERT(is_one_byte_); 290 ASSERT(is_one_byte_);
291 Vector<byte> new_store; 291 Vector<byte> new_store;
292 int new_content_size = position_ * kUC16Size; 292 int new_content_size = position_ * kUC16Size;
293 if (new_content_size >= backing_store_.length()) { 293 if (new_content_size >= backing_store_.length()) {
294 // Ensure room for all currently read code units as UC16 as well 294 // Ensure room for all currently read code units as UC16 as well
295 // as the code unit about to be stored. 295 // as the code unit about to be stored.
296 new_store = Vector<byte>::New(NewCapacity(new_content_size)); 296 new_store = Vector<byte>::New(NewCapacity(new_content_size));
297 } else { 297 } else {
298 new_store = backing_store_; 298 new_store = backing_store_;
299 } 299 }
300 uint8_t* src = backing_store_.start(); 300 uint8_t* src = backing_store_.start();
301 uc16* dst = reinterpret_cast<uc16*>(new_store.start()); 301 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
302 for (int i = position_ - 1; i >= 0; i--) { 302 for (int i = position_ - 1; i >= 0; i--) {
303 dst[i] = src[i]; 303 dst[i] = src[i];
304 } 304 }
305 if (new_store.start() != backing_store_.start()) { 305 if (new_store.start() != backing_store_.start()) {
306 backing_store_.Dispose(); 306 backing_store_.Dispose();
307 backing_store_ = new_store; 307 backing_store_ = new_store;
308 } 308 }
309 position_ = new_content_size; 309 position_ = new_content_size;
310 is_one_byte_ = false; 310 is_one_byte_ = false;
311 } 311 }
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
401 401
402 Handle<String> AllocateNextLiteralString(Isolate* isolate, 402 Handle<String> AllocateNextLiteralString(Isolate* isolate,
403 PretenureFlag tenured); 403 PretenureFlag tenured);
404 Handle<String> AllocateInternalizedString(Isolate* isolate); 404 Handle<String> AllocateInternalizedString(Isolate* isolate);
405 405
406 double DoubleValue(); 406 double DoubleValue();
407 bool UnescapedLiteralMatches(const char* data, int length) { 407 bool UnescapedLiteralMatches(const char* data, int length) {
408 if (is_literal_one_byte() && 408 if (is_literal_one_byte() &&
409 literal_length() == length && 409 literal_length() == length &&
410 !literal_contains_escapes()) { 410 !literal_contains_escapes()) {
411 return !strncmp(literal_one_byte_string().start(), data, length); 411 const char* token =
412 reinterpret_cast<const char*>(literal_one_byte_string().start());
413 return !strncmp(token, data, length);
412 } 414 }
413 return false; 415 return false;
414 } 416 }
415 void IsGetOrSet(bool* is_get, bool* is_set) { 417 void IsGetOrSet(bool* is_get, bool* is_set) {
416 if (is_literal_one_byte() && 418 if (is_literal_one_byte() &&
417 literal_length() == 3 && 419 literal_length() == 3 &&
418 !literal_contains_escapes()) { 420 !literal_contains_escapes()) {
419 const char* token = literal_one_byte_string().start(); 421 const char* token =
422 reinterpret_cast<const char*>(literal_one_byte_string().start());
420 *is_get = strncmp(token, "get", 3) == 0; 423 *is_get = strncmp(token, "get", 3) == 0;
421 *is_set = !*is_get && strncmp(token, "set", 3) == 0; 424 *is_set = !*is_get && strncmp(token, "set", 3) == 0;
422 } 425 }
423 } 426 }
424 427
425 int FindNumber(DuplicateFinder* finder, int value); 428 int FindNumber(DuplicateFinder* finder, int value);
426 int FindSymbol(DuplicateFinder* finder, int value); 429 int FindSymbol(DuplicateFinder* finder, int value);
427 430
428 void LogSymbol(ParserRecorder* log, int position); 431 void LogSymbol(ParserRecorder* log, int position);
429 432
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
544 return else_; 547 return else_;
545 } 548 }
546 } 549 }
547 550
548 // Returns the literal string, if any, for the current token (the 551 // Returns the literal string, if any, for the current token (the
549 // token last returned by Next()). The string is 0-terminated. 552 // token last returned by Next()). The string is 0-terminated.
550 // Literal strings are collected for identifiers, strings, and 553 // Literal strings are collected for identifiers, strings, and
551 // numbers. 554 // numbers.
552 // These functions only give the correct result if the literal 555 // These functions only give the correct result if the literal
553 // was scanned between calls to StartLiteral() and TerminateLiteral(). 556 // was scanned between calls to StartLiteral() and TerminateLiteral().
554 Vector<const char> literal_one_byte_string() { 557 Vector<const uint8_t> literal_one_byte_string() {
555 ASSERT_NOT_NULL(current_.literal_chars); 558 ASSERT_NOT_NULL(current_.literal_chars);
556 return current_.literal_chars->one_byte_literal(); 559 return current_.literal_chars->one_byte_literal();
557 } 560 }
558 Vector<const uc16> literal_utf16_string() { 561 Vector<const uint16_t> literal_two_byte_string() {
559 ASSERT_NOT_NULL(current_.literal_chars); 562 ASSERT_NOT_NULL(current_.literal_chars);
560 return current_.literal_chars->utf16_literal(); 563 return current_.literal_chars->two_byte_literal();
561 } 564 }
562 bool is_literal_one_byte() { 565 bool is_literal_one_byte() {
563 ASSERT_NOT_NULL(current_.literal_chars); 566 ASSERT_NOT_NULL(current_.literal_chars);
564 return current_.literal_chars->is_one_byte(); 567 return current_.literal_chars->is_one_byte();
565 } 568 }
566 int literal_length() const { 569 int literal_length() const {
567 ASSERT_NOT_NULL(current_.literal_chars); 570 ASSERT_NOT_NULL(current_.literal_chars);
568 return current_.literal_chars->length(); 571 return current_.literal_chars->length();
569 } 572 }
570 // Returns the literal string for the next token (the token that 573 // Returns the literal string for the next token (the token that
571 // would be returned if Next() were called). 574 // would be returned if Next() were called).
572 Vector<const char> next_literal_one_byte_string() { 575 Vector<const uint8_t> next_literal_one_byte_string() {
573 ASSERT_NOT_NULL(next_.literal_chars); 576 ASSERT_NOT_NULL(next_.literal_chars);
574 return next_.literal_chars->one_byte_literal(); 577 return next_.literal_chars->one_byte_literal();
575 } 578 }
576 Vector<const uc16> next_literal_utf16_string() { 579 Vector<const uint16_t> next_literal_two_byte_string() {
577 ASSERT_NOT_NULL(next_.literal_chars); 580 ASSERT_NOT_NULL(next_.literal_chars);
578 return next_.literal_chars->utf16_literal(); 581 return next_.literal_chars->two_byte_literal();
579 } 582 }
580 bool is_next_literal_one_byte() { 583 bool is_next_literal_one_byte() {
581 ASSERT_NOT_NULL(next_.literal_chars); 584 ASSERT_NOT_NULL(next_.literal_chars);
582 return next_.literal_chars->is_one_byte(); 585 return next_.literal_chars->is_one_byte();
583 } 586 }
584 int next_literal_length() const { 587 int next_literal_length() const {
585 ASSERT_NOT_NULL(next_.literal_chars); 588 ASSERT_NOT_NULL(next_.literal_chars);
586 return next_.literal_chars->length(); 589 return next_.literal_chars->length();
587 } 590 }
588 591
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
651 bool harmony_scoping_; 654 bool harmony_scoping_;
652 // Whether we scan 'module', 'import', 'export' as keywords. 655 // Whether we scan 'module', 'import', 'export' as keywords.
653 bool harmony_modules_; 656 bool harmony_modules_;
654 // Whether we scan 0o777 and 0b111 as numbers. 657 // Whether we scan 0o777 and 0b111 as numbers.
655 bool harmony_numeric_literals_; 658 bool harmony_numeric_literals_;
656 }; 659 };
657 660
658 } } // namespace v8::internal 661 } } // namespace v8::internal
659 662
660 #endif // V8_SCANNER_H_ 663 #endif // V8_SCANNER_H_
OLDNEW
« no previous file with comments | « src/preparse-data.h ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698