OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
169 int AddAsciiSymbol(Vector<const char> key, int value); | 169 int AddAsciiSymbol(Vector<const char> key, int value); |
170 int AddUtf16Symbol(Vector<const uint16_t> key, int value); | 170 int AddUtf16Symbol(Vector<const uint16_t> key, int value); |
171 // Add a number literal by converting it (if necessary) | 171 // Add a number literal by converting it (if necessary) |
172 // to the string that ToString(ToNumber(literal)) would generate, | 172 // to the string that ToString(ToNumber(literal)) would generate, |
173 // and then adding that string with AddAsciiSymbol. | 173 // and then adding that string with AddAsciiSymbol. |
174 // This string is the actual value used as key in an object literal, | 174 // This string is the actual value used as key in an object literal, |
175 // and the one that must be different from the other keys. | 175 // and the one that must be different from the other keys. |
176 int AddNumber(Vector<const char> key, int value); | 176 int AddNumber(Vector<const char> key, int value); |
177 | 177 |
178 private: | 178 private: |
179 int AddSymbol(Vector<const byte> key, bool is_ascii, int value); | 179 int AddSymbol(Vector<const byte> key, bool is_one_byte, int value); |
180 // Backs up the key and its length in the backing store. | 180 // Backs up the key and its length in the backing store. |
181 // The backup is stored with a base 127 encoding of the | 181 // The backup is stored with a base 127 encoding of the |
182 // length (plus a bit saying whether the string is ASCII), | 182 // length (plus a bit saying whether the string is ASCII), |
183 // followed by the bytes of the key. | 183 // followed by the bytes of the key. |
184 byte* BackupKey(Vector<const byte> key, bool is_ascii); | 184 byte* BackupKey(Vector<const byte> key, bool is_one_byte); |
185 | 185 |
186 // Compare two encoded keys (both pointing into the backing store) | 186 // Compare two encoded keys (both pointing into the backing store) |
187 // for having the same base-127 encoded lengths and ASCII-ness, | 187 // for having the same base-127 encoded lengths and ASCII-ness, |
188 // and then having the same 'length' bytes following. | 188 // and then having the same 'length' bytes following. |
189 static bool Match(void* first, void* second); | 189 static bool Match(void* first, void* second); |
190 // Creates a hash from a sequence of bytes. | 190 // Creates a hash from a sequence of bytes. |
191 static uint32_t Hash(Vector<const byte> key, bool is_ascii); | 191 static uint32_t Hash(Vector<const byte> key, bool is_one_byte); |
192 // Checks whether a string containing a JS number is its canonical | 192 // Checks whether a string containing a JS number is its canonical |
193 // form. | 193 // form. |
194 static bool IsNumberCanonical(Vector<const char> key); | 194 static bool IsNumberCanonical(Vector<const char> key); |
195 | 195 |
196 // Size of buffer. Sufficient for using it to call DoubleToCString | 196 // Size of buffer. Sufficient for using it to call DoubleToCString |
197 // from conversions.h. | 197 // from conversions.h. |
198 static const int kBufferSize = 100; | 198 static const int kBufferSize = 100; |
199 | 199 |
200 UnicodeCache* unicode_constants_; | 200 UnicodeCache* unicode_constants_; |
201 // Backing store used to store strings used as hashmap keys. | 201 // Backing store used to store strings used as hashmap keys. |
202 SequenceCollector<unsigned char> backing_store_; | 202 SequenceCollector<unsigned char> backing_store_; |
203 HashMap map_; | 203 HashMap map_; |
204 // Buffer used for string->number->canonical string conversions. | 204 // Buffer used for string->number->canonical string conversions. |
205 char number_buffer_[kBufferSize]; | 205 char number_buffer_[kBufferSize]; |
206 }; | 206 }; |
207 | 207 |
208 | 208 |
209 // ---------------------------------------------------------------------------- | 209 // ---------------------------------------------------------------------------- |
210 // LiteralBuffer - Collector of chars of literals. | 210 // LiteralBuffer - Collector of chars of literals. |
211 | 211 |
212 class LiteralBuffer { | 212 class LiteralBuffer { |
213 public: | 213 public: |
214 LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } | 214 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } |
215 | 215 |
216 ~LiteralBuffer() { | 216 ~LiteralBuffer() { |
217 if (backing_store_.length() > 0) { | 217 if (backing_store_.length() > 0) { |
218 backing_store_.Dispose(); | 218 backing_store_.Dispose(); |
219 } | 219 } |
220 } | 220 } |
221 | 221 |
222 INLINE(void AddChar(uint32_t code_unit)) { | 222 INLINE(void AddChar(uint32_t code_unit)) { |
223 if (position_ >= backing_store_.length()) ExpandBuffer(); | 223 if (position_ >= backing_store_.length()) ExpandBuffer(); |
224 if (is_ascii_) { | 224 if (is_one_byte_) { |
225 if (code_unit <= unibrow::Latin1::kMaxChar) { | 225 if (code_unit <= unibrow::Latin1::kMaxChar) { |
226 backing_store_[position_] = static_cast<byte>(code_unit); | 226 backing_store_[position_] = static_cast<byte>(code_unit); |
227 position_ += kOneByteSize; | 227 position_ += kOneByteSize; |
228 return; | 228 return; |
229 } | 229 } |
230 ConvertToUtf16(); | 230 ConvertToUtf16(); |
231 } | 231 } |
232 ASSERT(code_unit < 0x10000u); | 232 ASSERT(code_unit < 0x10000u); |
233 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; | 233 *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; |
234 position_ += kUC16Size; | 234 position_ += kUC16Size; |
235 } | 235 } |
236 | 236 |
237 bool is_ascii() { return is_ascii_; } | 237 bool is_one_byte() { return is_one_byte_; } |
238 | 238 |
239 bool is_contextual_keyword(Vector<const char> keyword) { | 239 bool is_contextual_keyword(Vector<const char> keyword) { |
240 return is_ascii() && keyword.length() == position_ && | 240 return is_one_byte() && keyword.length() == position_ && |
241 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | 241 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); |
242 } | 242 } |
243 | 243 |
244 Vector<const uc16> utf16_literal() { | 244 Vector<const uc16> utf16_literal() { |
245 ASSERT(!is_ascii_); | 245 ASSERT(!is_one_byte_); |
246 ASSERT((position_ & 0x1) == 0); | 246 ASSERT((position_ & 0x1) == 0); |
247 return Vector<const uc16>( | 247 return Vector<const uc16>( |
248 reinterpret_cast<const uc16*>(backing_store_.start()), | 248 reinterpret_cast<const uc16*>(backing_store_.start()), |
249 position_ >> 1); | 249 position_ >> 1); |
250 } | 250 } |
251 | 251 |
252 Vector<const char> ascii_literal() { | 252 Vector<const char> one_byte_literal() { |
253 ASSERT(is_ascii_); | 253 ASSERT(is_one_byte_); |
254 return Vector<const char>( | 254 return Vector<const char>( |
255 reinterpret_cast<const char*>(backing_store_.start()), | 255 reinterpret_cast<const char*>(backing_store_.start()), |
256 position_); | 256 position_); |
257 } | 257 } |
258 | 258 |
259 int length() { | 259 int length() { |
260 return is_ascii_ ? position_ : (position_ >> 1); | 260 return is_one_byte_ ? position_ : (position_ >> 1); |
261 } | 261 } |
262 | 262 |
263 void Reset() { | 263 void Reset() { |
264 position_ = 0; | 264 position_ = 0; |
265 is_ascii_ = true; | 265 is_one_byte_ = true; |
266 } | 266 } |
267 | 267 |
268 private: | 268 private: |
269 static const int kInitialCapacity = 16; | 269 static const int kInitialCapacity = 16; |
270 static const int kGrowthFactory = 4; | 270 static const int kGrowthFactory = 4; |
271 static const int kMinConversionSlack = 256; | 271 static const int kMinConversionSlack = 256; |
272 static const int kMaxGrowth = 1 * MB; | 272 static const int kMaxGrowth = 1 * MB; |
273 inline int NewCapacity(int min_capacity) { | 273 inline int NewCapacity(int min_capacity) { |
274 int capacity = Max(min_capacity, backing_store_.length()); | 274 int capacity = Max(min_capacity, backing_store_.length()); |
275 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | 275 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); |
276 return new_capacity; | 276 return new_capacity; |
277 } | 277 } |
278 | 278 |
279 void ExpandBuffer() { | 279 void ExpandBuffer() { |
280 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | 280 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); |
281 OS::MemCopy(new_store.start(), backing_store_.start(), position_); | 281 OS::MemCopy(new_store.start(), backing_store_.start(), position_); |
282 backing_store_.Dispose(); | 282 backing_store_.Dispose(); |
283 backing_store_ = new_store; | 283 backing_store_ = new_store; |
284 } | 284 } |
285 | 285 |
286 void ConvertToUtf16() { | 286 void ConvertToUtf16() { |
287 ASSERT(is_ascii_); | 287 ASSERT(is_one_byte_); |
288 Vector<byte> new_store; | 288 Vector<byte> new_store; |
289 int new_content_size = position_ * kUC16Size; | 289 int new_content_size = position_ * kUC16Size; |
290 if (new_content_size >= backing_store_.length()) { | 290 if (new_content_size >= backing_store_.length()) { |
291 // Ensure room for all currently read code units as UC16 as well | 291 // Ensure room for all currently read code units as UC16 as well |
292 // as the code unit about to be stored. | 292 // as the code unit about to be stored. |
293 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | 293 new_store = Vector<byte>::New(NewCapacity(new_content_size)); |
294 } else { | 294 } else { |
295 new_store = backing_store_; | 295 new_store = backing_store_; |
296 } | 296 } |
297 uint8_t* src = backing_store_.start(); | 297 uint8_t* src = backing_store_.start(); |
298 uc16* dst = reinterpret_cast<uc16*>(new_store.start()); | 298 uc16* dst = reinterpret_cast<uc16*>(new_store.start()); |
299 for (int i = position_ - 1; i >= 0; i--) { | 299 for (int i = position_ - 1; i >= 0; i--) { |
300 dst[i] = src[i]; | 300 dst[i] = src[i]; |
301 } | 301 } |
302 if (new_store.start() != backing_store_.start()) { | 302 if (new_store.start() != backing_store_.start()) { |
303 backing_store_.Dispose(); | 303 backing_store_.Dispose(); |
304 backing_store_ = new_store; | 304 backing_store_ = new_store; |
305 } | 305 } |
306 position_ = new_content_size; | 306 position_ = new_content_size; |
307 is_ascii_ = false; | 307 is_one_byte_ = false; |
308 } | 308 } |
309 | 309 |
310 bool is_ascii_; | 310 bool is_one_byte_; |
311 int position_; | 311 int position_; |
312 Vector<byte> backing_store_; | 312 Vector<byte> backing_store_; |
313 | 313 |
314 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | 314 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); |
315 }; | 315 }; |
316 | 316 |
317 | 317 |
318 // ---------------------------------------------------------------------------- | 318 // ---------------------------------------------------------------------------- |
319 // JavaScript Scanner. | 319 // JavaScript Scanner. |
320 | 320 |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 Token::Value current_token() { return current_.token; } | 369 Token::Value current_token() { return current_.token; } |
370 // Returns the location information for the current token | 370 // Returns the location information for the current token |
371 // (the token last returned by Next()). | 371 // (the token last returned by Next()). |
372 Location location() const { return current_.location; } | 372 Location location() const { return current_.location; } |
373 // Returns the literal string, if any, for the current token (the | 373 // Returns the literal string, if any, for the current token (the |
374 // token last returned by Next()). The string is 0-terminated. | 374 // token last returned by Next()). The string is 0-terminated. |
375 // Literal strings are collected for identifiers, strings, and | 375 // Literal strings are collected for identifiers, strings, and |
376 // numbers. | 376 // numbers. |
377 // These functions only give the correct result if the literal | 377 // These functions only give the correct result if the literal |
378 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 378 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
379 Vector<const char> literal_ascii_string() { | 379 Vector<const char> literal_one_byte_string() { |
380 ASSERT_NOT_NULL(current_.literal_chars); | 380 ASSERT_NOT_NULL(current_.literal_chars); |
381 return current_.literal_chars->ascii_literal(); | 381 return current_.literal_chars->one_byte_literal(); |
382 } | 382 } |
383 Vector<const uc16> literal_utf16_string() { | 383 Vector<const uc16> literal_utf16_string() { |
384 ASSERT_NOT_NULL(current_.literal_chars); | 384 ASSERT_NOT_NULL(current_.literal_chars); |
385 return current_.literal_chars->utf16_literal(); | 385 return current_.literal_chars->utf16_literal(); |
386 } | 386 } |
387 bool is_literal_ascii() { | 387 bool is_literal_one_byte() { |
388 ASSERT_NOT_NULL(current_.literal_chars); | 388 ASSERT_NOT_NULL(current_.literal_chars); |
389 return current_.literal_chars->is_ascii(); | 389 return current_.literal_chars->is_one_byte(); |
390 } | 390 } |
391 bool is_literal_contextual_keyword(Vector<const char> keyword) { | 391 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
392 ASSERT_NOT_NULL(current_.literal_chars); | 392 ASSERT_NOT_NULL(current_.literal_chars); |
393 return current_.literal_chars->is_contextual_keyword(keyword); | 393 return current_.literal_chars->is_contextual_keyword(keyword); |
394 } | 394 } |
395 int literal_length() const { | 395 int literal_length() const { |
396 ASSERT_NOT_NULL(current_.literal_chars); | 396 ASSERT_NOT_NULL(current_.literal_chars); |
397 return current_.literal_chars->length(); | 397 return current_.literal_chars->length(); |
398 } | 398 } |
399 | 399 |
400 bool literal_contains_escapes() const { | 400 bool literal_contains_escapes() const { |
401 Location location = current_.location; | 401 Location location = current_.location; |
402 int source_length = (location.end_pos - location.beg_pos); | 402 int source_length = (location.end_pos - location.beg_pos); |
403 if (current_.token == Token::STRING) { | 403 if (current_.token == Token::STRING) { |
404 // Subtract delimiters. | 404 // Subtract delimiters. |
405 source_length -= 2; | 405 source_length -= 2; |
406 } | 406 } |
407 return current_.literal_chars->length() != source_length; | 407 return current_.literal_chars->length() != source_length; |
408 } | 408 } |
409 | 409 |
410 // Similar functions for the upcoming token. | 410 // Similar functions for the upcoming token. |
411 | 411 |
412 // One token look-ahead (past the token returned by Next()). | 412 // One token look-ahead (past the token returned by Next()). |
413 Token::Value peek() const { return next_.token; } | 413 Token::Value peek() const { return next_.token; } |
414 | 414 |
415 Location peek_location() const { return next_.location; } | 415 Location peek_location() const { return next_.location; } |
416 | 416 |
417 // Returns the literal string for the next token (the token that | 417 // Returns the literal string for the next token (the token that |
418 // would be returned if Next() were called). | 418 // would be returned if Next() were called). |
419 Vector<const char> next_literal_ascii_string() { | 419 Vector<const char> next_literal_one_byte_string() { |
420 ASSERT_NOT_NULL(next_.literal_chars); | 420 ASSERT_NOT_NULL(next_.literal_chars); |
421 return next_.literal_chars->ascii_literal(); | 421 return next_.literal_chars->one_byte_literal(); |
422 } | 422 } |
423 Vector<const uc16> next_literal_utf16_string() { | 423 Vector<const uc16> next_literal_utf16_string() { |
424 ASSERT_NOT_NULL(next_.literal_chars); | 424 ASSERT_NOT_NULL(next_.literal_chars); |
425 return next_.literal_chars->utf16_literal(); | 425 return next_.literal_chars->utf16_literal(); |
426 } | 426 } |
427 bool is_next_literal_ascii() { | 427 bool is_next_literal_one_byte() { |
428 ASSERT_NOT_NULL(next_.literal_chars); | 428 ASSERT_NOT_NULL(next_.literal_chars); |
429 return next_.literal_chars->is_ascii(); | 429 return next_.literal_chars->is_one_byte(); |
430 } | 430 } |
431 bool is_next_contextual_keyword(Vector<const char> keyword) { | 431 bool is_next_contextual_keyword(Vector<const char> keyword) { |
432 ASSERT_NOT_NULL(next_.literal_chars); | 432 ASSERT_NOT_NULL(next_.literal_chars); |
433 return next_.literal_chars->is_contextual_keyword(keyword); | 433 return next_.literal_chars->is_contextual_keyword(keyword); |
434 } | 434 } |
435 int next_literal_length() const { | 435 int next_literal_length() const { |
436 ASSERT_NOT_NULL(next_.literal_chars); | 436 ASSERT_NOT_NULL(next_.literal_chars); |
437 return next_.literal_chars->length(); | 437 return next_.literal_chars->length(); |
438 } | 438 } |
439 | 439 |
| 440 Handle<String> AllocateLiteralString(Isolate* isolate, PretenureFlag tenured); |
| 441 Handle<String> AllocateNextLiteralString(Isolate* isolate, |
| 442 PretenureFlag tenured); |
| 443 Handle<String> AllocateInternalizedString(Isolate* isolate); |
| 444 |
| 445 double DoubleValue(); |
| 446 bool UnescapedLiteralMatches(const char* data, int length) { |
| 447 if (is_literal_one_byte() && |
| 448 literal_length() == length && |
| 449 !literal_contains_escapes()) { |
| 450 return !strncmp(literal_one_byte_string().start(), data, length); |
| 451 } |
| 452 return false; |
| 453 } |
| 454 void IsGetOrSet(bool* is_get, bool* is_set) { |
| 455 if (is_literal_one_byte() && |
| 456 literal_length() == 3 && |
| 457 !literal_contains_escapes()) { |
| 458 const char* token = literal_one_byte_string().start(); |
| 459 *is_get = strncmp(token, "get", 3) == 0; |
| 460 *is_set = !*is_get && strncmp(token, "set", 3) == 0; |
| 461 } |
| 462 } |
| 463 |
440 UnicodeCache* unicode_cache() { return unicode_cache_; } | 464 UnicodeCache* unicode_cache() { return unicode_cache_; } |
441 | 465 |
442 static const int kCharacterLookaheadBufferSize = 1; | 466 static const int kCharacterLookaheadBufferSize = 1; |
443 | 467 |
444 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 468 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
445 uc32 ScanOctalEscape(uc32 c, int length); | 469 uc32 ScanOctalEscape(uc32 c, int length); |
446 | 470 |
447 // Returns the location of the last seen octal literal. | 471 // Returns the location of the last seen octal literal. |
448 Location octal_position() const { return octal_pos_; } | 472 Location octal_position() const { return octal_pos_; } |
449 void clear_octal_position() { octal_pos_ = Location::invalid(); } | 473 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
620 bool harmony_scoping_; | 644 bool harmony_scoping_; |
621 // Whether we scan 'module', 'import', 'export' as keywords. | 645 // Whether we scan 'module', 'import', 'export' as keywords. |
622 bool harmony_modules_; | 646 bool harmony_modules_; |
623 // Whether we scan 0o777 and 0b111 as numbers. | 647 // Whether we scan 0o777 and 0b111 as numbers. |
624 bool harmony_numeric_literals_; | 648 bool harmony_numeric_literals_; |
625 }; | 649 }; |
626 | 650 |
627 } } // namespace v8::internal | 651 } } // namespace v8::internal |
628 | 652 |
629 #endif // V8_SCANNER_H_ | 653 #endif // V8_SCANNER_H_ |
OLD | NEW |