Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(69)

Side by Side Diff: src/heap.cc

Issue 11593007: Replace the use CharacterStreams in Heap::AllocateSymbolInternal and String::ComputeHash (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 3284 matching lines...) Expand 10 before | Expand all | Expand 10 after
3295 3295
3296 // Returns true for a character in a range. Both limits are inclusive. 3296 // Returns true for a character in a range. Both limits are inclusive.
3297 static inline bool Between(uint32_t character, uint32_t from, uint32_t to) { 3297 static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {
3298 // This makes use of the unsigned wraparound. 3298 // This makes use of the unsigned wraparound.
3299 return character - from <= to - from; 3299 return character - from <= to - from;
3300 } 3300 }
3301 3301
3302 3302
3303 MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString( 3303 MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
3304 Heap* heap, 3304 Heap* heap,
3305 uint32_t c1, 3305 uint16_t c1,
3306 uint32_t c2) { 3306 uint16_t c2) {
3307 String* symbol; 3307 String* symbol;
3308 // Numeric strings have a different hash algorithm not known by 3308 // Numeric strings have a different hash algorithm not known by
3309 // LookupTwoCharsSymbolIfExists, so we skip this step for such strings. 3309 // LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
3310 if ((!Between(c1, '0', '9') || !Between(c2, '0', '9')) && 3310 if ((!Between(c1, '0', '9') || !Between(c2, '0', '9')) &&
3311 heap->symbol_table()->LookupTwoCharsSymbolIfExists(c1, c2, &symbol)) { 3311 heap->symbol_table()->LookupTwoCharsSymbolIfExists(c1, c2, &symbol)) {
3312 return symbol; 3312 return symbol;
3313 // Now we know the length is 2, we might as well make use of that fact 3313 // Now we know the length is 2, we might as well make use of that fact
3314 // when building the new string. 3314 // when building the new string.
3315 } else if ((c1 | c2) <= String::kMaxAsciiCharCodeU) { // We can do this 3315 } else if ((c1 | c2) <= String::kMaxAsciiCharCodeU) { // We can do this
3316 ASSERT(IsPowerOf2(String::kMaxAsciiCharCodeU + 1)); // because of this. 3316 ASSERT(IsPowerOf2(String::kMaxAsciiCharCodeU + 1)); // because of this.
(...skipping 28 matching lines...) Expand all
3345 if (second_length == 0) { 3345 if (second_length == 0) {
3346 return first; 3346 return first;
3347 } 3347 }
3348 3348
3349 int length = first_length + second_length; 3349 int length = first_length + second_length;
3350 3350
3351 // Optimization for 2-character strings often used as keys in a decompression 3351 // Optimization for 2-character strings often used as keys in a decompression
3352 // dictionary. Check whether we already have the string in the symbol 3352 // dictionary. Check whether we already have the string in the symbol
3353 // table to prevent creation of many unnecessary strings. 3353 // table to prevent creation of many unnecessary strings.
3354 if (length == 2) { 3354 if (length == 2) {
3355 unsigned c1 = first->Get(0); 3355 uint16_t c1 = first->Get(0);
3356 unsigned c2 = second->Get(0); 3356 uint16_t c2 = second->Get(0);
3357 return MakeOrFindTwoCharacterString(this, c1, c2); 3357 return MakeOrFindTwoCharacterString(this, c1, c2);
3358 } 3358 }
3359 3359
3360 bool first_is_ascii = first->IsOneByteRepresentation(); 3360 bool first_is_ascii = first->IsOneByteRepresentation();
3361 bool second_is_ascii = second->IsOneByteRepresentation(); 3361 bool second_is_ascii = second->IsOneByteRepresentation();
3362 bool is_ascii = first_is_ascii && second_is_ascii; 3362 bool is_ascii = first_is_ascii && second_is_ascii;
3363 3363
3364 // Make sure that an out of memory exception is thrown if the length 3364 // Make sure that an out of memory exception is thrown if the length
3365 // of the new cons string is too large. 3365 // of the new cons string is too large.
3366 if (length > String::kMaxLength || length < 0) { 3366 if (length > String::kMaxLength || length < 0) {
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3460 PretenureFlag pretenure) { 3460 PretenureFlag pretenure) {
3461 int length = end - start; 3461 int length = end - start;
3462 if (length <= 0) { 3462 if (length <= 0) {
3463 return empty_string(); 3463 return empty_string();
3464 } else if (length == 1) { 3464 } else if (length == 1) {
3465 return LookupSingleCharacterStringFromCode(buffer->Get(start)); 3465 return LookupSingleCharacterStringFromCode(buffer->Get(start));
3466 } else if (length == 2) { 3466 } else if (length == 2) {
3467 // Optimization for 2-character strings often used as keys in a decompression 3467 // Optimization for 2-character strings often used as keys in a decompression
3468 // dictionary. Check whether we already have the string in the symbol 3468 // dictionary. Check whether we already have the string in the symbol
3469 // table to prevent creation of many unnecessary strings. 3469 // table to prevent creation of many unnecessary strings.
3470 unsigned c1 = buffer->Get(start); 3470 uint16_t c1 = buffer->Get(start);
3471 unsigned c2 = buffer->Get(start + 1); 3471 uint16_t c2 = buffer->Get(start + 1);
3472 return MakeOrFindTwoCharacterString(this, c1, c2); 3472 return MakeOrFindTwoCharacterString(this, c1, c2);
3473 } 3473 }
3474 3474
3475 // Make an attempt to flatten the buffer to reduce access time. 3475 // Make an attempt to flatten the buffer to reduce access time.
3476 buffer = buffer->TryFlattenGetString(); 3476 buffer = buffer->TryFlattenGetString();
3477 3477
3478 if (!FLAG_string_slices || 3478 if (!FLAG_string_slices ||
3479 !buffer->IsFlat() || 3479 !buffer->IsFlat() ||
3480 length < SlicedString::kMinLength || 3480 length < SlicedString::kMinLength ||
3481 pretenure == TENURED) { 3481 pretenure == TENURED) {
(...skipping 1134 matching lines...) Expand 10 before | Expand all | Expand 10 after
4616 case SHORT_EXTERNAL_STRING_TYPE: return short_external_symbol_map(); 4616 case SHORT_EXTERNAL_STRING_TYPE: return short_external_symbol_map();
4617 case SHORT_EXTERNAL_ASCII_STRING_TYPE: 4617 case SHORT_EXTERNAL_ASCII_STRING_TYPE:
4618 return short_external_ascii_symbol_map(); 4618 return short_external_ascii_symbol_map();
4619 case SHORT_EXTERNAL_STRING_WITH_ASCII_DATA_TYPE: 4619 case SHORT_EXTERNAL_STRING_WITH_ASCII_DATA_TYPE:
4620 return short_external_symbol_with_ascii_data_map(); 4620 return short_external_symbol_with_ascii_data_map();
4621 default: return NULL; // No match found. 4621 default: return NULL; // No match found.
4622 } 4622 }
4623 } 4623 }
4624 4624
4625 4625
4626 MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer, 4626 template<typename T>
4627 struct AllocateInternalSymbolHelper {
4628 static bool IsOneByte(T t, int chars);
4629 static void WriteOneByteData(T t, char* chars, int len);
4630 static void WriteTwoByteData(T t, uint16_t* chars, int len);
4631 };
4632
4633
4634 template<>
4635 struct AllocateInternalSymbolHelper< Vector<const char> > {
4636 static bool IsOneByte(Vector<const char> vector, int chars) {
4637 // TODO(dcarney): check for Latin-1 when Latin-1 is actually supported.
4638 // At the moment this code path is never hit for ascii.
4639 ASSERT(chars <= vector.length());
4640 return chars == vector.length();
4641 }
4642
4643 static inline void WriteOneByteData(Vector<const char> vector,
4644 char* chars,
4645 int len) {
4646 // Only works for ascii.
4647 ASSERT(vector.length() == len);
4648 memcpy(chars, vector.start(), len);
4649 }
4650
4651 static inline void WriteTwoByteData(Vector<const char> vector,
4652 uint16_t* chars,
4653 int len) {
4654 const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
4655 unsigned stream_length = vector.length();
4656 while (stream_length != 0) {
4657 unsigned consumed = 0;
4658 uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
4659 if (c == unibrow::Utf8::kBadChar) break;
4660 ASSERT(consumed <= stream_length);
4661 stream_length -= consumed;
4662 stream += consumed;
4663 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
4664 len -= 2;
4665 if (len < 0) break;
4666 *chars++ = unibrow::Utf16::LeadSurrogate(c);
4667 *chars++ = unibrow::Utf16::TrailSurrogate(c);
4668 } else {
4669 len -= 1;
4670 if (len < 0) break;
4671 *chars++ = c;
4672 }
4673 }
4674 ASSERT(stream_length == 0);
4675 ASSERT(len == 0);
4676 }
4677 };
4678
4679
4680 template<>
4681 struct AllocateInternalSymbolHelper<String*> {
4682 static inline bool IsOneByte(String* string, int chars) {
4683 return string->IsOneByteRepresentation();
4684 }
4685
4686 static inline void WriteOneByteData(String* s, char* chars, int len) {
4687 ASSERT(s->length() == len);
4688 String::WriteToFlat(s, chars, 0, len);
4689 }
4690
4691 static inline void WriteTwoByteData(String* s, uint16_t* chars, int len) {
4692 ASSERT(s->length() == len);
4693 String::WriteToFlat(s, chars, 0, len);
4694 }
4695 };
4696
4697
4698 template<typename T>
4699 MaybeObject* Heap::AllocateInternalSymbol(T t,
4627 int chars, 4700 int chars,
4628 uint32_t hash_field) { 4701 uint32_t hash_field) {
4702 typedef AllocateInternalSymbolHelper<T> H;
4629 ASSERT(chars >= 0); 4703 ASSERT(chars >= 0);
4630 // Ensure the chars matches the number of characters in the buffer. 4704 // Determine whether the string is potentially one byte.
4631 ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length()); 4705 bool is_one_byte = H::IsOneByte(t, chars);
4632 // Determine whether the string is ASCII.
4633 bool is_ascii = true;
4634 while (buffer->has_more()) {
4635 if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
4636 is_ascii = false;
4637 break;
4638 }
4639 }
4640 buffer->Rewind();
4641 4706
4642 // Compute map and object size. 4707 // Compute map and object size.
4643 int size; 4708 int size;
4644 Map* map; 4709 Map* map;
4645 4710
4646 if (is_ascii) { 4711 if (is_one_byte) {
4647 if (chars > SeqOneByteString::kMaxLength) { 4712 if (chars > SeqOneByteString::kMaxLength) {
4648 return Failure::OutOfMemoryException(); 4713 return Failure::OutOfMemoryException();
4649 } 4714 }
4650 map = ascii_symbol_map(); 4715 map = ascii_symbol_map();
4651 size = SeqOneByteString::SizeFor(chars); 4716 size = SeqOneByteString::SizeFor(chars);
4652 } else { 4717 } else {
4653 if (chars > SeqTwoByteString::kMaxLength) { 4718 if (chars > SeqTwoByteString::kMaxLength) {
4654 return Failure::OutOfMemoryException(); 4719 return Failure::OutOfMemoryException();
4655 } 4720 }
4656 map = symbol_map(); 4721 map = symbol_map();
4657 size = SeqTwoByteString::SizeFor(chars); 4722 size = SeqTwoByteString::SizeFor(chars);
4658 } 4723 }
4659 4724
4660 // Allocate string. 4725 // Allocate string.
4661 Object* result; 4726 Object* result;
4662 { MaybeObject* maybe_result = (size > Page::kMaxNonCodeHeapObjectSize) 4727 { MaybeObject* maybe_result = (size > Page::kMaxNonCodeHeapObjectSize)
4663 ? lo_space_->AllocateRaw(size, NOT_EXECUTABLE) 4728 ? lo_space_->AllocateRaw(size, NOT_EXECUTABLE)
4664 : old_data_space_->AllocateRaw(size); 4729 : old_data_space_->AllocateRaw(size);
4665 if (!maybe_result->ToObject(&result)) return maybe_result; 4730 if (!maybe_result->ToObject(&result)) return maybe_result;
4666 } 4731 }
4667 4732
4668 reinterpret_cast<HeapObject*>(result)->set_map_no_write_barrier(map); 4733 reinterpret_cast<HeapObject*>(result)->set_map_no_write_barrier(map);
4669 // Set length and hash fields of the allocated string. 4734 // Set length and hash fields of the allocated string.
4670 String* answer = String::cast(result); 4735 String* answer = String::cast(result);
4671 answer->set_length(chars); 4736 answer->set_length(chars);
4672 answer->set_hash_field(hash_field); 4737 answer->set_hash_field(hash_field);
4673 4738
4674 ASSERT_EQ(size, answer->Size()); 4739 ASSERT_EQ(size, answer->Size());
4675 4740
4676 // Fill in the characters. 4741 if (is_one_byte) {
4677 int i = 0; 4742 H::WriteOneByteData(t, SeqOneByteString::cast(answer)->GetChars(), chars);
4678 while (i < chars) { 4743 } else {
4679 uint32_t character = buffer->GetNext(); 4744 H::WriteTwoByteData(t, SeqTwoByteString::cast(answer)->GetChars(), chars);
4680 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
4681 answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
4682 answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
4683 } else {
4684 answer->Set(i++, character);
4685 }
4686 } 4745 }
4687 return answer; 4746 return answer;
4688 } 4747 }
4689 4748
4690 4749
4750 // Need explicit instantiations.
4751 template
4752 MaybeObject* Heap::AllocateInternalSymbol(String*, int, uint32_t);
4753 template
4754 MaybeObject* Heap::AllocateInternalSymbol(Vector<const char>, int, uint32_t);
4755
4756
4691 MaybeObject* Heap::AllocateRawOneByteString(int length, 4757 MaybeObject* Heap::AllocateRawOneByteString(int length,
4692 PretenureFlag pretenure) { 4758 PretenureFlag pretenure) {
4693 if (length < 0 || length > SeqOneByteString::kMaxLength) { 4759 if (length < 0 || length > SeqOneByteString::kMaxLength) {
4694 return Failure::OutOfMemoryException(); 4760 return Failure::OutOfMemoryException();
4695 } 4761 }
4696 4762
4697 int size = SeqOneByteString::SizeFor(length); 4763 int size = SeqOneByteString::SizeFor(length);
4698 ASSERT(size <= SeqOneByteString::kMaxSize); 4764 ASSERT(size <= SeqOneByteString::kMaxSize);
4699 4765
4700 AllocationSpace space = (pretenure == TENURED) ? OLD_DATA_SPACE : NEW_SPACE; 4766 AllocationSpace space = (pretenure == TENURED) ? OLD_DATA_SPACE : NEW_SPACE;
(...skipping 2705 matching lines...) Expand 10 before | Expand all | Expand 10 after
7406 static_cast<int>(object_sizes_last_time_[index])); 7472 static_cast<int>(object_sizes_last_time_[index]));
7407 FIXED_ARRAY_SUB_INSTANCE_TYPE_LIST(ADJUST_LAST_TIME_OBJECT_COUNT) 7473 FIXED_ARRAY_SUB_INSTANCE_TYPE_LIST(ADJUST_LAST_TIME_OBJECT_COUNT)
7408 #undef ADJUST_LAST_TIME_OBJECT_COUNT 7474 #undef ADJUST_LAST_TIME_OBJECT_COUNT
7409 7475
7410 memcpy(object_counts_last_time_, object_counts_, sizeof(object_counts_)); 7476 memcpy(object_counts_last_time_, object_counts_, sizeof(object_counts_));
7411 memcpy(object_sizes_last_time_, object_sizes_, sizeof(object_sizes_)); 7477 memcpy(object_sizes_last_time_, object_sizes_, sizeof(object_sizes_));
7412 ClearObjectStats(); 7478 ClearObjectStats();
7413 } 7479 }
7414 7480
7415 } } // namespace v8::internal 7481 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698